From 1561017c75431662cb53fe1451c5782f4f7cc241 Mon Sep 17 00:00:00 2001 From: mczekanski1 Date: Sat, 6 Mar 2021 19:14:35 -0500 Subject: [PATCH 01/32] Added notin operator to resolve #4152 --- NAMESPACE | 2 +- R/notin.R | 6 ++++++ inst/tests/tests.Rraw | 20 ++++++++++++-------- man/notin.Rd | 36 ++++++++++++++++++++++++++++++++++++ 4 files changed, 55 insertions(+), 9 deletions(-) create mode 100644 R/notin.R create mode 100644 man/notin.Rd diff --git a/NAMESPACE b/NAMESPACE index 57271aa04d..b6065ce34b 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -8,7 +8,7 @@ exportClasses(data.table, IDate, ITime) export(data.table, tables, setkey, setkeyv, key, "key<-", haskey, CJ, SJ, copy) export(setindex, setindexv, indices) export(as.data.table,is.data.table,test.data.table) -export(last,first,like,"%like%","%ilike%","%flike%",between,"%between%",inrange,"%inrange%") +export(last,first,like,"%like%","%ilike%","%flike%",between,"%between%",inrange,"%inrange%", notin, "%notin%") export(timetaken) export(truelength, setalloccol, alloc.col, ":=") export(setattr, setnames, setcolorder, set, setDT, setDF) diff --git a/R/notin.R b/R/notin.R new file mode 100644 index 0000000000..e0f3a1b008 --- /dev/null +++ b/R/notin.R @@ -0,0 +1,6 @@ +# Intended to be used to create %notin% operator +notin = function(example, elements) { + return(!match(example, elements, nomatch = 0)) +} + +"%notin%" = function(example, elements) notin(example, elements) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index c5910f5c81..16f753f7ec 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -4195,7 +4195,7 @@ setNumericRounding(old_rounding) DT = data.table(id=INT(1,2,1), val1=3:1, val2=3:1, val3=list(2:3,4:6,7:10)) # 5380 test(1199.1, DT[, sum(.SD), by=id, .SDcols=2:3], data.table(id=1:2, V1=INT(8,4))) #875 made the .SD case work -test(1199.2, DT[, sum(.SD), by=id], error="data.*frame.*numeric") # this is R's error message so use flexible string pattern to insulate from minor changes 
in R, #4769 +test(1199.2, DT[, sum(.SD), by=id], error="data.*frame.*numeric") # this is R's error message so use flexible string pattern to insulate from minor changes in R, #4769 test(1199.3, DT[, sum(val3), by=id], error="Type 'list' not supported by GForce sum [(]gsum[)]. Either.*or turn off") # Selection of columns, copy column to maintain the same as R <= 3.0.2, in Rdevel, for now @@ -10442,7 +10442,7 @@ test(1728.12, DT[order(x,na.last=NA)], DT[2]) # was randomly wrong if (test_longdouble) { #3258 old = options(datatable.verbose=FALSE) # capture.output() exact tests must not be polluted with verbosity - + test(1729.01, fwrite(data.table(V1=c(1), V2=c(9.9999999999999982236431605997495353221893310546875))), output="V1,V2\n1,10") test(1729.02, fwrite(data.table(V2=c(9.9999999999999982236431605997495353221893310546875), V1=c(1))), @@ -10522,8 +10522,8 @@ if (test_longdouble) { #3258 # 2.220446e-16 1.110223e-16 2.225074e-308 1.797693e+308 test(1729.12, typeof(DT[[1L]]), "double") test(1729.13, capture.output(fwrite(DT)), capture.output(write.csv(DT,row.names=FALSE,quote=FALSE))) - - options(old) # restore the previous datatable.verbose value, for example for the CRAN_Release test with verbose on + + options(old) # restore the previous datatable.verbose value, for example for the CRAN_Release test with verbose on } if (test_bit64) { @@ -10846,7 +10846,7 @@ if (TZnotUTC) { # from v1.13.0 these tests work when running under non-UTC because they compare to as.POSIXct which reads these unmarked datetime in local # the new tests 2150.* cover more cases # from v1.14.0, the tz="" is needed - test(1743.25, fread("a,b,c\n2015-06-01 11:00:00,1,ae", colClasses=c("POSIXct","integer","character"), tz=""), + test(1743.25, fread("a,b,c\n2015-06-01 11:00:00,1,ae", colClasses=c("POSIXct","integer","character"), tz=""), data.table(a=as.POSIXct("2015-06-01 11:00:00"),b=1L,c="ae")) test(1743.26, fread("a,b,c,d,e,f,g,h\n1,k,2015-06-01 11:00:00,a,1.5,M,9,0", 
colClasses=list(POSIXct="c", character="b"), drop=c("a","b"), logical01=TRUE, tz=""), ans<-data.table(c=as.POSIXct("2015-06-01 11:00:00"), d="a", e=1.5, f="M", g=9L, h=FALSE)) @@ -17143,7 +17143,7 @@ test(2153.2, DT[, .(list(.GRP)), by=x], data.table(x=1:2, V1=as.list(1:2))) test(2153.3, ans<-DT[, .(list(.NGRP)), by=x], data.table(x=1:2, V1=list(2L,2L))) test(2153.4, address(ans$V1[[1L]]), address(ans$V1[[2L]])) # .NGRP doesn't change group to group so the same object can be referenced many times unlike .N and .GRP test(2153.5, DT[, .(list(c(0L,.N,0L))), by=x], # c() here will create new object so this is ok anyway; i.e. address(.N) is not present in j's result - data.table(x=1:2, V1=list(c(0L,1L,0L), c(0L,2L,0L)))) + data.table(x=1:2, V1=list(c(0L,1L,0L), c(0L,2L,0L)))) # warning message segfault when no column names present, #4644 test(2154.1, fread("0.0\n", colClasses="integer"), data.table(V1=0.0), @@ -17161,7 +17161,7 @@ for (i in 0:4) test(2155+i/10, # dogroups.c eval(j) could create list columns containing altrep references to the specials, #4759 # thanks to revdep testing of 1.13.2 where package tstools revealed this via ts() creating ALTREP, #4758 -# the attr(value,"class")<-"newclass" lines mimics a line at the end of stats::ts(). When the +# the attr(value,"class")<-"newclass" lines mimics a line at the end of stats::ts(). When the # length(value)>=64, R creates an ALTREP REF wrapper. Which dogroups.c now catches. # Hence this test needs to be at least 128 rows, 2 groups of 64 each. 
DT = data.table(series=c("ts1","ts2"), value=rnorm(128)) @@ -17186,7 +17186,7 @@ test(2158.1, DT[, .(value = list(value)), index], DT = data.table(value=as.list(1:6), index=rep(1:2, each=3)) test(2158.2, DT[, by="index", list(value=list(value))], data.table(index=1:2, value=list(as.list(1:3), as.list(4:6)))) - + # type consistency of empty input to as.matrix.data.table, #4762 DT = data.table(x = 1) test(2159.01, typeof(as.matrix(DT)), "double") @@ -17263,3 +17263,7 @@ if (identical(x, enc2native(x))) { # fintersect now preserves order of first argument like intersect, #4716 test(2163, fintersect(data.table(x=c("b", "c", "a")), data.table(x=c("a","c")))$x, c("c", "a")) + + +# Test new feature %notin%, #4152 +test(2164, 11 %notin% 1:10, TRUE) diff --git a/man/notin.Rd b/man/notin.Rd new file mode 100644 index 0000000000..ba082d5ace --- /dev/null +++ b/man/notin.Rd @@ -0,0 +1,36 @@ +\name{notin} +\alias{notin} +\alias{\%notin\%} + +\title{ +Convenience for checking if an example is in a set of elements +} + +\description{ +Intended to behave opposite to \code{\link[=base]{in}} +} + +\usage{ +notin(example, elements) +example \%notin\% elements +} + +\arguments{ + \item{example}{ vector or \code{NULL}: value to be matched } + \item{elements}{ vector or \code{NULL}: values to check for a match } +} + +\details{ + Internally, \code{\%notin\%} is a wrapper around \code{\link[=base]{match}}, much like \code{\%in\%}. +} + +\value{ + Logical vector, \code{TRUE} indicating whether each \code{example} was found in \code{elements}. +} + +\seealso{ \code{\link[base]{match}} } + +\examples{ + 11 \%notin\% 1:10 +} + From e23fad8851005eeef01c1f4d5c68b8f878d8dcca Mon Sep 17 00:00:00 2001 From: mczekanski1 Date: Sat, 6 Mar 2021 19:35:02 -0500 Subject: [PATCH 02/32] update NEWS to add %notin% --- NEWS.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/NEWS.md b/NEWS.md index a51de94eb6..f17fd6c820 100644 --- a/NEWS.md +++ b/NEWS.md @@ -6,6 +6,8 @@ ## NEW FEATURES +1. 
%notin% added to compute opposite of %in%, [#4152](https://github.com/Rdatatable/data.table/issues/4152). Thanks to Jan Gorecki for suggesting and Michael Czekanski for the PR. + ## BUG FIXES ## NOTES From 212b0b2bc2fbfeefe13f2dcbdb757c605635cbf2 Mon Sep 17 00:00:00 2001 From: mczekanski1 Date: Sun, 7 Mar 2021 09:25:15 -0500 Subject: [PATCH 03/32] include branching for is.character in response to PR comments --- R/notin.R | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/R/notin.R b/R/notin.R index e0f3a1b008..bdd609b932 100644 --- a/R/notin.R +++ b/R/notin.R @@ -1,6 +1,10 @@ # Intended to be used to create %notin% operator notin = function(example, elements) { - return(!match(example, elements, nomatch = 0)) + if (is.character(example)) { + return(!chmatch(example, elements, nomatch = 0)) + } else { + return(!match(example, elements, nomatch = 0)) + } } "%notin%" = function(example, elements) notin(example, elements) From 5c041c57cb36b60e65886f9a9ecfb631fe4aa4ed Mon Sep 17 00:00:00 2001 From: mczekanski1 Date: Sun, 14 Mar 2021 23:08:55 -0400 Subject: [PATCH 04/32] implement negation of chin in chmatchMain and remove notin function --- NAMESPACE | 2 +- R/data.table.R | 2 +- R/notin.R | 8 +++----- inst/tests/tests.Rraw | 5 ++++- man/notin.Rd | 6 ++---- src/assign.c | 4 ++-- src/chmatch.c | 37 +++++++++++++++++++++++++------------ src/data.table.h | 2 +- src/subset.c | 2 +- 9 files changed, 40 insertions(+), 28 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index b6065ce34b..fdfd765396 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -8,7 +8,7 @@ exportClasses(data.table, IDate, ITime) export(data.table, tables, setkey, setkeyv, key, "key<-", haskey, CJ, SJ, copy) export(setindex, setindexv, indices) export(as.data.table,is.data.table,test.data.table) -export(last,first,like,"%like%","%ilike%","%flike%",between,"%between%",inrange,"%inrange%", notin, "%notin%") 
+export(last,first,like,"%like%","%ilike%","%flike%",between,"%between%",inrange,"%inrange%","%notin%") export(timetaken) export(truelength, setalloccol, alloc.col, ":=") export(setattr, setnames, setcolorder, set, setDT, setDF) diff --git a/R/data.table.R b/R/data.table.R index 2b010db77a..638d2043d4 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -2585,7 +2585,7 @@ chmatchdup = function(x, table, nomatch=NA_integer_) .Call(Cchmatchdup, x, table, as.integer(nomatch[1L])) "%chin%" = function(x, table) - .Call(Cchin, x, table) # TO DO if table has 'ul' then match to that + .Call(Cchin, x, table, FALSE) # TO DO if table has 'ul' then match to that chorder = function(x) { o = forderv(x, sort=TRUE, retGrp=FALSE) diff --git a/R/notin.R b/R/notin.R index bdd609b932..688979fadd 100644 --- a/R/notin.R +++ b/R/notin.R @@ -1,10 +1,8 @@ # Intended to be used to create %notin% operator -notin = function(example, elements) { +"%notin%" = function(example, elements) { if (is.character(example)) { - return(!chmatch(example, elements, nomatch = 0)) + return(.Call(Cchin, example, elements, TRUE)) } else { return(!match(example, elements, nomatch = 0)) } -} - -"%notin%" = function(example, elements) notin(example, elements) +} \ No newline at end of file diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 16f753f7ec..db9e0e2b09 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -17266,4 +17266,7 @@ test(2163, fintersect(data.table(x=c("b", "c", "a")), data.table(x=c("a","c")))$ # Test new feature %notin%, #4152 -test(2164, 11 %notin% 1:10, TRUE) +test(2164.1, 11 %notin% 1:10, TRUE) +test(2164.2, "a" %notin% c(), TRUE) +test(2164.3, "a" %notin% c("a", "b", "c"), FALSE) +test(2164.4, c(1, 2) %notin% c(1,2,3), c(FALSE, FALSE)) \ No newline at end of file diff --git a/man/notin.Rd b/man/notin.Rd index ba082d5ace..d28e4b6595 100644 --- a/man/notin.Rd +++ b/man/notin.Rd @@ -1,5 +1,4 @@ \name{notin} -\alias{notin} \alias{\%notin\%} \title{ @@ -11,7 +10,6 @@ 
Intended to behave opposite to \code{\link[=base]{in}} } \usage{ -notin(example, elements) example \%notin\% elements } @@ -21,11 +19,11 @@ example \%notin\% elements } \details{ - Internally, \code{\%notin\%} is a wrapper around \code{\link[=base]{match}}, much like \code{\%in\%}. + Internally, \code{\%notin\%} is a wrapper around \code{\link[=base]{match}} and \code{\link[=data.table]{chmatch}}. } \value{ - Logical vector, \code{TRUE} indicating whether each \code{example} was found in \code{elements}. + Logical vector, \code{TRUE} indicating whether each \code{example} was not found in \code{elements}. } \seealso{ \code{\link[base]{match}} } diff --git a/src/assign.c b/src/assign.c index 5c0b808707..f39def1981 100644 --- a/src/assign.c +++ b/src/assign.c @@ -525,7 +525,7 @@ SEXP assign(SEXP dt, SEXP rows, SEXP cols, SEXP newcolnames, SEXP values) if (length(key)) { // if assigning to at least one key column, the key is truncated to one position before the first changed column. //any() and subsetVector() don't seem to be exposed by R API at C level, so this is done here long hand. 
- PROTECT(tmp = chin(key, assignedNames)); protecti++; + PROTECT(tmp = chin(key, assignedNames, false)); protecti++; newKeyLength = xlength(key); for (i=0;i=0; + } + } else { + for (int i=0; i Date: Mon, 15 Mar 2021 00:32:27 -0700 Subject: [PATCH 05/32] tidy --- R/notin.R | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/R/notin.R b/R/notin.R index 688979fadd..4ad5a8514b 100644 --- a/R/notin.R +++ b/R/notin.R @@ -1,8 +1,7 @@ -# Intended to be used to create %notin% operator "%notin%" = function(example, elements) { if (is.character(example)) { return(.Call(Cchin, example, elements, TRUE)) } else { return(!match(example, elements, nomatch = 0)) } -} \ No newline at end of file +} From 7c60aff628179500a2d0cf4c12708419c69a8fab Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Mon, 15 Mar 2021 00:33:32 -0700 Subject: [PATCH 06/32] terminal newline --- inst/tests/tests.Rraw | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index db9e0e2b09..ece16bd5bc 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -17269,4 +17269,4 @@ test(2163, fintersect(data.table(x=c("b", "c", "a")), data.table(x=c("a","c")))$ test(2164.1, 11 %notin% 1:10, TRUE) test(2164.2, "a" %notin% c(), TRUE) test(2164.3, "a" %notin% c("a", "b", "c"), FALSE) -test(2164.4, c(1, 2) %notin% c(1,2,3), c(FALSE, FALSE)) \ No newline at end of file +test(2164.4, c(1, 2) %notin% c(1,2,3), c(FALSE, FALSE)) From 99e7ad3694234da7b684df16c268fb892eb02a8a Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Mon, 15 Mar 2021 00:39:52 -0700 Subject: [PATCH 07/32] tighten & emphasize wording Somewhat knotty here -- `TRUE` means "no" and `FALSE` means "yes", in a way, so want to tread carefully --- man/notin.Rd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/man/notin.Rd b/man/notin.Rd index d28e4b6595..27356e780a 100644 --- a/man/notin.Rd +++ b/man/notin.Rd @@ -23,7 +23,7 @@ example \%notin\% elements } 
\value{ - Logical vector, \code{TRUE} indicating whether each \code{example} was not found in \code{elements}. + Logical vector, \code{TRUE} for each element of \code{example} \emph{absent} from \code{elements}, and \code{FALSE} for each element of \code{example} \emph{present} in \code{elements}. } \seealso{ \code{\link[base]{match}} } From 2d622849a05f1f3433a2a00a161717f0e8e77722 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Mon, 15 Mar 2021 00:41:48 -0700 Subject: [PATCH 08/32] whitespace --- src/chmatch.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/chmatch.c b/src/chmatch.c index 59238eaf3f..75e45924de 100644 --- a/src/chmatch.c +++ b/src/chmatch.c @@ -21,9 +21,9 @@ static SEXP chmatchMain(SEXP x, SEXP table, int nomatch, bool chin, bool chmatch } // negate inputs if needed - int chinNoMatch = negate?1:0; - int match = negate?0:1; - nomatch = negate?1:nomatch; + int chinNoMatch = negate ? 1 : 0; + int match = negate ? 0 : 1; + nomatch = negate ? 
1 : nomatch; // allocations up front before savetl starts in case allocs fail int nprotect=0; From f652847fbde3d4eb7a110d77b07a39d37a7956ce Mon Sep 17 00:00:00 2001 From: mczekanski1 Date: Mon, 15 Mar 2021 19:16:15 -0400 Subject: [PATCH 09/32] change parameter names, update documentation, and add tests for edge cases --- R/notin.R | 6 +++--- inst/tests/tests.Rraw | 3 +++ man/notin.Rd | 21 ++++++++++----------- 3 files changed, 16 insertions(+), 14 deletions(-) diff --git a/R/notin.R b/R/notin.R index 4ad5a8514b..ae9224571b 100644 --- a/R/notin.R +++ b/R/notin.R @@ -1,7 +1,7 @@ -"%notin%" = function(example, elements) { +"%notin%" = function(x, table) { if (is.character(example)) { - return(.Call(Cchin, example, elements, TRUE)) + return(.Call(Cchin, x, table, TRUE)) } else { - return(!match(example, elements, nomatch = 0)) + return(!match(x, table, nomatch = 0)) } } diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index b41fb0d478..8226d46662 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -17270,3 +17270,6 @@ test(2164.1, 11 %notin% 1:10, TRUE) test(2164.2, "a" %notin% c(), TRUE) test(2164.3, "a" %notin% c("a", "b", "c"), FALSE) test(2164.4, c(1, 2) %notin% c(1,2,3), c(FALSE, FALSE)) +test(2164.5, "a" %notin% character(), TRUE) +test(2164.6, "a" %notin% integer(), TRUE) +test(2164.7, "a" %notin% NULL, TRUE) diff --git a/man/notin.Rd b/man/notin.Rd index 27356e780a..787f3fd853 100644 --- a/man/notin.Rd +++ b/man/notin.Rd @@ -2,33 +2,32 @@ \alias{\%notin\%} \title{ -Convenience for checking if an example is in a set of elements +Convenience for checking if an example is not in a set of elements } \description{ -Intended to behave opposite to \code{\link[=base]{in}} +Check whether an object is absent from a table, i.e., the logical inverse of \code{\link[=base]{in}}. 
} \usage{ -example \%notin\% elements +x \%notin\% table } \arguments{ - \item{example}{ vector or \code{NULL}: value to be matched } - \item{elements}{ vector or \code{NULL}: values to check for a match } + \item{x}{ vector or \code{NULL}: value to be matched } + \item{table}{ vector or \code{NULL}: values to check for a match } } -\details{ - Internally, \code{\%notin\%} is a wrapper around \code{\link[=base]{match}} and \code{\link[=data.table]{chmatch}}. -} \value{ - Logical vector, \code{TRUE} for each element of \code{example} \emph{absent} from \code{elements}, and \code{FALSE} for each element of \code{example} \emph{present} in \code{elements}. + Logical vector, \code{TRUE} for each element of \code{x} \emph{absent} from \code{table}, and \code{FALSE} for each element of \code{x} \emph{present} in \code{table}. } -\seealso{ \code{\link[base]{match}} } +\seealso{ \code{\link[base]{match}}, \code{\link[data.table]{chmatch}} } + \examples{ - 11 \%notin\% 1:10 + 11 \%notin\% 1:10 # TRUE + "a" \%notin\% c("a", "b") # FALSE } From ca4b779a39776cf17ccf7a3144cdc981df49da0a Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Sun, 2 May 2021 01:54:56 -0700 Subject: [PATCH 10/32] grammar --- man/notin.Rd | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/man/notin.Rd b/man/notin.Rd index 787f3fd853..d84bb2024d 100644 --- a/man/notin.Rd +++ b/man/notin.Rd @@ -2,7 +2,7 @@ \alias{\%notin\%} \title{ -Convenience for checking if an example is not in a set of elements +Convenience operator for checking if an example is not in a set of elements } \description{ @@ -14,8 +14,8 @@ x \%notin\% table } \arguments{ - \item{x}{ vector or \code{NULL}: value to be matched } - \item{table}{ vector or \code{NULL}: values to check for a match } + \item{x}{ Vector or \code{NULL}: the values to be matched. } + \item{table}{ Vector or \code{NULL}: the values to be matched against. 
} } From 37629481b57cd1d873dc3c0e10abdfd4ac0e63b9 Mon Sep 17 00:00:00 2001 From: Michael Date: Sat, 8 May 2021 14:47:56 -0400 Subject: [PATCH 11/32] add tests for NA --- inst/tests/tests.Rraw | 2 ++ 1 file changed, 2 insertions(+) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 788aab0364..f3adef959d 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -17281,4 +17281,6 @@ test(2165.4, c(1, 2) %notin% c(1,2,3), c(FALSE, FALSE)) test(2165.5, "a" %notin% character(), TRUE) test(2165.6, "a" %notin% integer(), TRUE) test(2165.7, "a" %notin% NULL, TRUE) +test(2165.8, NA %notin% 1:5, TRUE) +test(2165.9, NA %notin% c(1:5, NA), FALSE) From 48faf69d9fb619b97f08b0ed3c20b597a3a84831 Mon Sep 17 00:00:00 2001 From: Michael Date: Sun, 20 Jun 2021 18:58:57 -0400 Subject: [PATCH 12/32] merge with recent data.table changes --- .Rbuildignore | 2 + .dev/CRAN_Release.cmd | 24 +- .dev/revdep.R | 13 +- .gitattributes | 2 + .gitlab-ci.yml | 22 +- DESCRIPTION | 7 +- NAMESPACE | 1 + NEWS.md | 146 +++- R/IDateTime.R | 16 +- R/as.data.table.R | 19 +- R/between.R | 12 +- R/bmerge.R | 52 +- R/cedta.R | 6 +- R/data.table.R | 323 +++++---- R/devel.R | 8 +- R/duplicated.R | 16 +- R/fcast.R | 2 +- R/fmelt.R | 177 ++++- R/foverlaps.R | 4 +- R/frank.R | 7 +- R/fread.R | 56 +- R/fwrite.R | 13 +- R/groupingsets.R | 8 +- R/last.R | 28 +- R/like.R | 5 +- R/merge.R | 32 +- R/onAttach.R | 11 +- R/onLoad.R | 19 +- R/print.data.table.R | 39 +- R/setkey.R | 14 +- R/setops.R | 22 +- R/tables.R | 4 +- R/test.data.table.R | 74 +- R/utils.R | 57 +- R/xts.R | 6 +- _pkgdown.yml | 4 +- inst/include/datatableAPI.h | 5 +- inst/tests/benchmark.Rraw | 4 +- inst/tests/other.Rraw | 6 +- inst/tests/programming.Rraw | 600 ++++++++++++++++ inst/tests/tests.Rraw | 727 ++++++++++++++++---- man/address.Rd | 9 +- man/assign.Rd | 4 +- man/cdt.Rd | 17 +- man/copy.Rd | 10 +- man/data.table.Rd | 14 +- man/dcast.data.table.Rd | 36 +- man/deprecated.Rd | 3 + man/fcase.Rd | 2 +- man/fifelse.Rd | 4 +- 
man/fread.Rd | 3 +- man/froll.Rd | 82 +-- man/fwrite.Rd | 7 +- man/measure.Rd | 92 +++ man/melt.data.table.Rd | 75 +- man/openmp-utils.Rd | 5 +- man/shouldPrint.Rd | 4 +- man/special-symbols.Rd | 8 +- man/substitute2.Rd | 77 +++ man/test.data.table.Rd | 5 + po/R-data.table.pot | 330 +++++++-- po/R-zh_CN.po | 554 ++++++++++++--- po/zh_CN.po | 10 +- src/assign.c | 101 ++- src/chmatch.c | 9 +- src/data.table.h | 7 +- src/dogroups.c | 16 +- src/fastmean.c | 18 +- src/fcast.c | 14 +- src/fifelse.c | 211 +++--- src/fmelt.c | 364 ++++++---- src/forder.c | 6 +- src/frank.c | 70 +- src/fread.c | 45 +- src/fread.h | 2 +- src/freadR.c | 12 +- src/froll.c | 12 +- src/fsort.c | 10 +- src/fwriteR.c | 17 +- src/gsumm.c | 292 ++++---- src/ijoin.c | 224 +++--- src/init.c | 11 +- src/inrange.c | 15 +- src/nqrecreateindices.c | 2 +- src/openmp-utils.c | 2 +- src/programming.c | 32 + src/rbindlist.c | 2 +- src/snprintf.c | 30 +- src/subset.c | 2 +- src/utils.c | 8 +- vignettes/Makefile | 7 - vignettes/datatable-faq.Rmd | 17 +- vignettes/datatable-intro.Rmd | 2 - vignettes/datatable-keys-fast-subset.Rmd | 6 +- vignettes/datatable-programming.Rmd | 420 +++++++++++ vignettes/datatable-reference-semantics.Rmd | 14 +- vignettes/datatable-reshape.Rmd | 95 ++- vignettes/datatable-sd-usage.Rmd | 8 +- 98 files changed, 4549 insertions(+), 1498 deletions(-) create mode 100644 inst/tests/programming.Rraw create mode 100644 man/measure.Rd create mode 100644 man/substitute2.Rd create mode 100644 src/programming.c delete mode 100644 vignettes/Makefile create mode 100644 vignettes/datatable-programming.Rmd diff --git a/.Rbuildignore b/.Rbuildignore index ad51ae2da7..9a939aae81 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -1,3 +1,4 @@ +.dir-locals.el ^\.Rprofile$ ^data\.table_.*\.tar\.gz$ ^vignettes/plots/figures$ @@ -31,6 +32,7 @@ ^.*\.Rproj$ ^\.Rproj\.user$ ^\.idea$ +^\.libs$ ^.*\.dll$ diff --git a/.dev/CRAN_Release.cmd b/.dev/CRAN_Release.cmd index a2db3058b3..1dfec0a02a 100644 --- 
a/.dev/CRAN_Release.cmd +++ b/.dev/CRAN_Release.cmd @@ -154,14 +154,24 @@ grep -n "[^A-Za-z0-9]F[^A-Za-z0-9]" ./inst/tests/tests.Rraw grep -Enr "^[^#]*(?:\[|==|>|<|>=|<=|,|\(|\+)\s*[-]?[0-9]+[^0-9L:.e]" R | grep -Ev "stop|warning|tolerance" # Never use ifelse. fifelse for vectors when necessary (nothing yet) - grep -Enr "\bifelse" R +grep -Enr "\bifelse" R + +# use substr() instead of substring(), #4447 +grep -Fnr "substring" R # No system.time in main tests.Rraw. Timings should be in benchmark.Rraw -grep -n "system[.]time" ./inst/tests/tests.Rraw +grep -Fn "system.time" ./inst/tests/*.Rraw | grep -Fv "benchmark.Rraw" | grep -Fv "this system.time usage ok" + +# No tryCatch in *.Rraw -- tryCatch should be handled only in test() itself to avoid silently missed warnings/errors/output +grep -Fn "tryCatch" ./inst/tests/*.Rraw # All % in *.Rd should be escaped otherwise text gets silently chopped grep -n "[^\]%" ./man/*.Rd +# if (a & b) is either invalid or inefficient (ditto for replace & with |); +# if(any(a [&|] b)) is appropriate b/c of collapsing the logical vector to scalar +grep -nr "^[^#]*if[^&#]*[^&#\"][&][^&]" R | grep -Ev "if\s*[(](?:any|all)" + # seal leak potential where two unprotected API calls are passed to the same # function call, usually involving install() or mkChar() # Greppable thanks to single lines and wide screens @@ -196,6 +206,10 @@ grep allocVector *.c | grep -v PROTECT | grep -v SET_VECTOR_ELT | grep -v setAtt grep coerceVector *.c | grep -v PROTECT | grep -v SET_VECTOR_ELT | grep -v setAttrib | grep -v return grep asCharacter *.c | grep -v PROTECT | grep -v SET_VECTOR_ELT | grep -v setAttrib | grep -v return +# Enforce local scope for loop index (`for (int i=0; ...)` instead of `int i; for (i=0; ...)`) +# exceptions are tagged with #loop_counter_not_local_scope_ok +grep -En "for\s*[(]\s*[a-zA-Z0-9_]+\s*=" src/*.c | grep -Fv "#loop_counter_not_local_scope_ok" + cd .. 
R cc(test=TRUE, clean=TRUE, CC="gcc-10") # to compile with -pedandic -Wall, latest gcc as CRAN: https://cran.r-project.org/web/checks/check_flavors.html @@ -243,6 +257,11 @@ require(data.table) test.data.table(script="other.Rraw") test.data.table(script="*.Rraw") test.data.table(verbose=TRUE) # since main.R no longer tests verbose mode + +# check example() works on every exported function, with these sticter options too, and also that all help pages have examples +options(warn=2, warnPartialMatchArgs=TRUE, warnPartialMatchAttr=TRUE, warnPartialMatchDollar=TRUE) +invisible(lapply(objects(pos="package:data.table"), example, character.only=TRUE, echo=FALSE, ask=FALSE)) + gctorture2(step=50) system.time(test.data.table(script="*.Rraw")) # apx 8h = froll 3h + nafill 1m + main 5h @@ -530,6 +549,7 @@ sudo apt-get -y install libquantlib0-dev # for RQuantLib sudo apt-get -y install cargo # for gifski, a suggest of nasoi sudo apt-get -y install libgit2-dev # for gert sudo apt-get -y install cmake # for symengine for RxODE +sudo apt-get -y install libxslt1-dev # for xslt sudo R CMD javareconf # ENDIF diff --git a/.dev/revdep.R b/.dev/revdep.R index 49aa6e06f9..38c5a93a66 100644 --- a/.dev/revdep.R +++ b/.dev/revdep.R @@ -9,7 +9,7 @@ Sys.unsetenv("R_PROFILE_USER") # options copied from .dev/.Rprofile that aren't run due to the way this script is started via a profile options(help_type="html") -options(error=quote(dump.frames())) +options(error=quote(utils::dump.frames())) options(width=200) # for cran() output not to wrap # Check that env variables have been set correctly: @@ -36,10 +36,12 @@ stopifnot(identical(Sys.getenv("_R_CHECK_FORCE_SUGGESTS_"),"true")) # e.g. 
https://github.com/reimandlab/ActivePathways/issues/14 cflags = system("grep \"^[^#]*CFLAGS\" ~/.R/Makevars", intern=TRUE) -cat("~/.R/Makevars contains", cflags, "ok\n") -if (!grepl("^CFLAGS=-O[0-3]$", cflags)) { +cat("~/.R/Makevars contains", cflags) +if (!grepl("^CFLAGS=-O[0-3] *$", cflags)) { stop("Some packages have failed to install in the past (e.g. processx and RGtk2) when CFLAGS contains -pedandic, -Wall, and similar. ", - "So for revdepr keep CFLAGS simple; i.e. -O[0-3] only.") + "So for revdepr keep CFLAGS simple; i.e. -O[0-3] only. Check ~/.R/Makevars.") +} else { + cat(" ok\n") } options(repos = c("CRAN"=c("http://cloud.r-project.org"))) @@ -155,7 +157,7 @@ status0 = function(bioc=FALSE) { if (file.exists(fn)) { v = suppressWarnings(system(paste0("grep 'Status:' ",fn), intern=TRUE)) if (!length(v)) return("RUNNING") - return(substring(v,9)) + return(substr(v, 9L, nchar(v))) } if (file.exists(paste0("./",x,".Rcheck"))) return("RUNNING") return("NOT STARTED") @@ -248,7 +250,6 @@ cran = function() # reports CRAN status of the .cran.fail packages cat("tools::CRAN_check_results() returned",prettyNum(nrow(db), big.mark=","),"rows in",timetaken(p),"\n") rel = unique(db$Flavor) rel = sort(rel[grep("release",rel)]) - stopifnot(identical(rel, c("r-release-linux-x86_64", "r-release-macos-x86_64", "r-release-windows-ix86+x86_64"))) cat("R-release is used for revdep checking so comparing to CRAN results for R-release\n") ans = db[Package %chin% .fail.cran & Flavor %chin% rel, Status, keyby=.(Package, Flavor)] dcast(ans, Package~Flavor, value.var="Status", fill="")[.fail.cran,] diff --git a/.gitattributes b/.gitattributes index fa1385d99a..9c72b27aea 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1 +1,3 @@ * -text +*.Rraw linguist-language=R + diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 2f760c2782..d36f99fbcd 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -6,9 +6,9 @@ variables: TZ: "UTC" ## to avoid 'Failed to create bus connection' from 
timedatectl via Sys.timezone() on Docker with R 3.4. ## Setting TZ for all GLCI jobs to isolate them from timezone. We could have a new GLCI job to test under ## a non-UTC timezone, although, that's what we do routinely in dev. - R_REL_VERSION: "4.0" - R_DEVEL_VERSION: "4.1" - R_OLDREL_VERSION: "3.6" + R_REL_VERSION: "4.1" + R_DEVEL_VERSION: "4.2" + R_OLDREL_VERSION: "4.0" stages: - dependencies @@ -61,7 +61,7 @@ build: ## build data.table sources as tar.gz archive image: registry.gitlab.com/jangorecki/dockerfiles/r-builder needs: ["mirror-packages"] before_script: - - Rscript -e 'install.packages("knitr", repos=file.path("file:",normalizePath("bus/mirror-packages/cran")), quiet=TRUE)' + - Rscript -e 'install.packages(c("knitr","rmarkdown"), repos=file.path("file:",normalizePath("bus/mirror-packages/cran")), quiet=TRUE)' - rm -r bus - echo "Revision:" $CI_BUILD_REF >> ./DESCRIPTION script: @@ -96,16 +96,14 @@ build: ## build data.table sources as tar.gz archive - mkdir.exe -p cran/bin/windows/contrib/$R_VERSION; mv.exe $(ls.exe -1t data.table_*.zip | head.exe -n 1) cran/bin/windows/contrib/$R_VERSION .test-install-r-rel-win: &install-r-rel-win - - curl.exe -s -o ../R-rel.exe https://cloud.r-project.org/bin/windows/base/old/4.0.3/R-4.0.3-win.exe; Start-Process -FilePath ..\R-rel.exe -ArgumentList "/VERYSILENT /DIR=C:\R" -NoNewWindow -Wait + - curl.exe -s -o ../R-rel.exe https://cloud.r-project.org/bin/windows/base/old/4.1.0/R-4.1.0-win.exe; Start-Process -FilePath ..\R-rel.exe -ArgumentList "/VERYSILENT /DIR=C:\R" -NoNewWindow -Wait .test-install-r-devel-win: &install-r-devel-win - curl.exe -s -o ../R-devel.exe https://cloud.r-project.org/bin/windows/base/R-devel-win.exe; Start-Process -FilePath ..\R-devel.exe -ArgumentList "/VERYSILENT /DIR=C:\R" -NoNewWindow -Wait .test-install-r-oldrel-win: &install-r-oldrel-win - - curl.exe -s -o ../R-oldrel.exe https://cloud.r-project.org/bin/windows/base/old/3.6.3/R-3.6.3-win.exe; Start-Process -FilePath ..\R-oldrel.exe 
-ArgumentList "/VERYSILENT /DIR=C:\R" -NoNewWindow -Wait + - curl.exe -s -o ../R-oldrel.exe https://cloud.r-project.org/bin/windows/base/old/4.0.5/R-4.0.5-win.exe; Start-Process -FilePath ..\R-oldrel.exe -ArgumentList "/VERYSILENT /DIR=C:\R" -NoNewWindow -Wait .test-install-rtools-win: &install-rtools-win - curl.exe -s -o ../rtools.exe https://cloud.r-project.org/bin/windows/Rtools/rtools40-x86_64.exe; Start-Process -FilePath ..\rtools.exe -ArgumentList "/VERYSILENT /DIR=C:\rtools40" -NoNewWindow -Wait -.test-install-rtools35-win: &install-rtools35-win - - curl.exe -s -o ../Rtools35.exe https://cloud.r-project.org/bin/windows/Rtools/Rtools35.exe; Start-Process -FilePath ..\Rtools35.exe -ArgumentList "/VERYSILENT /DIR=C:\Rtools" -NoNewWindow -Wait .test-template: &test stage: test @@ -191,7 +189,7 @@ test-rel-cran-lin: ## R-release on Linux, extra NOTEs check and build pdf manual variables: _R_CHECK_CRAN_INCOMING_: "TRUE" ## stricter --as-cran checks should run in dev pipelines continuously (not sure what they are though) _R_CHECK_CRAN_INCOMING_REMOTE_: "FALSE" ## Other than no URL checking (takes many minutes) or 'Days since last update 0' NOTEs needed, #3284 - _R_CHECK_CRAN_INCOMING_TARBALL_THRESHOLD_: "7500000" ## effective from R 4.1.0, then 00check.log can be checked for "OK" rather than "2 NOTEs" + _R_CHECK_CRAN_INCOMING_TARBALL_THRESHOLD_: "7500000" ## effective from R 4.1.0 before_script: - *install-deps - *cp-src @@ -205,7 +203,7 @@ test-rel-cran-lin: ## R-release on Linux, extra NOTEs check and build pdf manual - R CMD check --as-cran $(ls -1t data.table_*.tar.gz | head -n 1) - *rm-src - >- - Rscript -e 'l=tail(readLines("data.table.Rcheck/00check.log"), 1L); if (!identical(l, "Status: 2 NOTEs")) stop("Last line of ", shQuote("00check.log"), " is not ", shQuote("Status: 2 NOTEs"), " (size of tarball) but ", shQuote(l)) else q("no")' + Rscript -e 'l=tail(readLines("data.table.Rcheck/00check.log"), 1L); if (!identical(l, "Status: 1 NOTE")) stop("Last line of 
", shQuote("00check.log"), " is not ", shQuote("Status: 1 NOTE"), " (installed package size) but ", shQuote(l)) else q("no")' test-dev-cran-lin: ## R-devel on Linux, --enable-strict-barrier --disable-long-double, check for new notes and compilation warnings, thus allow_failure <<: *test-lin @@ -285,8 +283,8 @@ test-old-win: ## R-oldrel on Windows R_VERSION: "$R_OLDREL_VERSION" before_script: - *install-r-oldrel-win - - *install-rtools35-win - - $ENV:PATH = "C:\R\bin;C:\Rtools\bin;$ENV:PATH" + - *install-rtools-win + - $ENV:PATH = "C:\R\bin;C:\rtools40\usr\bin;$ENV:PATH" - *install-deps-win - *cp-src-win - rm.exe -r bus diff --git a/DESCRIPTION b/DESCRIPTION index 78ca52b485..8ab2deaa0d 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -61,10 +61,13 @@ Authors@R: c( person("Vaclav","Tlapak", role="ctb"), person("Kevin","Ushey", role="ctb"), person("Dirk","Eddelbuettel", role="ctb"), - person("Ben","Schwen", role="ctb")) + person("Ben","Schwen", role="ctb"), + person("Tony","Fischetti", role="ctb"), + person("Ofek","Shilon", role="ctb"), + person("Vadim","Khotilovich", role="ctb")) Depends: R (>= 3.1.0) Imports: methods -Suggests: bit64 (>= 4.0.0), bit (>= 4.0.4), curl, R.utils, xts, nanotime, zoo (>= 1.8-1), yaml, knitr, rmarkdown +Suggests: bit64 (>= 4.0.0), bit (>= 4.0.4), curl, R.utils, xts, nanotime, zoo (>= 1.8-1), yaml, knitr, rmarkdown, markdown SystemRequirements: zlib Description: Fast aggregation of large data (e.g. 100GB in RAM), fast ordered joins, fast add/modify/delete of columns by group using no copies at all, list columns, friendly and fast character-separated-value read/write. Offers a natural and flexible syntax, for faster development. 
License: MPL-2.0 | file LICENSE diff --git a/NAMESPACE b/NAMESPACE index fdfd765396..0aa68631e5 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -56,6 +56,7 @@ export(nafill) export(setnafill) export(.Last.updated) export(fcoalesce) +export(substitute2) S3method("[", data.table) S3method("[<-", data.table) diff --git a/NEWS.md b/NEWS.md index f5e0ee0527..b150f4236c 100644 --- a/NEWS.md +++ b/NEWS.md @@ -11,8 +11,131 @@ 2. `mean(na.rm=TRUE)` by group is now GForce optimized, [#4849](https://github.com/Rdatatable/data.table/issues/4849). Thanks to the [h2oai/db-benchmark](https://github.com/h2oai/db-benchmark) project for spotting this issue. The 1 billion row example in the issue shows 48s reduced to 14s. The optimization also applies to type `integer64` resulting in a difference to the `bit64::mean.integer64` method: `data.table` returns a `double` result whereas `bit64` rounds the mean to the nearest integer. +3. `fwrite()` now writes UTF-8 or native csv files by specifying the `encoding=` argument, [#1770](https://github.com/Rdatatable/data.table/pull/1770). Thanks to @shrektan for the request and the PR. + +4. `data.table()` no longer fills empty vectors with `NA` with warning. Instead a 0-row `data.table` is returned, [#3727](https://github.com/Rdatatable/data.table/issues/3727). Since `data.table()` is used internally by `.()`, this brings the following examples in line with expectations in most cases. Thanks to @shrektan for the suggestion and PR. 
+ + ```R + DT = data.table(A=1:3, B=letters[1:3]) + DT[A>3, .(ITEM='A>3', A, B)] # (1) + DT[A>3][, .(ITEM='A>3', A, B)] # (2) + # the above are now equivalent as expected and return: + Empty data.table (0 rows and 3 cols): ITEM,A,B + # Previously, (2) returned : + ITEM A B + + 1: A>3 NA + Warning messages: + 1: In as.data.table.list(jval, .named = NULL) : + Item 2 has 0 rows but longest item has 1; filled with NA + 2: In as.data.table.list(jval, .named = NULL) : + Item 3 has 0 rows but longest item has 1; filled with NA + ``` + + ```R + DT = data.table(A=1:3, B=letters[1:3], key="A") + DT[.(1:3, double()), B] + # new result : + character(0) + # old result : + [1] "a" "b" "c" + Warning message: + In as.data.table.list(i) : + Item 2 has 0 rows but longest item has 3; filled with NA + ``` + +5. `%like%` on factors with a large number of levels is now faster, [#4748](https://github.com/Rdatatable/data.table/issues/4748). The example in the PR shows 2.37s reduced to 0.86s on a factor length 100 million containing 1 million unique 10-character strings. Thanks to @statquant for reporting, and @shrektan for implementing. + +6. `keyby=` now accepts `TRUE`/`FALSE` together with `by=`, [#4307](https://github.com/Rdatatable/data.table/issues/4307). The primary motivation is benchmarking where `by=` vs `keyby=` is varied across a set of queries. Thanks to Jan Gorecki for the request and the PR. + + ```R + DT[, sum(colB), keyby="colA"] + DT[, sum(colB), by="colA", keyby=TRUE] # same + ``` + +7. `fwrite()` gains a new `datatable.fwrite.sep` option to change the default separator, still `","` by default. Thanks to Tony Fischetti for the PR. As is good practice in R in general, we usually resist new global options for the reason that a user changing the option for their own code can inadvertently change the behaviour of any package using `data.table` too. However, in this case, the global option affects file output rather than code behaviour. 
In fact, the very reason the user may wish to change the default separator is that they know a different separator is more appropriate for their data being passed to the package using `fwrite` but cannot otherwise change the `fwrite` call within that package. + +8. `melt()` now supports `NA` entries when specifying a list of `measure.vars`, which translate into runs of missing values in the output. Useful for melting wide data with some missing columns, [#4027](https://github.com/Rdatatable/data.table/issues/4027). Thanks to @vspinu for reporting, and @tdhock for implementing. + +9. `melt()` now supports multiple output variable columns via the `variable_table` attribute of `measure.vars`, [#3396](https://github.com/Rdatatable/data.table/issues/3396) [#2575](https://github.com/Rdatatable/data.table/issues/2575) [#2551](https://github.com/Rdatatable/data.table/issues/2551), [#4998](https://github.com/Rdatatable/data.table/issues/4998). It should be a `data.table` with one row that describes each element of the `measure.vars` vector(s). These data/columns are copied to the output instead of the usual variable column. This is backwards compatible since the previous behavior (one output variable column) is used when there is no `variable_table`. New functions `measure()` and `measurev()` which use either a separator or a regex to create a `measure.vars` list/vector with `variable_table` attribute; useful for melting data that has several distinct pieces of information encoded in each column name. See new `?measure` and new section in reshape vignette. Thanks to Matthias Gomolka, Ananda Mahto, Hugh Parsonage, Mark Fairbanks for reporting, and to @tdhock for implementing. + +10. A new interface for _programming on data.table_ has been added, closing [#2655](https://github.com/Rdatatable/data.table/issues/2655) and many other linked issues. It is built using base R's `substitute`-like interface via a new `env` argument to `[.data.table`. 
For details see the new vignette *programming on data.table*, and the new `?substitute2` manual page. Thanks to numerous users for filing requests, and Jan Gorecki for implementing. + + ```R + DT = data.table(x = 1:5, y = 5:1) + + # parameters + in_col_name = "x" + fun = "sum" + fun_arg1 = "na.rm" + fun_arg1val = TRUE + out_col_name = "sum_x" + + # parameterized query + #DT[, .(out_col_name = fun(in_col_name, fun_arg1=fun_arg1val))] + + # desired query + DT[, .(sum_x = sum(x, na.rm=TRUE))] + + # new interface + DT[, .(out_col_name = fun(in_col_name, fun_arg1=fun_arg1val)), + env = list( + in_col_name = "x", + fun = "sum", + fun_arg1 = "na.rm", + fun_arg1val = TRUE, + out_col_name = "sum_x" + )] + ``` + +11. `DT[, if (...) .(a=1L) else .(a=1L, b=2L), by=group]` now returns a 1-column result with warning `j may not evaluate to the same number of columns for each group`, rather than error `'names' attribute [2] must be the same length as the vector`, [#4274](https://github.com/Rdatatable/data.table/issues/4274). Thanks to @robitalec for reporting, and Michael Chirico for the PR. + +12. Typo checking in `i` available since 1.11.4 is extended to work in non-English sessions, [#4989](https://github.com/Rdatatable/data.table/issues/4989). Thanks to Michael Chirico for the PR. + +13. `fifelse()` now coerces logical `NA` to other types and the `na` argument supports vectorized input, [#4277](https://github.com/Rdatatable/data.table/issues/4277) [#4286](https://github.com/Rdatatable/data.table/issues/4286) [#4287](https://github.com/Rdatatable/data.table/issues/4287). Thanks to @michaelchirico and @shrektan for reporting, and @shrektan for implementing. + ## BUG FIXES +1. `by=.EACHI` when `i` is keyed but `on=` different columns than `i`'s key could create an invalidly keyed result, [#4603](https://github.com/Rdatatable/data.table/issues/4603) [#4911](https://github.com/Rdatatable/data.table/issues/4911). 
Thanks to @myoung3 and @adamaltmejd for reporting, and @ColeMiller1 for the PR. An invalid key is where a `data.table` is marked as sorted by the key columns but the data is not sorted by those columns, leading to incorrect results from subsequent queries. + +2. `print(DT, trunc.cols=TRUE)` and the corresponding `datatable.print.trunc.cols` option (new feature 3 in v1.13.0) could incorrectly display an extra column, [#4266](https://github.com/Rdatatable/data.table/issues/4266). Thanks to @tdhock for the bug report and @MichaelChirico for the PR. + +3. `fread(..., nrows=0L)` now works as intended and the same as `nrows=0`; i.e. returning the column names and typed empty columns determined by the large sample, [#4686](https://github.com/Rdatatable/data.table/issues/4686). Thanks to @hongyuanjia for reporting, and Benjamin Schwendinger for the PR. + +4. Passing `.SD` to `frankv()` with `ties.method='random'` or with `na.last=NA` failed with `.SD is locked`, [#4429](https://github.com/Rdatatable/data.table/issues/4429). Thanks @smarches for the report. + +5. Filtering data.table using `which=NA` to return non-matching indices will now properly work for non-optimized subsetting as well, closes [#4411](https://github.com/Rdatatable/data.table/issues/4411). + +6. When `j` returns an object whose class `"X"` inherits from `data.table`; i.e. class `c("X", "data.table", "data.frame")`, the derived class `"X"` is no longer incorrectly dropped from the class of the `data.table` returned, [#4324](https://github.com/Rdatatable/data.table/issues/4324). Thanks to @HJAllen for reporting and @shrektan for the PR. + +7. `as.data.table()` failed with `.subset2(x, i, exact = exact): attempt to select less than one element in get1index` when passed an object inheriting from `data.table` with a different `[[` method, such as the class `dfidx` from the `dfidx` package, [#4526](https://github.com/Rdatatable/data.table/issues/4526). 
Thanks @RicoDiel for the report, and Michael Chirico for the PR. + +8. `rbind()` and `rbindlist()` of length-0 ordered factors failed with `Internal error: savetl_init checks failed`, [#4795](https://github.com/Rdatatable/data.table/issues/4795) [#4823](https://github.com/Rdatatable/data.table/issues/4823). Thanks to @shrektan and @dbart79 for reporting, and @shrektan for fixing. + +9. `data.table(NULL)[, firstCol:=1L]` created `data.table(firstCol=1L)` ok but did not update the internal `row.names` attribute, causing `Error in '$<-.data.frame'(x, name, value) : replacement has 1 row, data has 0` when passed to packages like `ggplot` which use `DT` as if it is a `data.frame`, [#4597](https://github.com/Rdatatable/data.table/issues/4597). Thanks to Matthew Son for reporting, and Cole Miller for the PR. + +10. `X[Y, .SD, by=]` (joining and grouping in the same query) could segfault if i) `by=` is supplied custom data (i.e. not simple expressions of columns), and ii) some rows of `Y` do not match to any rows in `X`, [#4892](https://github.com/Rdatatable/data.table/issues/4892). Thanks to @Kodiologist for reporting, @ColeMiller1 for investigating, and @tlapak for the PR. + +11. Assigning a set of 2 or more all-NA values to a factor column could segfault, [#4824](https://github.com/Rdatatable/data.table/issues/4824). Thanks to @clerousset for reporting and @shrektan for fixing. + +12. `as.data.table(table(NULL))` now returns `data.table(NULL)` rather than error `attempt to set an attribute on NULL`, [#4179](https://github.com/Rdatatable/data.table/issues/4179). The result differs slightly to `as.data.frame(table(NULL))` (0-row, 1-column) because 0-column works better with other `data.table` functions like `rbindlist()`. Thanks to Michael Chirico for the report and fix. + +13. `melt` with a list for `measure.vars` would output `variable` inconsistently between `na.rm=TRUE` and `FALSE`, [#4455](https://github.com/Rdatatable/data.table/issues/4455). 
Thanks to @tdhock for reporting and fixing. + +14. `by=...get()...` could fail with `object not found`, [#4873](https://github.com/Rdatatable/data.table/issues/4873) [#4981](https://github.com/Rdatatable/data.table/issues/4981). Thanks to @sindribaldur for reporting, and @OfekShilon for fixing. + +15. `print(x, col.names='none')` now removes the column names as intended for wide `data.table`s whose column names don't fit on a single line, [#4270](https://github.com/Rdatatable/data.table/issues/4270). Thanks to @tdhock for the report, and Michael Chirico for fixing. + +16. `DT[, min(colB), by=colA]` when `colB` is type `character` would miss blank strings (`""`) at the beginning of a group and return the smallest non-blank instead of blank, [#4848](https://github.com/Rdatatable/data.table/issues/4848). Thanks to Vadim Khotilovich for reporting and for the PR fixing it. + +17. Assigning a wrong-length or non-list vector to a list column could segfault, [#4166](https://github.com/Rdatatable/data.table/issues/4166) [#4667](https://github.com/Rdatatable/data.table/issues/4667) [#4678](https://github.com/Rdatatable/data.table/issues/4678) [#4729](https://github.com/Rdatatable/data.table/issues/4729). Thanks to @fklirono, Kun Ren, @kevinvzandvoort and @peterlittlejohn for reporting, and to Václav Tlapák for the PR. + +18. `as.data.table()` on `xts` objects containing a column named `x` would return an `index` of type plain `integer` rather than `POSIXct`, [#4897](https://github.com/Rdatatable/data.table/issues/4897). Thanks to Emil Sjørup for reporting, and Jan Gorecki for the PR. + +19. 
A fix to `as.Date(c("", ...))` in R 4.0.3, [17909](https://bugs.r-project.org/bugzilla3/show_bug.cgi?id=17909), has been backported to `data.table::as.IDate()` so that it too now returns `NA` for the first item when it is blank, even in older versions of R back to 3.1.0, rather than the incorrect error `character string is not in a standard unambiguous format`, [#4676](https://github.com/Rdatatable/data.table/issues/4676). Thanks to Arun Srinivasan for reporting, and Michael Chirico both for the `data.table` PR and for submitting the patch to R that was accepted and included in R 4.0.3. + +20. `uniqueN(DT, by=character())` is now equivalent to `uniqueN(DT)` rather than internal error `'by' is either not integer or is length 0`, [#4594](https://github.com/Rdatatable/data.table/issues/4594). Thanks Marco Colombo for the report, and Michael Chirico for the PR. Similarly for `unique()`, `duplicated()` and `anyDuplicated()`. + ## NOTES 1. New feature 29 in v1.12.4 (Oct 2019) introduced zero-copy coercion. Our thinking is that requiring you to get the type right in the case of `0` (type double) vs `0L` (type integer) is too inconvenient for you the user. So such coercions happen in `data.table` automatically without warning. Thanks to zero-copy coercion there is no speed penalty, even when calling `set()` many times in a loop, so there's no speed penalty to warn you about either. However, we believe that assigning a character value such as `"2"` into an integer column is more likely to be a user mistake that you would like to be warned about. The type difference (character vs integer) may be the only clue that you have selected the wrong column, or typed the wrong variable to be assigned to that column. For this reason we view character to numeric-like coercion differently and will warn about it. 
If it is correct, then the warning is intended to nudge you to wrap the RHS with `as.()` so that it is clear to readers of your code that a coercion from character to that type is intended. For example : @@ -25,6 +148,19 @@ nafill(x, fill=as.integer(3.14)) # no warning; the as. conveys intent ``` +2. `CsubsetDT` exported C function has been renamed to `DT_subsetDT`. This requires `R_GetCCallable("data.table", "CsubsetDT")` to be updated to `R_GetCCallable("data.table", "DT_subsetDT")`. Additionally there is now a dedicated header file for data.table C exports `include/datatableAPI.h`, [#4643](https://github.com/Rdatatable/data.table/issues/4643), thanks to @eddelbuettel, which makes it easier to _import_ data.table C functions. + +3. In v1.12.4, fractional `fread(..., stringsAsFactors=)` was added. For example if `stringsAsFactors=0.2`, any character column with fewer than 20% unique strings would be cast as `factor`. This is now documented in `?fread` as well, [#4706](https://github.com/Rdatatable/data.table/issues/4706). Thanks to @markderry for the PR. + +4. `cube(DT, by="a")` now gives a more helpful error that `j` is missing, [#4282](https://github.com/Rdatatable/data.table/pull/4282). + +5. v1.13.0 (July 2020) fixed a segfault/corruption/error (depending on version of R and circumstances) in `dcast()` when `fun.aggregate` returned `NA` (type `logical`) in an otherwise `character` result, [#2394](https://github.com/Rdatatable/data.table/issues/2394). This fix was the result of other internal rework and there was no news item at the time. A new test to cover this case has now been added. Thanks Vadim Khotilovich for reporting, and Michael Chirico for investigating, pinpointing when the fix occurred and adding the test. + +6. 
`DT[subset]` where `DT[(subset)]` or `DT[subset==TRUE]` was intended; i.e., subsetting by a logical column whose name conflicts with an existing function, now gives a friendlier error message, [#5014](https://github.com/Rdatatable/data.table/issues/5014). Thanks @michaelchirico for the suggestion and PR, and @ColeMiller1 for helping with the fix. + +7. Grouping by a `list` column has its error message improved stating this is unsupported, [#4308](https://github.com/Rdatatable/data.table/issues/4308). Thanks @sindribaldur for filing, and @michaelchirico for the PR. Please add your vote and especially use cases to the [#1597](https://github.com/Rdatatable/data.table/issues/1597) feature request. + + # data.table [v1.14.0](https://github.com/Rdatatable/data.table/milestone/23?closed=1) (21 Feb 2021) ## POTENTIALLY BREAKING CHANGES @@ -58,7 +194,7 @@ 2. `fwrite()`'s mutithreaded `gzip` compression failed on Solaris with Z_STREAM_ERROR, [#4099](https://github.com/Rdatatable/data.table/issues/4099). Since this feature was released in Oct 2019 (see item 3 in v1.12.4 below in this news file) there have been no known problems with it on Linux, Windows or Mac. For Solaris, we have been successively adding more and more detailed tracing to the output in each release, culminating in tracing `zlib` internals at byte level by reading `zlib`'s source. The problem did not manifest itself on [R-hub](https://builder.r-hub.io/)'s Solaris instances, so we had to work via CRAN output. If `zlib`'s `z_stream` structure is declared inside a parallel region but before a parallel for, it appears that the particular OpenMP implementation used by CRAN's Solaris moves the structure to a new address on entering the parallel for. Ordinarily this memory move would not matter, however, `zlib` internals have a self reference pointer to the parent, and check that the pointers match. This mismatch caused the -2 (Z_STREAM_ERROR). 
Allocating an array of structures, one for each thread, before the parallel region avoids the memory move with no cost. - It should be carefully noted that we cannot be sure it really is a problem unique to CRAN's Solaris. Even if it seems that way after one year of observations. For example, it could be compiler flags, or particular memory circumstances, either of which could occur on other operating systems too. However, we are unaware of why it would make sense for the OpenMP implementation to move the structure at that point. Any optimizations such as aligning the set of structures to cache line boundaries could be performed at the start of the parallel region, not after the parallel for. If anyone reading this knows more, please let us know. + It should be carefully noted that we cannot be sure it really is a problem unique to CRAN's Solaris. Even if it seems that way after one year of observations. For example, it could be compiler flags, or particular memory circumstances, either of which could occur on other operating systems too. However, we are unaware of why it would make sense for the OpenMP implementation to move the structure at that point. Any optimizations such as aligning the set of structures to cache line boundaries could be performed at the start of the parallel region, not after the parallel for. If anyone reading this knows more, please let us know. ## NOTES @@ -103,7 +239,7 @@ 1. `bit64` v4.0.2 and `bit` v4.0.3, both released on 30th July, correctly broke `data.table`'s tests. Like other packages on our `Suggest` list, we check `data.table` works with `bit64` in our tests. The first break was because `all.equal` always returned `TRUE` in previous versions of `bit64`. Now that `all.equal` works for `integer64`, the incorrect test comparison was revealed. If you use `bit64`, or `nanotime` which uses `bit64`, it is highly recommended to upgrade to the latest `bit64` version. Thanks to Cole Miller for the PR to accommodate `bit64`'s update. 
The second break caused by `bit` was the addition of a `copy` function. We did not ask, but the `bit` package kindly offered to change to a different name since `data.table::copy` is long standing. `bit` v4.0.4 released 4th August renamed `copy` to `copy_vector`. Otherwise, users of `data.table` would have needed to prefix every occurrence of `copy` with `data.table::copy` if they use `bit64` too, since `bit64` depends on (rather than importing) `bit`. Again, this impacted `data.table`'s tests which mimic a user's environment; not `data.table` itself per se. - + We have requested that CRAN policy be modified to require that reverse dependency testing include packages which `Suggest` the package. Had this been the case, reverse dependency testing of `bit64` would have caught the impact on `data.table` before release. 2. `?.NGRP` now displays the help page as intended, [#4946](https://github.com/Rdatatable/data.table/issues/4649). Thanks to @KyleHaynes for posting the issue, and Cole Miller for the fix. `.NGRP` is a symbol new in v1.13.0; see below in this file. @@ -122,7 +258,7 @@ has a better chance of working on Mac. 1. `fread` now supports native parsing of `%Y-%m-%d`, and [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601) `%Y-%m-%dT%H:%M:%OS%z`, [#4464](https://github.com/Rdatatable/data.table/pull/4464). Dates are returned as `data.table`'s `integer`-backed `IDate` class (see `?IDate`), and datetimes are returned as `POSIXct` provided either `Z` or the offset from `UTC` is present; e.g. `fwrite()` outputs UTC by default including the final `Z`. Reminder that `IDate` inherits from R's `Date` and is identical other than it uses the `integer` type where (oddly) R uses the `double` type for dates (8 bytes instead of 4). `fread()` gains a `tz` argument to control datetime values that are missing a Z or UTC-offset (now referred to as *unmarked* datetimes); e.g. as written by `write.csv`. By default `tz=""` means, as in R, read the unmarked datetime in local time. 
Unless the timezone of the R session is UTC (e.g. the TZ environment variable is set to `"UTC"`, or `""` on non-Windows), unmarked datetime will then by read by `fread` as character, as before. If you have been using `colClasses="POSIXct"` that will still work using R's `as.POSIXct()` which will interpret the unmarked datetime in local time, as before, and still slowly. You can tell `fread` to read unmarked datetime as UTC, and quickly, by passing `tz="UTC"` which may be appropriate in many circumstances. Note that the default behaviour of R to read and write csv using unmarked datetime can lead to different research results when the csv file has been saved in one timezone and read in another due to observations being shifted to a different date. If you have been using `colClasses="POSIXct"` for UTC-marked datetime (e.g. as written by `fwrite` including the final `Z`) then it will automatically speed up with no changes needed. Since this is a potentially breaking change, i.e. existing code may depend on dates and datetimes being read as type character as before, a temporary option is provided to restore the old behaviour: `options(datatable.old.fread.datetime.character=TRUE)`. However, in most cases, we expect existing code to still work with no changes. - + The minor version number is bumped from 12 to 13, i.e. `v1.13.0`, where the `.0` conveys 'be-aware' as is common practice. As with any new feature, there may be bugs to fix and changes to defaults required in future. In addition to convenience, `fread` is now significantly faster in the presence of dates, UTC-marked datetimes, and unmarked datetime when tz="UTC" is provided. ## NEW FEATURES @@ -229,7 +365,6 @@ has a better chance of working on Mac. 11. `copy()` now overallocates deeply nested lists of `data.table`s, [#4205](https://github.com/Rdatatable/data.table/issues/4205). Thanks to @d-sci for reporting and the PR. 12. 
`rbindlist` no longer errors when coercing complex vectors to character vectors, [#4202](https://github.com/Rdatatable/data.table/issues/4202). Thanks to @sritchie73 for reporting and the PR. - 13. A relatively rare case of segfault when combining non-equi joins with `by=.EACHI` is now fixed, closes [#4388](https://github.com/Rdatatable/data.table/issues/4388). 14. Selecting key columns could incur a large speed penalty, [#4498](https://github.com/Rdatatable/data.table/issues/4498). Thanks to @Jesper on Stack Overflow for the report. @@ -244,7 +379,7 @@ has a better chance of working on Mac. 19. Matrices resulting from logical operators or comparisons on `data.table`s, e.g. in `dta == dtb`, can no longer have their colnames changed by reference later, [#4323](https://github.com/Rdatatable/data.table/issues/4323). Thanks to @eyherabh for reporting and @tlapak for the PR. -20. The environment variable `R_DATATABLE_NUM_THREADS` was being limited by `R_DATATABLE_NUM_PROCS_PERCENT` (by default 50%), [#4514](https://github.com/Rdatatable/data.table/issues/4514). It is now consistent with `setDTthreads()` and only limited by the full number of logical CPUs. For example, on a machine with 8 logical CPUs, `R_DATATABLE_NUM_THREADS=6` now results in 6 threads rather than 4 (50% of 8). +20. The environment variable `R_DATATABLE_NUM_THREADS` was being limited by `R_DATATABLE_NUM_PROCS_PERCENT` (by default 50%), [#4514](https://github.com/Rdatatable/data.table/issues/4514). It is now consistent with `setDTthreads()` and only limited by the full number of logical CPUs. 
For example, on a machine with 8 logical CPUs, `R_DATATABLE_NUM_THREADS=6` now results in 6 threads rather than 4 (50% of 8).r ## NOTES @@ -1484,4 +1619,3 @@ When `j` is a symbol (as in the quanteda and xgboost examples above) it will con # data.table v1.9.8 (Nov 2016) back to v1.2 (Aug 2008) has been moved to [NEWS.0.md](https://github.com/Rdatatable/data.table/blob/master/NEWS.0.md) - diff --git a/R/IDateTime.R b/R/IDateTime.R index 0c0be82e83..832424091f 100644 --- a/R/IDateTime.R +++ b/R/IDateTime.R @@ -7,6 +7,10 @@ as.IDate = function(x, ...) UseMethod("as.IDate") as.IDate.default = function(x, ..., tz = attr(x, "tzone", exact=TRUE)) { if (is.null(tz)) tz = "UTC" + if (is.character(x)) { + # backport of similar patch to base::as.Date.character in R 4.0.3, #4676 + is.na(x) = !nzchar(x) + } as.IDate(as.Date(x, tz = tz, ...)) } @@ -240,20 +244,20 @@ rep.ITime = function (x, ...) class(y) = "ITime" # unlass and rep could feasibly not copy, hence use class<- not setattr() y } - -round.ITime <- function(x, digits = c("hours", "minutes"), ...) + +round.ITime <- function(x, digits = c("hours", "minutes"), ...) { (setattr(switch(match.arg(digits), hours = as.integer(round(unclass(x)/3600)*3600), - minutes = as.integer(round(unclass(x)/60)*60)), + minutes = as.integer(round(unclass(x)/60)*60)), "class", "ITime")) -} +} -trunc.ITime <- function(x, units = c("hours", "minutes"), ...) +trunc.ITime <- function(x, units = c("hours", "minutes"), ...) 
{ (setattr(switch(match.arg(units), hours = as.integer(unclass(x)%/%3600*3600), - minutes = as.integer(unclass(x)%/%60*60)), + minutes = as.integer(unclass(x)%/%60*60)), "class", "ITime")) } diff --git a/R/as.data.table.R b/R/as.data.table.R index 308a7b2ffe..75e8d23ae0 100644 --- a/R/as.data.table.R +++ b/R/as.data.table.R @@ -20,7 +20,7 @@ as.data.table.Date = as.data.table.ITime = function(x, keep.rownames=FALSE, key= tt = deparse(substitute(x))[1L] nm = names(x) # FR #2356 - transfer names of named vector as "rn" column if required - if (!identical(keep.rownames, FALSE) & !is.null(nm)) + if (!identical(keep.rownames, FALSE) && !is.null(nm)) x = list(nm, unname(x)) else x = list(x) if (tt == make.names(tt)) { @@ -33,6 +33,8 @@ as.data.table.Date = as.data.table.ITime = function(x, keep.rownames=FALSE, key= # as.data.table.table - FR #361 as.data.table.table = function(x, keep.rownames=FALSE, key=NULL, ...) { + # prevent #4179 & just cut out here + if (any(dim(x) == 0L)) return(null.data.table()) # Fix for bug #43 - order of columns are different when doing as.data.table(with(DT, table(x, y))) val = rev(dimnames(provideDimnames(x))) if (is.null(names(val)) || !any(nzchar(names(val)))) @@ -95,12 +97,12 @@ as.data.table.array = function(x, keep.rownames=FALSE, key=NULL, sorted=TRUE, va # NULL dimnames will create integer keys, not character as in table method val = if (is.null(dnx)) { lapply(dx, seq.int) - } else if (any(nulldnx<-sapply(dnx, is.null))) { + } else if (any(nulldnx <- vapply_1b(dnx, is.null))) { dnx[nulldnx] = lapply(dx[nulldnx], seq.int) #3636 dnx } else dnx val = rev(val) - if (is.null(names(val)) || all(!nzchar(names(val)))) + if (is.null(names(val)) || !any(nzchar(names(val)))) setattr(val, 'names', paste0("V", rev(seq_along(val)))) if (value.name %chin% names(val)) stop("Argument 'value.name' should not overlap with column names in result: ", brackify(rev(names(val)))) @@ -129,6 +131,7 @@ as.data.table.list = function(x, eachncol = integer(n) 
missing.check.names = missing(check.names) origListNames = if (missing(.named)) names(x) else NULL # as.data.table called directly, not from inside data.table() which provides .named, #3854 + empty_atomic = FALSE for (i in seq_len(n)) { xi = x[[i]] if (is.null(xi)) next # eachncol already initialized to 0 by integer() above @@ -148,10 +151,13 @@ as.data.table.list = function(x, } eachnrow[i] = NROW(xi) # for a vector (including list() columns) returns the length eachncol[i] = NCOL(xi) # for a vector returns 1 + if (is.atomic(xi) && length(xi)==0L && !is.null(xi)) { + empty_atomic = TRUE # any empty atomic (not empty list()) should result in nrows=0L, #3727 + } } ncol = sum(eachncol) # hence removes NULL items silently (no error or warning), #842. if (ncol==0L) return(null.data.table()) - nrow = max(eachnrow) + nrow = if (empty_atomic) 0L else max(eachnrow) ans = vector("list",ncol) # always return a new VECSXP recycle = function(x, nrow) { if (length(x)==nrow) { @@ -173,8 +179,6 @@ as.data.table.list = function(x, if (is.null(xi)) { n_null = n_null+1L; next } if (eachnrow[i]>1L && nrow%%eachnrow[i]!=0L) # in future: eachnrow[i]!=nrow warning("Item ", i, " has ", eachnrow[i], " rows but longest item has ", nrow, "; recycled with remainder.") - if (eachnrow[i]==0L && nrow>0L && is.atomic(xi)) # is.atomic to ignore list() since list() is a common way to initialize; let's not insist on list(NULL) - warning("Item ", i, " has 0 rows but longest item has ", nrow, "; filled with NA") # the rep() in recycle() above creates the NA vector if (is.data.table(xi)) { # matrix and data.frame were coerced to data.table above prefix = if (!isFALSE(.named[i]) && isTRUE(nchar(names(x)[i])>0L)) paste0(names(x)[i],".") else "" # test 2058.12 for (j in seq_along(xi)) { @@ -219,7 +223,8 @@ as.data.table.data.frame = function(x, keep.rownames=FALSE, key=NULL, ...) 
{ } if (any(vapply_1i(x, function(xi) length(dim(xi))))) { # not is.atomic because is.atomic(matrix) is true # a data.frame with a column that is data.frame needs to be expanded; test 2013.4 - return(as.data.table.list(x, keep.rownames=keep.rownames, ...)) + # x may be a class with [[ method that behaves differently, so as.list first for default [[, #4526 + return(as.data.table.list(as.list(x), keep.rownames=keep.rownames, ...)) } ans = copy(x) # TO DO: change this deep copy to be shallow. setattr(ans, "row.names", .set_row_names(nrow(x))) diff --git a/R/between.R b/R/between.R index f5a6600da6..61fee332b4 100644 --- a/R/between.R +++ b/R/between.R @@ -44,11 +44,11 @@ between = function(x, lower, upper, incbounds=TRUE, NAbounds=TRUE, check=FALSE) # length(upper) can be 1 or length(x) independently of lower .Call(Cbetween, x, lower, upper, incbounds, NAbounds, check) } else { - if (isTRUE(getOption("datatable.verbose"))) cat("optimised between not available for this data type, fallback to slow R routine\n") + if (isTRUE(getOption("datatable.verbose"))) catf("optimised between not available for this data type, fallback to slow R routine\n") if (isTRUE(NAbounds) && (anyNA(lower) || anyNA(upper))) stop("Not yet implemented NAbounds=TRUE for this non-numeric and non-character type") if (check && any(lower>upper, na.rm=TRUE)) stop("Some lower>upper for this non-numeric and non-character type") - if (incbounds) x>=lower & x<=upper - else x>lower & x=lower & x<=upper # this & is correct not && + else x> lower & x< upper } } @@ -78,7 +78,7 @@ inrange = function(x,lower,upper,incbounds=TRUE) { subject = setDT(list(l=lower, u=upper)) ops = if (incbounds) c(4L, 2L) else c(5L, 3L) # >=,<= and >,< verbose = isTRUE(getOption("datatable.verbose")) - if (verbose) {last.started.at=proc.time();cat("forderv(query) took ... ");flush.console()} + if (verbose) {last.started.at=proc.time();catf("forderv(query) took ... 
");flush.console()} if (verbose) {cat(timetaken(last.started.at),"\n"); flush.console()} ans = bmerge(shallow(subject), query, 1L:2L, c(1L,1L), 0, c(FALSE, TRUE), 0L, "all", ops, verbose) # fix for #1819, turn on verbose messages @@ -86,9 +86,9 @@ inrange = function(x,lower,upper,incbounds=TRUE) { options(datatable.verbose=FALSE) setDT(ans[c("starts", "lens")], key=c("starts", "lens")) options(datatable.verbose=verbose) - if (verbose) {last.started.at=proc.time();cat("Generating final logical vector ... ");flush.console()} + if (verbose) {last.started.at=proc.time();catf("Generating final logical vector ... ");flush.console()} .Call(Cinrange, idx <- vector("logical", length(x)), xo, ans[["starts"]], ans[["lens"]]) - if (verbose) {cat("done in",timetaken(last.started.at),"\n"); flush.console} + if (verbose) {catf("done in %s\n",timetaken(last.started.at)); flush.console} idx } diff --git a/R/bmerge.R b/R/bmerge.R index 3d6ab028f3..6bafd0e5bc 100644 --- a/R/bmerge.R +++ b/R/bmerge.R @@ -43,23 +43,25 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos xc = xcols[a] xclass = getClass(x[[xc]]) iclass = getClass(i[[ic]]) + xname = paste0("x.", names(x)[xc]) + iname = paste0("i.", names(i)[ic]) if (!xclass %chin% supported) stop("x.", names(x)[xc]," is type ", xclass, " which is not supported by data.table join") if (!iclass %chin% supported) stop("i.", names(i)[ic]," is type ", iclass, " which is not supported by data.table join") if (xclass=="factor" || iclass=="factor") { if (roll!=0.0 && a==length(icols)) stop("Attempting roll join on factor column when joining x.",names(x)[xc]," to i.",names(i)[ic],". 
Only integer, double or character columns may be roll joined.") if (xclass=="factor" && iclass=="factor") { - if (verbose) cat("Matching i.",names(i)[ic]," factor levels to x.",names(x)[xc]," factor levels.\n",sep="") + if (verbose) catf("Matching %s factor levels to %s factor levels.\n", iname, xname) set(i, j=ic, value=chmatch(levels(i[[ic]]), levels(x[[xc]]), nomatch=0L)[i[[ic]]]) # nomatch=0L otherwise a level that is missing would match to NA values next } else { if (xclass=="character") { - if (verbose) cat("Coercing factor column i.",names(i)[ic]," to type character to match type of x.",names(x)[xc],".\n",sep="") + if (verbose) catf("Coercing factor column %s to type character to match type of %s.\n", iname, xname) set(i, j=ic, value=val<-as.character(i[[ic]])) set(callersi, j=ic, value=val) # factor in i joining to character in x will return character and not keep x's factor; e.g. for antaresRead #3581 next } else if (iclass=="character") { - if (verbose) cat("Matching character column i.",names(i)[ic]," to factor levels in x.",names(x)[xc],".\n",sep="") + if (verbose) catf("Matching character column %s to factor levels in %s.\n", iname, xname) newvalue = chmatch(i[[ic]], levels(x[[xc]]), nomatch=0L) if (anyNA(i[[ic]])) newvalue[is.na(i[[ic]])] = NA_integer_ # NA_character_ should match to NA in factor, #3809 set(i, j=ic, value=newvalue) @@ -69,29 +71,29 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos stop("Incompatible join types: x.", names(x)[xc], " (",xclass,") and i.", names(i)[ic], " (",iclass,"). Factor columns must join to factor or character columns.") } if (xclass == iclass) { - if (verbose) cat("i.",names(i)[ic]," has same type (",xclass,") as x.",names(x)[xc],". No coercion needed.\n", sep="") + if (verbose) catf("%s has same type (%s) as %s. 
No coercion needed.\n", iname, xclass, xname) next } if (xclass=="character" || iclass=="character" || xclass=="logical" || iclass=="logical" || xclass=="factor" || iclass=="factor") { if (anyNA(i[[ic]]) && allNA(i[[ic]])) { - if (verbose) cat("Coercing all-NA i.",names(i)[ic]," (",iclass,") to type ",xclass," to match type of x.",names(x)[xc],".\n",sep="") + if (verbose) catf("Coercing all-NA %s (%s) to type %s to match type of %s.\n", iname, iclass, xclass, xname) set(i, j=ic, value=match.fun(paste0("as.", xclass))(i[[ic]])) next } else if (anyNA(x[[xc]]) && allNA(x[[xc]])) { - if (verbose) cat("Coercing all-NA x.",names(x)[xc]," (",xclass,") to type ",iclass," to match type of i.",names(i)[ic],".\n",sep="") + if (verbose) catf("Coercing all-NA %s (%s) to type %s to match type of %s.\n", xname, xclass, iclass, iname) set(x, j=xc, value=match.fun(paste0("as.", iclass))(x[[xc]])) next } stop("Incompatible join types: x.", names(x)[xc], " (",xclass,") and i.", names(i)[ic], " (",iclass,")") } if (xclass=="integer64" || iclass=="integer64") { - nm = paste0(c("i.","x."), c(names(i)[ic], names(x)[xc])) + nm = c(iname, xname) if (xclass=="integer64") { w=i; wc=ic; wclass=iclass; } else { w=x; wc=xc; wclass=xclass; nm=rev(nm) } # w is which to coerce if (wclass=="integer" || (wclass=="double" && !isReallyReal(w[[wc]]))) { - if (verbose) cat("Coercing ",wclass," column ", nm[1L], if(wclass=="double")" (which contains no fractions)"," to type integer64 to match type of ", nm[2L],".\n",sep="") + if (verbose) catf("Coercing %s column %s%s to type integer64 to match type of %s.\n", wclass, nm[1L], if (wclass=="double") " (which contains no fractions)" else "", nm[2L]) set(w, j=wc, value=bit64::as.integer64(w[[wc]])) } else stop("Incompatible join types: ", nm[2L], " is type integer64 but ", nm[1L], " is type double and contains fractions") } else { @@ -100,17 +102,17 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos if 
(!isReallyReal(i[[ic]])) { # common case of ad hoc user-typed integers missing L postfix joining to correct integer keys # we've always coerced to int and returned int, for convenience. - if (verbose) cat("Coercing double column i.",names(i)[ic]," (which contains no fractions) to type integer to match type of x.",names(x)[xc],".\n",sep="") + if (verbose) catf("Coercing double column %s (which contains no fractions) to type integer to match type of %s", iname, xname) val = as.integer(i[[ic]]) if (!is.null(attributes(i[[ic]]))) attributes(val) = attributes(i[[ic]]) # to retain Date for example; 3679 set(i, j=ic, value=val) set(callersi, j=ic, value=val) # change the shallow copy of i up in [.data.table to reflect in the result, too. } else { - if (verbose) cat("Coercing integer column x.",names(x)[xc]," to type double to match type of i.",names(i)[ic]," which contains fractions.\n",sep="") + if (verbose) catf("Coercing integer column %s to type double to match type of %s which contains fractions.\n", xname, iname) set(x, j=xc, value=as.double(x[[xc]])) } } else { - if (verbose) cat("Coercing integer column i.",names(i)[ic]," to type double for join to match type of x.",names(x)[xc],".\n",sep="") + if (verbose) catf("Coercing integer column %s to type double for join to match type of %s.\n", iname, xname) set(i, j=ic, value=as.double(i[[ic]])) } } @@ -126,17 +128,17 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos # equi join. 
use existing key (#1825) or existing secondary index (#1439) if (identical(xcols, head(chmatch(key(x), names(x)), length(xcols)))) { xo = integer(0L) - if (verbose) cat("on= matches existing key, using key\n") + if (verbose) catf("on= matches existing key, using key\n") } else { xo = NULL if (isTRUE(getOption("datatable.use.index"))) { xo = getindex(x, names(x)[xcols]) - if (verbose && !is.null(xo)) cat("on= matches existing index, using index\n") + if (verbose && !is.null(xo)) catf("on= matches existing index, using index\n") } if (is.null(xo)) { if (verbose) {last.started.at=proc.time(); flush.console()} xo = forderv(x, by = xcols) - if (verbose) {cat("Calculated ad hoc index in",timetaken(last.started.at),"\n"); flush.console()} + if (verbose) {catf("Calculated ad hoc index in %s\n", timetaken(last.started.at)); flush.console()} # TODO: use setindex() instead, so it's cached for future reuse } } @@ -147,9 +149,9 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos # non-equi operators present.. investigate groups.. nqgrp = integer(0L) nqmaxgrp = 1L - if (verbose) cat("Non-equi join operators detected ... \n") + if (verbose) catf("Non-equi join operators detected ... \n") if (roll != FALSE) stop("roll is not implemented for non-equi joins yet.") - if (verbose) {last.started.at=proc.time();cat(" forder took ... ");flush.console()} + if (verbose) {last.started.at=proc.time();catf(" forder took ... ");flush.console()} # TODO: could check/reuse secondary indices, but we need 'starts' attribute as well! xo = forderv(x, xcols, retGrp=TRUE) if (verbose) {cat(timetaken(last.started.at),"\n"); flush.console()} @@ -158,28 +160,28 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos if (length(resetcols)) { # TODO: can we get around having to reorder twice here? # or at least reuse previous order? - if (verbose) {last.started.at=proc.time();cat(" Generating group lengths ... 
");flush.console()} + if (verbose) {last.started.at=proc.time();catf(" Generating group lengths ... ");flush.console()} resetlen = attr(forderv(x, resetcols, retGrp=TRUE), 'starts', exact=TRUE) resetlen = .Call(Cuniqlengths, resetlen, nrow(x)) - if (verbose) {cat("done in",timetaken(last.started.at),"\n"); flush.console()} + if (verbose) {catf("done in %s\n",timetaken(last.started.at)); flush.console()} } else resetlen = integer(0L) - if (verbose) {last.started.at=proc.time();cat(" Generating non-equi group ids ... ");flush.console()} + if (verbose) {last.started.at=proc.time();catf(" Generating non-equi group ids ... ");flush.console()} nqgrp = .Call(Cnestedid, x, xcols[non_equi:length(xcols)], xo, xg, resetlen, mult) - if (verbose) {cat("done in",timetaken(last.started.at),"\n"); flush.console()} + if (verbose) {catf("done in %s\n",timetaken(last.started.at)); flush.console()} if (length(nqgrp)) nqmaxgrp = max(nqgrp) # fix for #1986, when 'x' is 0-row table max(.) returns -Inf. if (nqmaxgrp > 1L) { # got some non-equi join work to do if ("_nqgrp_" %in% names(x)) stop("Column name '_nqgrp_' is reserved for non-equi joins.") - if (verbose) {last.started.at=proc.time();cat(" Recomputing forder with non-equi ids ... ");flush.console()} + if (verbose) {last.started.at=proc.time();catf(" Recomputing forder with non-equi ids ... 
");flush.console()} set(nqx<-shallow(x), j="_nqgrp_", value=nqgrp) xo = forderv(nqx, c(ncol(nqx), xcols)) - if (verbose) {cat("done in",timetaken(last.started.at),"\n"); flush.console()} + if (verbose) {catf("done in %s\n",timetaken(last.started.at)); flush.console()} } else nqgrp = integer(0L) - if (verbose) cat(" Found", nqmaxgrp, "non-equi group(s) ...\n") + if (verbose) catf(" Found %d non-equi group(s) ...\n", nqmaxgrp) } - if (verbose) {last.started.at=proc.time();cat("Starting bmerge ...\n");flush.console()} + if (verbose) {last.started.at=proc.time();catf("Starting bmerge ...\n");flush.console()} ans = .Call(Cbmerge, i, x, as.integer(icols), as.integer(xcols), io, xo, roll, rollends, nomatch, mult, ops, nqgrp, nqmaxgrp) - if (verbose) {cat("bmerge done in",timetaken(last.started.at),"\n"); flush.console()} + if (verbose) {catf("bmerge done in %s\n",timetaken(last.started.at)); flush.console()} # TO DO: xo could be moved inside Cbmerge ans$xo = xo # for further use by [.data.table diff --git a/R/cedta.R b/R/cedta.R index 262db0a105..7ace210079 100644 --- a/R/cedta.R +++ b/R/cedta.R @@ -32,15 +32,15 @@ cedta = function(n=2L) { "data.table" %chin% names(getNamespaceImports(ns)) || # most common and recommended cases first for speed (nsname=="utils" && (exists("debugger.look", parent.frame(n+1L)) || - (length(sc<-sys.calls())>=8L && sc[[length(sc)-7L]][[1L]]=='example')) ) || # 'example' for #2972 + (length(sc<-sys.calls())>=8L && sc[[length(sc)-7L]] %iscall% 'example')) ) || # 'example' for #2972 (nsname=="base" && all(c("FUN", "X") %chin% ls(parent.frame(n)))) || # lapply - (nsname %chin% cedta.pkgEvalsUserCode && any(sapply(sys.calls(), function(x) is.name(x[[1L]]) && (x[[1L]]=="eval" || x[[1L]]=="evalq")))) || + (nsname %chin% cedta.pkgEvalsUserCode && any(vapply_1b(sys.calls(), function(x) is.name(x[[1L]]) && (x[[1L]]=="eval" || x[[1L]]=="evalq")))) || nsname %chin% cedta.override || isTRUE(ns$.datatable.aware) || # As of Sep 2018: RCAS, caretEnsemble, 
dtplyr, rstanarm, rbokeh, CEMiTool, rqdatatable, RImmPort, BPRMeth, rlist tryCatch("data.table" %chin% get(".Depends",paste("package",nsname,sep=":"),inherits=FALSE),error=function(e)FALSE) # both ns$.Depends and get(.Depends,ns) are not sufficient if (!ans && getOption("datatable.verbose")) { # nocov start - cat("cedta decided '",nsname,"' wasn't data.table aware. Here is call stack with [[1L]] applied:\n",sep="") + catf("cedta decided '%s' wasn't data.table aware. Here is call stack with [[1L]] applied:\n", nsname) print(sapply(sys.calls(), "[[", 1L)) # nocov end # so we can trace the namespace name that may need to be added (very unusually) diff --git a/R/data.table.R b/R/data.table.R index b8b2b4bf04..204eef6272 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -108,24 +108,37 @@ replace_dot_alias = function(e) { } .checkTypos = function(err, ref) { - if (grepl('object.*not found', err$message)) { - used = gsub(".*object '([^']+)'.*", "\\1", err$message) + # a slightly wonky workaround so that this still works in non-English sessions, #4989 + # generate this at run time (as opposed to e.g. onAttach) since session language is + # technically OK to update (though this should be rare), and since it's low-cost + # to do so here because we're about to error anyway. + missing_obj_fmt = gsub( + "'missing_datatable_variable____'", + "'(?[^']+)'", + tryCatch(eval(parse(text="missing_datatable_variable____")), error=identity)$message + # eval(parse()) to avoid "no visible binding for global variable" note from R CMD check + # names starting with _ don't parse, so no leading _ in the name + ) + idx <- regexpr(missing_obj_fmt, err$message, perl=TRUE) + if (idx > 0L) { + start = attr(idx, "capture.start", exact=TRUE)[ , "obj_name"] + used = substr( + err$message, + start, + start + attr(idx, "capture.length", exact=TRUE)[ , "obj_name"] - 1L + ) found = agrep(used, ref, value=TRUE, ignore.case=TRUE, fixed=TRUE) if (length(found)) { - stop("Object '", used, "' not found. 
Perhaps you intended ", - paste(head(found, 5L), collapse=", "), - if (length(found)<=5L) "" else paste(" or",length(found)-5L, "more")) + stop("Object '", used, "' not found. Perhaps you intended ", brackify(found)) } else { - stop("Object '", used, "' not found amongst ", - paste(head(ref, 5L), collapse=', '), - if (length(ref)<=5L) "" else paste(" and", length(ref)-5L, "more")) + stop("Object '", used, "' not found amongst ", brackify(ref)) } } else { stop(err$message, call.=FALSE) } } -"[.data.table" = function (x, i, j, by, keyby, with=TRUE, nomatch=getOption("datatable.nomatch", NA), mult="all", roll=FALSE, rollends=if (roll=="nearest") c(TRUE,TRUE) else if (roll>=0) c(FALSE,TRUE) else c(TRUE,FALSE), which=FALSE, .SDcols, verbose=getOption("datatable.verbose"), allow.cartesian=getOption("datatable.allow.cartesian"), drop=NULL, on=NULL) +"[.data.table" = function (x, i, j, by, keyby, with=TRUE, nomatch=getOption("datatable.nomatch", NA), mult="all", roll=FALSE, rollends=if (roll=="nearest") c(TRUE,TRUE) else if (roll>=0) c(FALSE,TRUE) else c(TRUE,FALSE), which=FALSE, .SDcols, verbose=getOption("datatable.verbose"), allow.cartesian=getOption("datatable.allow.cartesian"), drop=NULL, on=NULL, env=NULL) { # ..selfcount <<- ..selfcount+1 # in dev, we check no self calls, each of which doubles overhead, or could # test explicitly if the caller is [.data.table (even stronger test. TO DO.) 
@@ -137,7 +150,7 @@ replace_dot_alias = function(e) { else if (missing(drop)) `[.data.frame`(x,i,j) else `[.data.frame`(x,i,j,drop) # added is.data.table(ans) check to fix bug #81 - if (!missing(i) & is.data.table(ans)) setkey(ans,NULL) # See test 304 + if (!missing(i) && is.data.table(ans)) setkey(ans, NULL) # See test 304 return(ans) } if (!missing(verbose)) { @@ -149,16 +162,25 @@ replace_dot_alias = function(e) { } .global$print="" missingby = missing(by) && missing(keyby) # for tests 359 & 590 where passing by=NULL results in data.table not vector - if (!missing(keyby)) { - if (!missing(by)) stop("Provide either by= or keyby= but not both") - if (missing(j)) { warning("Ignoring keyby= because j= is not supplied"); keyby=NULL; } - by=bysub=substitute(keyby) - keyby=TRUE - # Assign to 'by' so that by is no longer missing and we can proceed as if there were one by + if (missingby || missing(j)) { + if (!missingby) warning("Ignoring by/keyby because 'j' is not supplied") + by = bysub = NULL + keyby = FALSE } else { - if (!missing(by) && missing(j)) { warning("Ignoring by= because j= is not supplied"); by=NULL; } - by=bysub= if (missing(by)) NULL else substitute(by) - keyby=FALSE + if (missing(by)) { + by = bysub = if (is.null(env)) substitute(keyby) + else eval(substitute(substitute2(.keyby, env), list(.keyby = substitute(keyby)))) + keyby = TRUE + } else { + by = bysub = if (is.null(env)) substitute(by) + else eval(substitute(substitute2(.by, env), list(.by = substitute(by)))) + if (missing(keyby)) + keyby = FALSE + else if (!isTRUEorFALSE(keyby)) + stop("When by and keyby are both provided, keyby must be TRUE or FALSE") + } + if (missing(by)) { missingby=TRUE; by=bysub=NULL } # possible when env is used, PR#4304 + else if (verbose) cat("Argument 'by' after substitute: ", paste(deparse(bysub, width.cutoff=500L), collapse=" "), "\n", sep="") } bynull = !missingby && is.null(by) #3530 byjoin = !is.null(by) && is.symbol(bysub) && bysub==".EACHI" @@ -214,11 +236,20 
@@ replace_dot_alias = function(e) { av = NULL jsub = NULL if (!missing(j)) { - jsub = replace_dot_alias(substitute(j)) + if (is.null(env)) jsub = substitute(j) else { + jsub = eval(substitute( + substitute2(.j, env), + list(.j = substitute(j)) + )) + if (missing(jsub)) {j = substitute(); jsub=NULL} else if (verbose) cat("Argument 'j' after substitute: ", paste(deparse(jsub, width.cutoff=500L), collapse=" "), "\n", sep="") + } + } + if (!missing(j)) { + jsub = replace_dot_alias(jsub) root = if (is.call(jsub)) as.character(jsub[[1L]])[1L] else "" if (root == ":" || (root %chin% c("-","!") && jsub[[2L]] %iscall% '(' && jsub[[2L]][[2L]] %iscall% ':') || - ( (!length(av<-all.vars(jsub)) || all(substring(av,1L,2L)=="..")) && + ( (!length(av<-all.vars(jsub)) || all(startsWith(av, ".."))) && root %chin% c("","c","paste","paste0","-","!") && missingby )) { # test 763. TODO: likely that !missingby iff with==TRUE (so, with can be removed) # When no variable names (i.e. symbols) occur in j, scope doesn't matter because there are no symbols to find. @@ -235,8 +266,8 @@ replace_dot_alias = function(e) { with=FALSE if (length(av)) { for (..name in av) { - name = substring(..name, 3L) - if (name=="") stop("The symbol .. is invalid. The .. prefix must be followed by at least one character.") + name = substr(..name, 3L, nchar(..name)) + if (!nzchar(name)) stop("The symbol .. is invalid. The .. prefix must be followed by at least one character.") if (!exists(name, where=parent.frame())) { stop("Variable '",name,"' is not found in calling scope. Looking in calling scope because you used the .. 
prefix.", if (exists(..name, where=parent.frame())) @@ -252,7 +283,7 @@ replace_dot_alias = function(e) { ..syms = av } } else if (is.name(jsub)) { - if (substring(jsub, 1L, 2L) == "..") stop("Internal error: DT[, ..var] should be dealt with by the branch above now.") # nocov + if (startsWith(as.character(jsub), "..")) stop("Internal error: DT[, ..var] should be dealt with by the branch above now.") # nocov if (!with && !exists(as.character(jsub), where=parent.frame())) stop("Variable '",jsub,"' is not found in calling scope. Looking in calling scope because you set with=FALSE. Also, please use .. symbol prefix and remove with=FALSE.") } @@ -290,10 +321,18 @@ replace_dot_alias = function(e) { # setdiff removes duplicate entries, which'll create issues with duplicated names. Use %chin% instead. dupdiff = function(x, y) x[!x %chin% y] - + isub = NULL + if (!missing(i)) { + if (is.null(env)) isub = substitute(i) else { + isub = eval(substitute( + substitute2(.i, env), + list(.i = substitute(i)) + )) + if (missing(isub)) {i = substitute(); isub=NULL} else if (verbose) cat("Argument 'i' after substitute: ", paste(deparse(isub, width.cutoff=500L), collapse=" "), "\n", sep="") + } + } if (!missing(i)) { xo = NULL - isub = substitute(i) if (identical(isub, NA)) { # only possibility *isub* can be NA (logical) is the symbol NA itself; i.e. DT[NA] # replace NA in this case with NA_integer_ as that's almost surely what user intended to @@ -366,14 +405,17 @@ replace_dot_alias = function(e) { } else { # isub is a single symbol name such as B in DT[B] i = try(eval(isub, parent.frame(), parent.frame()), silent=TRUE) - if (inherits(i,"try-error")) { + if (inherits(i,"try-error") || is.function(i)) { # must be "not found" since isub is a mere symbol col = try(eval(isub, x), silent=TRUE) # is it a column name? - msg = if (inherits(col,"try-error")) " and it is not a column name either." - else paste0(" but it is a column of type ", typeof(col),". 
If you wish to select rows where that column contains TRUE", - ", or perhaps that column contains row numbers of itself to select, try DT[(col)], DT[DT$col], or DT[col==TRUE] is particularly clear and is optimized.") - stop(as.character(isub), " is not found in calling scope", msg, - " When the first argument inside DT[...] is a single symbol (e.g. DT[var]), data.table looks for var in calling scope.") + msg = if (inherits(col, "try-error")) gettextf( + "'%s' is not found in calling scope and it is not a column name either. ", + as.character(isub) + ) else gettextf( + "'%s' is not found in calling scope, but it is a column of type %s. If you wish to select rows where that column contains TRUE, or perhaps that column contains row numbers of itself to select, try DT[(col)], DT[DT$col], or DT[col==TRUE} is particularly clear and is optimized. ", + as.character(isub), typeof(col) + ) + stop(msg, "When the first argument inside DT[...] is a single symbol (e.g. DT[var]), data.table looks for var in calling scope.") } } if (restore.N) { @@ -418,9 +460,11 @@ replace_dot_alias = function(e) { len_common_names = length(common_names) if (!len_common_names) stop("Attempting to do natural join but no common columns in provided tables") if (verbose) { - which_cols_msg = if (len_common_names == length(x)) " all 'x' columns" - else paste(":", brackify(common_names)) - cat("Joining but 'x' has no key, natural join using", which_cols_msg, "\n", sep = "") + which_cols_msg = if (len_common_names == length(x)) { + catf("Joining but 'x' has no key, natural join using all 'x' columns") + } else { + catf("Joining but 'x' has no key, natural join using: %s", brackify(common_names)) + } } on = common_names } @@ -448,10 +492,10 @@ replace_dot_alias = function(e) { # Implementation for not-join along with by=.EACHI, #604 if (notjoin && (byjoin || mult != "all")) { # mult != "all" needed for #1571 notjoin = FALSE - if (verbose) {last.started.at=proc.time();cat("not-join called with 
'by=.EACHI'; Replacing !i with i=setdiff_(x,i) ...");flush.console()} + if (verbose) {last.started.at=proc.time();catf("not-join called with 'by=.EACHI'; Replacing !i with i=setdiff_(x,i) ...");flush.console()} orignames = copy(names(i)) i = setdiff_(x, i, rightcols, leftcols) # part of #547 - if (verbose) {cat("done in",timetaken(last.started.at),"\n"); flush.console()} + if (verbose) {catf("done in %s\n",timetaken(last.started.at)); flush.console()} setnames(i, orignames[leftcols]) setattr(i, 'sorted', names(i)) # since 'x' has key set, this'll always be sorted } @@ -479,7 +523,7 @@ replace_dot_alias = function(e) { if (!byjoin || nqbyjoin) { # Really, `anyDuplicated` in base is AWESOME! # allow.cartesian shouldn't error if a) not-join, b) 'i' has no duplicates - if (verbose) {last.started.at=proc.time();cat("Constructing irows for '!byjoin || nqbyjoin' ... ");flush.console()} + if (verbose) {last.started.at=proc.time();catf("Constructing irows for '!byjoin || nqbyjoin' ... ");flush.console()} irows = if (allLen1) f__ else vecseq(f__,len__, if (allow.cartesian || notjoin || # #698. When notjoin=TRUE, ignore allow.cartesian. Rows in answer will never be > nrow(x). @@ -493,7 +537,7 @@ replace_dot_alias = function(e) { if (identical(nomatch, 0L) && allLen1) irows = irows[irows != 0L] } else { if (length(xo) && missing(on)) - stop("Internal error. Cannot by=.EACHI when joining to a secondary key, yet") # nocov + stop("Internal error. Cannot by=.EACHI when joining to an index, yet") # nocov # since f__ refers to xo later in grouping, so xo needs to be passed through to dogroups too. if (length(irows)) stop("Internal error. irows has length in by=.EACHI") # nocov @@ -518,7 +562,7 @@ replace_dot_alias = function(e) { if (length(xo) && length(irows)) { irows = xo[irows] # TO DO: fsort here? if (mult=="all" && !allGrp1) { # following #1991 fix, !allGrp1 will always be TRUE. TODO: revisit. 
- if (verbose) {last.started.at=proc.time();cat("Reorder irows for 'mult==\"all\" && !allGrp1' ... ");flush.console()} + if (verbose) {last.started.at=proc.time();catf("Reorder irows for 'mult==\"all\" && !allGrp1' ... ");flush.console()} irows = setorder(setDT(list(indices=rep.int(indices__, len__), irows=irows)))[["irows"]] if (verbose) {cat(timetaken(last.started.at),"\n"); flush.console()} } @@ -530,13 +574,13 @@ replace_dot_alias = function(e) { ## restore original order. This is a very expensive operation. ## benchmarks have shown that starting with 1e6 irows, a tweak can significantly reduce time ## (see #2366) - if (verbose) {last.started.at=proc.time()[3L];cat("Reordering", length(irows), "rows after bmerge done in ... ");flush.console()} + if (verbose) {last.started.at=proc.time();catf("Reordering %d rows after bmerge done in ... ", length(irows));flush.console()} if(length(irows) < 1e6){ irows = fsort(irows, internal=TRUE) ## internally, fsort on integer falls back to forderv } else { irows = as.integer(fsort(as.numeric(irows))) ## nocov; parallelized for numeric, but overhead of type conversion } - if (verbose) {cat(round(proc.time()[3L]-last.started.at,3L),"secs\n");flush.console()} + if (verbose) {cat(timetaken(last.started.at), "\n");flush.console()} } ## make sure, all columns are taken from x and not from i. ## This is done by simply telling data.table to continue as if there was a simple subset @@ -553,6 +597,11 @@ replace_dot_alias = function(e) { # i is not a data.table if (!is.logical(i) && !is.numeric(i)) stop("i has evaluated to type ", typeof(i), ". Expecting logical, integer or double.") if (is.logical(i)) { + if (is.na(which)) { # #4411 i filter not optimized to join: DT[A > 1, which = NA] + ## we need this branch here, not below next to which=TRUE because irows=i=which(i) will filter out NAs: DT[A > 10, which = NA] will be incorrect + if (notjoin) stop("internal error: notjoin and which=NA (non-matches), huh? 
please provide reproducible example to issue tracker") # nocov + return(which(is.na(i) | !i)) + } if (length(i)==1L # to avoid unname copy when length(i)==nrow (normal case we don't want to slow down) && isTRUE(unname(i))) { irows=i=NULL } # unname() for #2152 - length 1 named logical vector. # NULL is efficient signal to avoid creating 1:nrow(x) but still return all rows, fixes #1249 @@ -582,9 +631,9 @@ replace_dot_alias = function(e) { if (notjoin) { if (byjoin || !is.integer(irows) || is.na(nomatch)) stop("Internal error: notjoin but byjoin or !integer or nomatch==NA") # nocov irows = irows[irows!=0L] - if (verbose) {last.started.at=proc.time()[3L];cat("Inverting irows for notjoin done in ... ");flush.console()} + if (verbose) {last.started.at=proc.time();catf("Inverting irows for notjoin done in ... ");flush.console()} i = irows = if (length(irows)) seq_len(nrow(x))[-irows] else NULL # NULL meaning all rows i.e. seq_len(nrow(x)) - if (verbose) cat(round(proc.time()[3L]-last.started.at, 3L), "sec\n") + if (verbose) cat(timetaken(last.started.at), "\n") leftcols = integer() # proceed as if row subset from now on, length(leftcols) is switched on later rightcols = integer() # Doing this once here, helps speed later when repeatedly subsetting each column. 
R's [irows] would do this for each @@ -660,7 +709,7 @@ replace_dot_alias = function(e) { j = eval(jsub, setattr(as.list(seq_along(x)), 'names', names_x), parent.frame()) # else j will be evaluated for the first time on next line } else { names(..syms) = ..syms - j = eval(jsub, lapply(substring(..syms,3L), get, pos=parent.frame()), parent.frame()) + j = eval(jsub, lapply(substr(..syms, 3L, nchar(..syms)), get, pos=parent.frame()), parent.frame()) } if (is.logical(j)) j <- which(j) if (!length(j) && !notj) return( null.data.table() ) @@ -681,7 +730,7 @@ replace_dot_alias = function(e) { if (!length(ansvals)) return(null.data.table()) if (!length(leftcols)) { if (!anyNA(ansvals)) return(.Call(CsubsetDT, x, irows, ansvals)) - else stop("column(s) not found: ", paste(ansvars[is.na(ansvals)],collapse=", ")) + else stop("column(s) not found: ", brackify(ansvars[is.na(ansvals)])) } # else the NA in ansvals are for join inherited scope (test 1973), and NA could be in irows from join and data in i should be returned (test 1977) # in both cases leave to the R-level subsetting of i and x together further below @@ -750,7 +799,12 @@ replace_dot_alias = function(e) { bysub = parse(text=paste0("list(",paste(bysub,collapse=","),")"))[[1L]] bysubl = as.list.default(bysub) } - allbyvars = intersect(all.vars(bysub), names_x) + if (any(c("eval","evalq","eval.parent","local","get","mget","dynGet") %chin% all.names(bysub))) + # when the 'by' expression includes get/mget/eval, all.vars cannot be trusted to infer all used columns, #4981 + allbyvars = NULL + else + allbyvars = intersect(all.vars(bysub), names_x) + orderedirows = .Call(CisOrderedSubset, irows, nrow(x)) # TRUE when irows is NULL (i.e. no i clause). 
Similar but better than is.sorted(f__) bysameorder = byindex = FALSE if (!bysub %iscall% ":" && ##Fix #4285 @@ -761,11 +815,11 @@ replace_dot_alias = function(e) { # TODO: could be allowed if length(irows)>1 but then the index would need to be squashed for use by uniqlist, #3062 # find if allbyvars is leading subset of any of the indices; add a trailing "__" to fix #3498 where a longer column name starts with a shorter column name tt = paste0(c(allbyvars,""), collapse="__") - w = which.first(substring(paste0(indices(x),"__"),1L,nchar(tt)) == tt) + w = which.first(startsWith(paste0(indices(x), "__"), tt)) if (!is.na(w)) { byindex = indices(x)[w] if (!length(getindex(x, byindex))) { - if (verbose) cat("by index '", byindex, "' but that index has 0 length. Ignoring.\n", sep="") + if (verbose) catf("by index '%s' but that index has 0 length. Ignoring.\n", byindex) byindex=FALSE } } @@ -788,10 +842,10 @@ replace_dot_alias = function(e) { # TO DO: Make xss directly, rather than recursive call. if (!is.na(nomatch)) irows = irows[irows!=0L] # TO DO: can be removed now we have CisSortedSubset if (length(allbyvars)) { ############### TO DO TO DO TO DO ############### - if (verbose) cat("i clause present and columns used in by detected, only these subset:",paste(allbyvars,collapse=","),"\n") + if (verbose) catf("i clause present and columns used in by detected, only these subset: %s\n", brackify(allbyvars)) xss = x[irows,allbyvars,with=FALSE,nomatch=nomatch,mult=mult,roll=roll,rollends=rollends] } else { - if (verbose) cat("i clause present but columns used in by not detected. Having to subset all columns before evaluating 'by': '",deparse(by),"'\n",sep="") + if (verbose) catf("i clause present but columns used in by not detected. 
Having to subset all columns before evaluating 'by': '%s'\n", deparse(by)) xss = x[irows,nomatch=nomatch,mult=mult,roll=roll,rollends=rollends] } if (bysub %iscall% ':' && length(bysub)==3L) { @@ -827,10 +881,12 @@ replace_dot_alias = function(e) { if (!is.list(byval)) stop("'by' or 'keyby' must evaluate to a vector or a list of vectors (where 'list' includes data.table and data.frame which are lists, too)") if (length(byval)==1L && is.null(byval[[1L]])) bynull=TRUE #3530 when by=(function()NULL)() if (!bynull) for (jj in seq_len(length(byval))) { - if (!typeof(byval[[jj]]) %chin% ORDERING_TYPES) stop("column or expression ",jj," of 'by' or 'keyby' is type ",typeof(byval[[jj]]),". Do not quote column names. Usage: DT[,sum(colC),by=list(colA,month(colB))]") + if (!(this_type <- typeof(byval[[jj]])) %chin% ORDERING_TYPES) { + stop(gettextf("Column or expression %d of 'by' or 'keyby' is type '%s' which is not currently supported. If you have a compelling use case, please add it to https://github.com/Rdatatable/data.table/issues/1597. As a workaround, consider converting the column to a supported type, e.g. by=sapply(list_col, toString), whilst taking care to maintain distinctness in the process.", jj, this_type)) + } } tt = vapply_1i(byval,length) - if (any(tt!=xnrow)) stop(gettextf("The items in the 'by' or 'keyby' list are length(s) (%s). Each must be length %d; the same length as there are rows in x (after subsetting if i is provided).", paste(tt, collapse=","), xnrow, domain='R-data.table')) + if (any(tt!=xnrow)) stop(domain=NA, gettextf("The items in the 'by' or 'keyby' list are length(s) (%s). 
Each must be length %d; the same length as there are rows in x (after subsetting if i is provided).", paste(tt, collapse=","), xnrow)) if (is.null(bynames)) bynames = rep.int("",length(byval)) if (length(idx <- which(!nzchar(bynames))) && !bynull) { # TODO: improve this and unify auto-naming of jsub and bysub @@ -849,14 +905,13 @@ replace_dot_alias = function(e) { if (length(byvars) > 1L && tt %chin% all.vars(jsub, FALSE)) { bynames[jj] = deparse(bysubl[[jj+1L]]) if (verbose) - cat("by-expression '", bynames[jj], "' is not named, and the auto-generated name '", tt, - "' clashed with variable(s) in j. Therefore assigning the entire by-expression as name.\n", sep="") + catf("by-expression '%s' is not named, and the auto-generated name '%s' clashed with variable(s) in j. Therefore assigning the entire by-expression as name.\n", bynames[jj], tt) } else bynames[jj] = tt # if user doesn't like this inferred name, user has to use by=list() to name the column } # Fix for #1334 - if (any(duplicated(bynames))) { + if (anyDuplicated(bynames)) { bynames = make.unique(bynames) } } @@ -866,8 +921,8 @@ replace_dot_alias = function(e) { jvnames = NULL drop_dot = function(x) { if (length(x)!=1L) stop("Internal error: drop_dot passed ",length(x)," items") # nocov - if (identical(substring(x<-as.character(x), 1L, 1L), ".") && x %chin% c(".N", ".I", ".GRP", ".NGRP", ".BY")) - substring(x, 2L) + if (startsWith(x<-as.character(x), ".") && x %chin% c(".N", ".I", ".GRP", ".NGRP", ".BY")) + substr(x, 2L, nchar(x)) else x } @@ -884,12 +939,16 @@ replace_dot_alias = function(e) { # attempt to auto-name unnamed columns for (jj in which(nm=="")) { thisq = q[[jj + 1L]] - if (missing(thisq)) stop(gettextf("Item %d of the .() or list() passed to j is missing", jj, domain="R-data.table")) #3507 + if (missing(thisq)) stop(domain=NA, gettextf("Item %d of the .() or list() passed to j is missing", jj)) #3507 if (is.name(thisq)) nm[jj] = drop_dot(thisq) # TO DO: if call to a[1] for example, then call 
it 'a' too } - if (!is.null(jvnames) && any(idx <- nm != jvnames)) - warning("Different branches of j expression produced different auto-named columns: ", brackify(sprintf('%s!=%s', nm[idx], jvnames[idx])), '; using the most "last" names', call. = FALSE) + if (!is.null(jvnames)) { + if (length(nm) != length(jvnames)) + warning("j may not evaluate to the same number of columns for each group; if you're sure this warning is in error, please put the branching logic outside of [ for efficiency") + else if (any(idx <- nm != jvnames)) + warning("Different branches of j expression produced different auto-named columns: ", brackify(sprintf('%s!=%s', nm[idx], jvnames[idx])), '; using the most "last" names. If this was intentional (e.g., you know only one branch will ever be used in a given query because the branch is controlled by a function argument), please (1) pull this branch out of the call; (2) explicitly provide missing defaults for each branch in all cases; or (3) use the same name for each branch and re-name it in a follow-up call.', call. = FALSE) + } jvnames <<- nm # TODO: handle if() list(a, b) else list(b, a) better setattr(q, "names", NULL) # drops the names from the list so it's faster to eval the j for each group; reinstated at the end on the result. } @@ -942,7 +1001,7 @@ replace_dot_alias = function(e) { } else { if (colsub %iscall% 'patterns') { # each pattern gives a new filter condition, intersect the end result - .SDcols = Reduce(intersect, do_patterns(colsub, names_x)) + .SDcols = Reduce(intersect, eval_with_cols(colsub, names_x)) } else { .SDcols = eval(colsub, parent.frame(), parent.frame()) # allow filtering via function in .SDcols, #3950 @@ -983,7 +1042,7 @@ replace_dot_alias = function(e) { # added 'mget' - fix for #994 if (any(c("get", "mget") %chin% av)){ if (verbose) - cat(gettextf("'(m)get' found in j. ansvars being set to all columns. Use .SDcols or a single j=eval(macro) instead. 
Both will detect the columns used which is important for efficiency.\nOld ansvars: %s \n", brackify(ansvars), domain = "R-data.table")) + cat(gettextf("'(m)get' found in j. ansvars being set to all columns. Use .SDcols or a single j=eval(macro) instead. Both will detect the columns used which is important for efficiency.\nOld ansvars: %s \n", brackify(ansvars))) # get('varname') is too difficult to detect which columns are used in general # eval(macro) column names are detected via the if jsub[[1]]==eval switch earlier above. @@ -1003,7 +1062,7 @@ replace_dot_alias = function(e) { } non_sdvars = setdiff(ansvars, sdvars) ansvals = chmatch(ansvars, names_x) - if (verbose) cat(gettextf("New ansvars: %s \n", brackify(ansvars), domain = "R-data.table")) + if (verbose) catf("New ansvars: %s \n", brackify(ansvars)) } else if (length(non_sdvars)) { # we've a situation like DT[, c(sum(V1), lapply(.SD, mean)), by=., .SDcols=...] or # DT[, lapply(.SD, function(x) x *v1), by=, .SDcols=...] etc., @@ -1015,7 +1074,7 @@ replace_dot_alias = function(e) { if (!missing(.SDcols)) warning("This j doesn't use .SD but .SDcols has been supplied. Ignoring .SDcols. 
See ?data.table.") allcols = c(names_x, xdotprefix, names_i, idotprefix) ansvars = sdvars = setdiff(intersect(av, allcols), bynames) - if (verbose) cat("Detected that j uses these columns:",if (!length(ansvars)) "" else paste(ansvars,collapse=","),"\n") + if (verbose) catf("Detected that j uses these columns: %s\n",if (!length(ansvars)) "" else brackify(ansvars)) # using a few named columns will be faster # Consider: DT[,max(diff(date)),by=list(month=month(date))] # and: DT[,lapply(.SD,sum),by=month(date)] @@ -1062,7 +1121,7 @@ replace_dot_alias = function(e) { lhs = names_x[m] } else stop("LHS of := isn't column names ('character') or positions ('integer' or 'numeric')") - if (all(!is.na(m))) { + if (!anyNA(m)) { # updates by reference to existing columns cols = as.integer(m) newnames=NULL @@ -1077,8 +1136,7 @@ replace_dot_alias = function(e) { # fix errors in their RHS when called on empty edge cases, even when the result won't be # used anyway (so it would be annoying to have to fix it.) if (verbose) { - cat("No rows match i. No new columns to add so not evaluating RHS of :=\n") - cat("Assigning to 0 row subset of",nrow(x),"rows\n") + catf("No rows match i. No new columns to add so not evaluating RHS of :=\nAssigning to 0 row subset of %d rows\n", nrow(x)) } .Call(Cassign, x, irows, NULL, NULL, NULL) # only purpose is to write 0 to .Last.updated .global$print = address(x) @@ -1100,9 +1158,9 @@ replace_dot_alias = function(e) { # i.e. reallocate at the size as if the new columns were added followed by setalloccol(). name = substitute(x) if (is.name(name) && ok && verbose) { # && NAMED(x)>0 (TO DO) # ok here includes -1 (loaded from disk) - cat("Growing vector of column pointers from truelength ", truelength(x), " to ", n, ". A shallow copy has been taken, see ?setalloccol. Only a potential issue if two variables point to the same data (we can't yet detect that well) and if not you can safely ignore this. 
To avoid this message you could setalloccol() first, deep copy first using copy(), wrap with suppressWarnings() or increase the 'datatable.alloccol' option.\n") + catf("Growing vector of column pointers from truelength %d to %d. A shallow copy has been taken, see ?setalloccol. Only a potential issue if two variables point to the same data (we can't yet detect that well) and if not you can safely ignore this. To avoid this message you could setalloccol() first, deep copy first using copy(), wrap with suppressWarnings() or increase the 'datatable.alloccol' option.\n", truelength(x), n) # #1729 -- copying to the wrong environment here can cause some confusion - if (ok == -1L) cat("Note that the shallow copy will assign to the environment from which := was called. That means for example that if := was called within a function, the original table may be unaffected.\n") + if (ok == -1L) catf("Note that the shallow copy will assign to the environment from which := was called. That means for example that if := was called within a function, the original table may be unaffected.\n") # Verbosity should not issue warnings, so cat rather than warning. # TO DO: Add option 'datatable.pedantic' to turn on warnings like this. 
@@ -1123,7 +1181,7 @@ replace_dot_alias = function(e) { if (is.list(k)) { origj = j = if (name[[1L]] == "$") as.character(name[[3L]]) else eval(name[[3L]], parent.frame(), parent.frame()) if (is.character(j)) { - if (length(j)!=1L) stop("Cannot assign to an under-allocated recursively indexed list -- L[[i]][,:=] syntax is only valid when i is length 1, but it's length ", length(j)) + if (length(j)!=1L) stop("Cannot assign to an under-allocated recursively indexed list -- L[[i]][,:=] syntax is only valid when i is length 1, but its length is ", length(j)) j = match(j, names(k)) if (is.na(j)) stop("Internal error -- item '", origj, "' not found in names of list") # nocov } @@ -1154,7 +1212,7 @@ replace_dot_alias = function(e) { xcolsAns = seq_along(ansvars) icols = icolsAns = integer() } else { - if (!length(leftcols)) stop("Internal error -- column(s) not found: ", paste(ansvars[wna],collapse=", ")) # nocov + if (!length(leftcols)) stop("Internal error -- column(s) not found: ", brackify(ansvars[wna])) # nocov xcols = w[!wna] xcolsAns = which(!wna) map = c(seq_along(i), leftcols) # this map is to handle dups in leftcols, #3635 @@ -1184,8 +1242,8 @@ replace_dot_alias = function(e) { } syms = all.vars(jsub) - syms = syms[ substring(syms,1L,2L)==".." ] - syms = syms[ substring(syms,3L,3L)!="." ] # exclude ellipsis + syms = syms[ startsWith(syms, "..") ] + syms = syms[ substr(syms, 3L, 3L) != "." ] # exclude ellipsis for (sym in syms) { if (sym %chin% names_x) { # if "..x" exists as column name, use column, for backwards compatibility; e.g. package socialmixr in rev dep checks #2779 @@ -1193,7 +1251,7 @@ replace_dot_alias = function(e) { # TODO in future, as warned in NEWS item for v1.11.0 : # warning(sym," in j is looking for ",getName," in calling scope, but a column '", sym, "' exists. 
Column names should not start with ..") } - getName = substring(sym, 3L) + getName = substr(sym, 3L, nchar(sym)) if (!exists(getName, parent.frame())) { if (exists(sym, parent.frame())) next # user did 'manual' prefix; i.e. variable in calling scope has .. prefix stop("Variable '",getName,"' is not found in calling scope. Looking in calling scope because this symbol was prefixed with .. in the j= parameter.") @@ -1335,15 +1393,20 @@ replace_dot_alias = function(e) { setattr(jval,"names",NULL) # discard names of named vectors otherwise each cell in the column would have a name jval = list(jval) } - if (!is.null(jvnames) && !all(jvnames=="")) setattr(jval, 'names', jvnames) # e.g. jvnames=="N" for DT[,.N,] + if (!is.null(jvnames) && any(nzchar(jvnames))) { + if (length(jvnames) > length(jval)) jvnames = jvnames[seq_along(jval)] #4274 + setattr(jval, 'names', jvnames[seq_along(jval)]) # e.g. jvnames=="N" for DT[,.N,] + } jval = as.data.table.list(jval, .named=NULL) } if (is.data.table(jval)) { - setattr(jval, 'class', class(x)) # fix for #64 + # should set the parent class only when jval is a plain data.table #4324 + if (identical(class(jval), c('data.table', 'data.frame'))) + setattr(jval, 'class', class(x)) # fix for #64 if (haskey(x) && all(key(x) %chin% names(jval)) && is.sorted(jval, by=key(x))) setattr(jval, 'sorted', key(x)) - if (any(sapply(jval, is.null))) stop("Internal error: j has created a data.table result containing a NULL column") # nocov + if (any(vapply_1b(jval, is.null))) stop("Internal error: j has created a data.table result containing a NULL column") # nocov } return(jval) } @@ -1371,7 +1434,7 @@ replace_dot_alias = function(e) { SDenv$`-.POSIXt` = function(e1, e2) { if (inherits(e2, 'POSIXt')) { if (verbose && !exists('done_units_report', parent.frame())) { - cat('\nNote: forcing units="secs" on implicit difftime by group; call difftime explicitly to choose custom units') + catf('\nNote: forcing units="secs" on implicit difftime by group; call 
difftime explicitly to choose custom units\n') assign('done_units_report', TRUE, parent.frame()) } return(difftime(e1, e2, units='secs')) @@ -1385,7 +1448,8 @@ replace_dot_alias = function(e) { byval = i bynames = if (missing(on)) head(key(x),length(leftcols)) else names(on) allbyvars = NULL - bysameorder = haskey(i) || (is.sorted(f__) && ((roll == FALSE) || length(f__) == 1L)) # Fix for #1010 + bysameorder = (haskey(i) && identical(leftcols, chmatch(head(key(i),length(leftcols)), names(i)))) || # leftcols leading subset of key(i); see #4917 + (roll==FALSE && is.sorted(f__)) # roll==FALSE is fix for #1010 ## 'av' correct here ?? *** TO DO *** xjisvars = intersect(av, names_x[rightcols]) # no "x." for xvars. # if 'get' is in 'av' use all cols in 'i', fix for bug #34 @@ -1407,7 +1471,7 @@ replace_dot_alias = function(e) { if (length(byval) && length(byval[[1L]])) { if (!bysameorder && isFALSE(byindex)) { - if (verbose) {last.started.at=proc.time();cat("Finding groups using forderv ... ");flush.console()} + if (verbose) {last.started.at=proc.time();catf("Finding groups using forderv ... ");flush.console()} o__ = forderv(byval, sort=keyby, retGrp=TRUE) # The sort= argument is called sortGroups at C level. It's primarily for saving the sort of unique strings at # C level for efficiency when by= not keyby=. Other types also retain appearance order, but at byte level to @@ -1421,7 +1485,7 @@ replace_dot_alias = function(e) { if (verbose) { cat(timetaken(last.started.at),"\n") last.started.at=proc.time() - cat("Finding group sizes from the positions (can be avoided to save RAM) ... ") + catf("Finding group sizes from the positions (can be avoided to save RAM) ... 
") flush.console() # for windows } f__ = attr(o__, "starts", exact=TRUE) @@ -1429,7 +1493,7 @@ replace_dot_alias = function(e) { if (verbose) {cat(timetaken(last.started.at),"\n"); flush.console()} if (!bysameorder && !keyby) { # TO DO: lower this into forder.c - if (verbose) {last.started.at=proc.time();cat("Getting back original order ... ");flush.console()} + if (verbose) {last.started.at=proc.time();catf("Getting back original order ... ");flush.console()} firstofeachgroup = o__[f__] if (length(origorder <- forderv(firstofeachgroup))) { f__ = f__[origorder] @@ -1441,11 +1505,11 @@ replace_dot_alias = function(e) { } else { if (verbose) last.started.at=proc.time(); if (bysameorder) { - if (verbose) {cat("Finding groups using uniqlist on key ... ");flush.console()} + if (verbose) {catf("Finding groups using uniqlist on key ... ");flush.console()} f__ = uniqlist(byval) } else { if (!is.character(byindex) || length(byindex)!=1L) stop("Internal error: byindex not the index name") # nocov - if (verbose) {cat("Finding groups using uniqlist on index '", byindex, "' ... ", sep="");flush.console()} + if (verbose) {catf("Finding groups using uniqlist on index '%s' ... ", byindex);flush.console()} o__ = getindex(x, byindex) if (is.null(o__)) stop("Internal error: byindex not found") # nocov f__ = uniqlist(byval, order=o__) @@ -1453,7 +1517,7 @@ replace_dot_alias = function(e) { if (verbose) { cat(timetaken(last.started.at),"\n") last.started.at=proc.time() - cat("Finding group sizes from the positions (can be avoided to save RAM) ... ") + catf("Finding group sizes from the positions (can be avoided to save RAM) ... ") flush.console() # for windows } len__ = uniqlengths(f__, xnrow) @@ -1603,7 +1667,8 @@ replace_dot_alias = function(e) { jl__ = as.list(jsubl[[i_]])[-1L] # just keep the '.' from list(.) 
jn__ = if (is.null(names(jl__))) rep("", length(jl__)) else names(jl__) idx = unlist(lapply(jl__, function(x) is.name(x) && x == ".I")) - if (any(idx)) jn__[idx & (jn__ == "")] = "I" + if (any(idx)) + jn__[idx & !nzchar(jn__)] = "I" # this & is correct not && jvnames = c(jvnames, jn__) jsubl[[i_]] = jl__ } @@ -1644,9 +1709,9 @@ replace_dot_alias = function(e) { } if (verbose) { if (!identical(oldjsub, jsub)) - cat("lapply optimization changed j from '",deparse(oldjsub),"' to '",deparse(jsub,width.cutoff=200L, nlines=1L),"'\n",sep="") + catf("lapply optimization changed j from '%s' to '%s'\n", deparse(oldjsub), deparse(jsub,width.cutoff=200L, nlines=1L)) else - cat("lapply optimization is on, j unchanged as '",deparse(jsub,width.cutoff=200L, nlines=1L),"'\n",sep="") + catf("lapply optimization is on, j unchanged as '%s'\n", deparse(jsub,width.cutoff=200L, nlines=1L)) } dotN = function(x) is.name(x) && x==".N" # For #334. TODO: Rprof() showed dotN() may be the culprit if iterated (#1470)?; avoid the == which converts each x to character? # FR #971, GForce kicks in on all subsets, no joins yet. Although joins could work with @@ -1656,7 +1721,7 @@ replace_dot_alias = function(e) { GForce = FALSE if ( (is.name(jsub) && jsub==".N") || (jsub %iscall% 'list' && length(jsub)==2L && jsub[[2L]]==".N") ) { GForce = TRUE - if (verbose) cat("GForce optimized j to '",deparse(jsub, width.cutoff=200L, nlines=1L),"'\n",sep="") + if (verbose) catf("GForce optimized j to '%s'\n",deparse(jsub, width.cutoff=200L, nlines=1L)) } } else { # Apply GForce @@ -1666,8 +1731,9 @@ replace_dot_alias = function(e) { # is.symbol() is for #1369, #1974 and #2949 if (!(is.call(q) && is.symbol(q[[1L]]) && is.symbol(q[[2L]]) && (q1 <- q[[1L]]) %chin% gfuns)) return(FALSE) if (!(q2 <- q[[2L]]) %chin% names(SDenv$.SDall) && q2 != ".I") return(FALSE) # 875 - if ((length(q)==2L || identical("na",substring(names(q)[3L], 1L, 2L))) && (!q1 %chin% c("head","tail"))) return(TRUE) - # ... 
head-tail uses default value n=6 which as of now should not go gforce ^^ + if ((length(q)==2L || (!is.null(names(q)) && startsWith(names(q)[3L], "na"))) && (!q1 %chin% c("head","tail"))) return(TRUE) + # ^^ base::startWith errors on NULL unfortunately + # head-tail uses default value n=6 which as of now should not go gforce ... ^^ # otherwise there must be three arguments, and only in two cases: # 1) head/tail(x, 1) or 2) x[n], n>0 length(q)==3L && length(q3 <- q[[3L]])==1L && is.numeric(q3) && @@ -1690,8 +1756,8 @@ replace_dot_alias = function(e) { jsub[[1L]] = as.name(paste0("g", jsub[[1L]])) if (length(jsub)==3L) jsub[[3L]] = eval(jsub[[3L]], parent.frame()) # tests 1187.3 & 1187.5 } - if (verbose) cat("GForce optimized j to '",deparse(jsub, width.cutoff=200L, nlines=1L),"'\n",sep="") - } else if (verbose) cat("GForce is on, left j unchanged\n"); + if (verbose) catf("GForce optimized j to '%s'\n", deparse(jsub, width.cutoff=200L, nlines=1L)) + } else if (verbose) catf("GForce is on, left j unchanged\n"); } } if (!GForce && !is.name(jsub)) { @@ -1714,9 +1780,9 @@ replace_dot_alias = function(e) { } if (verbose) { if (!identical(oldjsub, jsub)) - cat("Old mean optimization changed j from '",deparse(oldjsub),"' to '",deparse(jsub, width.cutoff=200L, nlines=1L),"'\n",sep="") + catf("Old mean optimization changed j from '%s' to '%s'\n", deparse(oldjsub), deparse(jsub, width.cutoff=200L, nlines=1L)) else - cat("Old mean optimization is on, left j unchanged.\n") + catf("Old mean optimization is on, left j unchanged.\n") } assign("Cfastmean", Cfastmean, SDenv) # Old comments still here for now ... @@ -1726,8 +1792,8 @@ replace_dot_alias = function(e) { # when fastmean can do trim. 
} } else if (verbose) { - if (getOption("datatable.optimize")<1L) cat("All optimizations are turned off\n") - else cat("Optimization is on but left j unchanged (single plain symbol): '",deparse(jsub, width.cutoff=200L, nlines=1L),"'\n",sep="") + if (getOption("datatable.optimize")<1L) catf("All optimizations are turned off\n") + else catf("Optimization is on but left j unchanged (single plain symbol): '%s'\n", deparse(jsub, width.cutoff=200L, nlines=1L)) } if (byjoin) { groups = i @@ -1756,7 +1822,7 @@ replace_dot_alias = function(e) { # for consistency of empty case in test 184 f__=len__=0L } - if (verbose) {last.started.at=proc.time();cat("Making each group and running j (GForce ",GForce,") ... ",sep="");flush.console()} + if (verbose) {last.started.at=proc.time();catf("Making each group and running j (GForce %s) ... ", GForce);flush.console()} if (GForce) { thisEnv = new.env() # not parent=parent.frame() so that gsum is found for (ii in ansvars) assign(ii, x[[ii]], thisEnv) @@ -1802,7 +1868,7 @@ replace_dot_alias = function(e) { cnames = as.character(bysubl)[-1L] cnames = gsub('^`|`$', '', cnames) # the wrapping backticks that were added above can be removed now, #3378 if (all(cnames %chin% names_x)) { - if (verbose) {last.started.at=proc.time();cat("setkey() after the := with keyby= ... ");flush.console()} + if (verbose) {last.started.at=proc.time();catf("setkey() after the := with keyby= ... ");flush.console()} setkeyv(x,cnames) # TO DO: setkey before grouping to get memcpy benefit. if (verbose) {cat(timetaken(last.started.at),"\n"); flush.console()} } @@ -1829,7 +1895,7 @@ replace_dot_alias = function(e) { setnames(ans,seq_along(bynames),bynames) # TO DO: reinvestigate bynames flowing from dogroups here and simplify } if (byjoin && keyby && !bysameorder) { - if (verbose) {last.started.at=proc.time();cat("setkey() afterwards for keyby=.EACHI ... ");flush.console()} + if (verbose) {last.started.at=proc.time();catf("setkey() afterwards for keyby=.EACHI ... 
");flush.console()} setkeyv(ans,names(ans)[seq_along(byval)]) if (verbose) {cat(timetaken(last.started.at),"\n"); flush.console()} } else if (keyby || (haskey(x) && bysameorder && (byjoin || (length(allbyvars) && identical(allbyvars,head(key(x),length(allbyvars))))))) { @@ -1842,7 +1908,7 @@ replace_dot_alias = function(e) { if (length(expr)==2L) # no parameters passed to mean, so defaults of trim=0 and na.rm=FALSE return(call(".External",quote(Cfastmean),expr[[2L]], FALSE)) # return(call(".Internal",expr)) # slightly faster than .External, but R now blocks .Internal in coerce.c from apx Sep 2012 - if (length(expr)==3L && identical("na",substring(names(expr)[3L], 1L, 2L))) # one parameter passed to mean() + if (length(expr)==3L && startsWith(names(expr)[3L], "na")) # one parameter passed to mean() return(call(".External",quote(Cfastmean),expr[[2L]], expr[[3L]])) # faster than .Call assign("nomeanopt",TRUE,parent.frame()) expr # e.g. trim is not optimized, just na.rm @@ -2322,25 +2388,23 @@ split.data.table = function(x, f, drop = FALSE, by, sorted = FALSE, keep.by = TR join = TRUE } dtq[["j"]] = substitute( - list(.ll.tech.split=list(.expr)), - list(.expr = if (join) quote(if(.N == 0L) .SD[0L] else .SD) else as.name(".SD")) # simplify when `nomatch` accept NULL #857 ? + list(.ll.tech.split=list(.expr), .ll.tech.split.names=paste(lapply(.BY, as.character), collapse=".")), + list(.expr = if (join) quote(if(.N == 0L) .SD[0L] else .SD) else as.name(".SD")) ) - by.or.keyby = if (join) "by" else c("by"[!sorted], "keyby"[sorted])[1L] - dtq[[by.or.keyby]] = substitute( # retain order, for `join` and `sorted` it will use order of `i` data.table instead of `keyby`. + dtq[["by"]] = substitute( # retain order, for `join` and `sorted` it will use order of `i` data.table instead of `keyby`. 
.expr, - list(.expr = if(join) {as.name(".EACHI")} else if (flatten) by else .by) + list(.expr = if (join) as.name(".EACHI") else if (flatten) by else .by) ) + dtq[["keyby"]] = if (join) FALSE else sorted dtq[[".SDcols"]] = if (keep.by) names(x) else setdiff(names(x), if (flatten) by else .by) if (join) dtq[["on"]] = if (flatten) by else .by dtq = as.call(dtq) - if (isTRUE(verbose)) cat("Processing split.data.table with: ", deparse(dtq, width.cutoff=500L), "\n", sep="") + if (isTRUE(verbose)) catf("Processing split.data.table with: %s\n", deparse(dtq, width.cutoff=500L)) tmp = eval(dtq) # add names on list - setattr(ll <- tmp$.ll.tech.split, - "names", - as.character( - if (!flatten) tmp[[.by]] else tmp[, list(.nm.tech.split=paste(unlist(lapply(.SD, as.character)), collapse = ".")), by=by, .SDcols=by]$.nm.tech.split - )) + ll = tmp$.ll.tech.split + nm = tmp$.ll.tech.split.names + setattr(ll, "names", nm) # handle nested split if (flatten || length(by) == 1L) { for (x in ll) .Call(C_unlock, x) @@ -2521,7 +2585,7 @@ setnames = function(x,old,new,skip_absent=FALSE) { } } } - if (any(w <- new==names(x)[i] & Encoding(new)==Encoding(names(x)[i]))) { + if (any(w <- new==names(x)[i] & Encoding(new)==Encoding(names(x)[i]))) { # this & is correct not && w = which(!w) new = new[w] i = i[w] @@ -2933,7 +2997,7 @@ isReallyReal = function(x) { RHS = eval(stub[[3L]], x, enclos) if (is.list(RHS)) RHS = as.character(RHS) # fix for #961 if (length(RHS) != 1L && !operator %chin% c("%in%", "%chin%")){ - if (length(RHS) != nrow(x)) stop(gettextf("RHS of %s is length %d which is not 1 or nrow (%d). For robustness, no recycling is allowed (other than of length 1 RHS). Consider %%in%% instead.", operator, length(RHS), nrow(x), domain="R-data.table"), domain=NA) + if (length(RHS) != nrow(x)) stop(domain=NA, gettextf("RHS of %s is length %d which is not 1 or nrow (%d). For robustness, no recycling is allowed (other than of length 1 RHS). 
Consider %%in%% instead.", operator, length(RHS), nrow(x))) return(NULL) # DT[colA == colB] regular element-wise vector scan } if ( mode(x[[col]]) != mode(RHS) || # mode() so that doubleLHS/integerRHS and integerLHS/doubleRHS!isReallyReal are optimized (both sides mode 'numeric') @@ -2965,7 +3029,7 @@ isReallyReal = function(x) { ## convert i to data.table with all combinations in rows. if(length(i) > 1L && prod(vapply_1i(i, length)) > 1e4){ ## CJ would result in more than 1e4 rows. This would be inefficient, especially memory-wise #2635 - if (verbose) {cat("Subsetting optimization disabled because the cross-product of RHS values exceeds 1e4, causing memory problems.\n");flush.console()} + if (verbose) {catf("Subsetting optimization disabled because the cross-product of RHS values exceeds 1e4, causing memory problems.\n");flush.console()} return(NULL) } ## Care is needed with names as we construct i @@ -2978,14 +3042,15 @@ isReallyReal = function(x) { i = do.call(CJ, i) setnames(i, colNames) idx = NULL - if(is.null(idx)){ - ## check whether key fits the columns in i. - ## order of key columns makes no difference, as long as they are all upfront in the key, I believe. - if (all(names(i) %chin% head(key(x), length(i)))){ - if (verbose) {cat("Optimized subsetting with key '", paste0( head(key(x), length(i)), collapse = ", "),"'\n",sep="");flush.console()} - idx = integer(0L) ## integer(0L) not NULL! Indicates that x is ordered correctly. - idxCols = head(key(x), length(i)) ## in correct order! - } + if (is.null(idx)) { + ## check whether key fits the columns in i. + ## order of key columns makes no difference, as long as they are all upfront in the key, I believe. + key_head = head(key(x), length(i)) + if (all(names(i) %chin% key_head)) { + if (verbose) {catf("Optimized subsetting with key %s", brackify(key_head)); flush.console()} + idx = integer(0L) ## integer(0L) not NULL! Indicates that x is ordered correctly. + idxCols = key_head ## in correct order! 
+ } } if (is.null(idx)){ if (!getOption("datatable.use.index")) return(NULL) # #1422 @@ -3001,17 +3066,17 @@ isReallyReal = function(x) { } } if (!is.null(idx)){ - if (verbose) {cat("Optimized subsetting with index '", paste0( idxCols, collapse = "__"),"'\n",sep="");flush.console()} + if (verbose) {catf("Optimized subsetting with index '%s'\n", paste0( idxCols, collapse = "__"));flush.console()} } } if (is.null(idx)){ ## if nothing else helped, auto create a new index that can be used if (!getOption("datatable.auto.index")) return(NULL) - if (verbose) {cat("Creating new index '", paste0(names(i), collapse = "__"),"'\n",sep="");flush.console()} - if (verbose) {last.started.at=proc.time();cat("Creating index", paste0(names(i), collapse = "__"), "done in ... ");flush.console()} + if (verbose) {catf("Creating new index '%s'\n", paste0(names(i), collapse = "__"));flush.console()} + if (verbose) {last.started.at=proc.time();catf("Creating index %s done in ...", paste0(names(i), collapse = "__"));flush.console()} setindexv(x, names(i)) if (verbose) {cat(timetaken(last.started.at),"\n");flush.console()} - if (verbose) {cat("Optimized subsetting with index '", paste0(names(i), collapse = "__"),"'\n",sep="");flush.console()} + if (verbose) {catf("Optimized subsetting with index '%s'\n", paste0(names(i), collapse = "__"));flush.console()} idx = attr(attr(x, "index", exact=TRUE), paste0("__", names(i), collapse = ""), exact=TRUE) idxCols = names(i) } @@ -3118,7 +3183,7 @@ isReallyReal = function(x) { } idx_op = match(operators, ops, nomatch=0L) if (any(idx_op %in% c(0L, 6L))) - stop("Invalid operators ", paste(operators[idx_op %in% c(0L, 6L)], collapse=","), ". Only allowed operators are ", paste(ops[1:5], collapse=""), ".") + stop(domain=NA, gettextf("Invalid join operators %s. 
Only allowed operators are %s.", brackify(operators[idx_op %in% c(0L, 6L)]), brackify(ops[1:5]))) ## the final on will contain the xCol as name, the iCol as value on = iCols names(on) = xCols diff --git a/R/devel.R b/R/devel.R index b0dfb71858..1da19b7c98 100644 --- a/R/devel.R +++ b/R/devel.R @@ -13,7 +13,7 @@ dcf.repo = function(pkg, repo, field, type) { idx = file(file.path(contrib.url(repo, type=type),"PACKAGES")) on.exit(close(idx)) dcf = read.dcf(idx, fields=c("Package",field)) - if (!pkg %in% dcf[,"Package"]) stop(gettextf("There is no package %s in provided repository.", pkg, domain='R-data.table')) + if (!pkg %in% dcf[,"Package"]) stop(domain=NA, gettextf("There is no package %s in provided repository.", pkg)) dcf[dcf[,"Package"]==pkg, field][[1L]] } @@ -28,8 +28,8 @@ update.dev.pkg = function(object="data.table", repo="https://Rdatatable.gitlab.i # get Revision field from remote repository PACKAGES file una = is.na(ups<-dcf.repo(pkg, repo, field, type)) if (una) - cat(sprintf("No revision information found in DESCRIPTION file for %s package. Unsure '%s' is correct field in PACKAGES file in your package repository '%s'. Otherwise package will be re-installed every time, proceeding to installation.\n", - pkg, field, contrib.url(repo, type=type))) + catf("No revision information found in DESCRIPTION file for %s package. Unsure '%s' is correct field in PACKAGES file in your package repository '%s'. 
Otherwise package will be re-installed every time, proceeding to installation.\n", + pkg, field, contrib.url(repo, type=type)) # see if Revision is different then currently installed Revision, note that installed package will have Revision info only when it was installed from remote devel repo upg = una || !identical(ups, dcf.lib(pkg, field, lib.loc=lib)) # update.dev.pkg fails on windows R 4.0.0, we have to unload package namespace before installing new version #4403 @@ -50,7 +50,7 @@ update.dev.pkg = function(object="data.table", repo="https://Rdatatable.gitlab.i .git = function(quiet=FALSE, lib.loc=NULL) { ans = unname(read.dcf(system.file("DESCRIPTION", package="data.table", lib.loc=lib.loc, mustWork=TRUE), fields="Revision")[, "Revision"]) if (!quiet && is.na(ans)) - cat("Git revision is not available. Most likely data.table was installed from CRAN or local archive.\nGit revision is available when installing from our repositories 'https://Rdatatable.gitlab.io/data.table' and 'https://Rdatatable.github.io/data.table'.\n") + catf("Git revision is not available. Most likely data.table was installed from CRAN or local archive.\nGit revision is available when installing from our repositories 'https://Rdatatable.gitlab.io/data.table' and 'https://Rdatatable.github.io/data.table'.\n") ans } diff --git a/R/duplicated.R b/R/duplicated.R index 1ae7e8a6e4..249a5470c5 100644 --- a/R/duplicated.R +++ b/R/duplicated.R @@ -1,14 +1,12 @@ duplicated.data.table = function(x, incomparables=FALSE, fromLast=FALSE, by=seq_along(x), ...) 
{ if (!cedta()) return(NextMethod("duplicated")) #nocov - if (!identical(incomparables, FALSE)) { + if (!isFALSE(incomparables)) { .NotYetUsed("incomparables != FALSE") } if (nrow(x) == 0L || ncol(x) == 0L) return(logical(0L)) # fix for bug #28 if (is.na(fromLast) || !is.logical(fromLast)) stop("'fromLast' must be TRUE or FALSE") + if (!length(by)) by = NULL #4594 query = .duplicated.helper(x, by) - # fix for bug #44 - unique on null data table returns error (because of 'forderv') - # however, in this case we can bypass having to go to forderv at all. - if (!length(query$by)) return(logical(0L)) if (query$use.keyprefix) { f = uniqlist(shallow(x, query$by)) @@ -27,10 +25,11 @@ duplicated.data.table = function(x, incomparables=FALSE, fromLast=FALSE, by=seq_ unique.data.table = function(x, incomparables=FALSE, fromLast=FALSE, by=seq_along(x), ...) { if (!cedta()) return(NextMethod("unique")) # nocov - if (!identical(incomparables, FALSE)) { + if (!isFALSE(incomparables)) { .NotYetUsed("incomparables != FALSE") } if (nrow(x) <= 1L) return(x) + if (!length(by)) by = NULL #4594 o = forderv(x, by=by, sort=FALSE, retGrp=TRUE) # if by=key(x), forderv tests for orderedness within it quickly and will short-circuit # there isn't any need in unique() to call uniqlist like duplicated does; uniqlist returns a new nrow(x) vector anyway and isn't @@ -105,14 +104,15 @@ uniqueN = function(x, by = if (is.list(x)) seq_along(x) else NULL, na.rm=FALSE) if (is.logical(x)) return(.Call(CuniqueNlogical, x, na.rm=na.rm)) x = as_list(x) } + if (!length(by)) by = NULL #4594 o = forderv(x, by=by, retGrp=TRUE, na.last=if (!na.rm) FALSE else NA) starts = attr(o, 'starts', exact=TRUE) - if (!na.rm) { - length(starts) - } else { + if (na.rm) { # TODO: internal efficient sum # fix for #1771, account for already sorted input sum( (if (length(o)) o[starts] else starts) != 0L) + } else { + length(starts) } } diff --git a/R/fcast.R b/R/fcast.R index dbde95846a..a95f03a448 100644 --- a/R/fcast.R +++ 
b/R/fcast.R @@ -57,7 +57,7 @@ value_vars = function(value.var, varnames) { valnames = unique(unlist(value.var)) iswrong = which(!valnames %chin% varnames) if (length(iswrong)) - stop("value.var values [", paste(value.var[iswrong], collapse=", "), "] are not found in 'data'.") + stop("value.var values ", brackify(value.var[iswrong]), " are not found in 'data'.") value.var } diff --git a/R/fmelt.R b/R/fmelt.R index 3594fce8ca..009369ea9e 100644 --- a/R/fmelt.R +++ b/R/fmelt.R @@ -3,7 +3,7 @@ # reshape2 package is deprecated since December 2017, so we'll deprecate our # redirection as well -melt <- function(data, ..., na.rm = FALSE, value.name = "value") { +melt = function(data, ..., na.rm = FALSE, value.name = "value") { if (is.data.table(data)) { UseMethod("melt", data) # if data is not data.table and reshape2 is installed, this won't dispatch to reshape2's method; @@ -22,12 +22,172 @@ melt <- function(data, ..., na.rm = FALSE, value.name = "value") { patterns = function(..., cols=character(0L)) { # if ... has no names, names(list(...)) will be ""; # this assures they'll be NULL instead - p = unlist(list(...), use.names = any(nzchar(names(...)))) + L = list(...) + p = unlist(L, use.names = any(nzchar(names(L)))) if (!is.character(p)) stop("Input patterns must be of type character.") - lapply(p, grep, cols) + matched = lapply(p, grep, cols) + # replace with lengths when R 3.2.0 dependency arrives + if (length(idx <- which(sapply(matched, length) == 0L))) + stop('Pattern', if (length(idx) > 1L) 's', ' not found: [', + paste(p[idx], collapse = ', '), ']') + matched } +measure = function(..., sep="_", pattern, cols, multiple.keyword="value.name") { + mcall = match.call() + L = as.list(mcall)[-1] + formal.names = names(formals()) + formal.i.vec = which(names(L) %in% formal.names) + fun.list = L[-formal.i.vec] + user.named = names(fun.list) != "" + is.symb = sapply(fun.list, is.symbol) + bad.i = which((!user.named) & (!is.symb)) + if (length(bad.i)) { + stop("each ... 
argument to measure must be either a symbol without argument name, or a function with argument name, problems: ", paste(bad.i, collapse=",")) + } + names(fun.list)[!user.named] = sapply(fun.list[!user.named], paste) + fun.list[!user.named] = list(NULL) + # group names error checking. + group.is.formal = names(fun.list) %in% formal.names + if (any(group.is.formal)) { + bad.names = names(fun.list)[group.is.formal] + stop("group names specified in ... conflict with measure argument names; please fix by changing group names: ", paste(bad.names, collapse=",")) + } + # evaluate each value in ... and stop if not function. + for (fun.i in which(user.named)) { + fun = eval(fun.list[[fun.i]], parent.frame(1L)) + if (!is.function(fun) || length(formals(args(fun)))==0) { + stop("each ... argument to measure must be a function with at least one argument, problem: ", names(fun.list)[[fun.i]]) + } + fun.list[[fun.i]] = fun + } + measurev.args = c( + list(fun.list), + L[formal.i.vec], + list(group.desc="... arguments to measure")) + do.call(measurev, measurev.args) +} + +measurev = function(fun.list, sep="_", pattern, cols, multiple.keyword="value.name", group.desc="elements of fun.list"){ + # 1. basic error checking. 
+ if (!missing(sep) && !missing(pattern)) { + stop("both sep and pattern arguments used; must use either sep or pattern (not both)") + } + if (!(is.character(multiple.keyword) && length(multiple.keyword)==1 && !is.na(multiple.keyword) && nchar(multiple.keyword)>0)) { + stop("multiple.keyword must be a character string with nchar>0") + } + if (!is.character(cols)) { + stop("cols must be a character vector of column names") + } + prob.i <- if (is.null(names(fun.list))) { + seq_along(fun.list) + } else { + which(names(fun.list) == "") + } + if (length(prob.i)) { + stop("in measurev, ", group.desc, " must be named, problems: ", paste(prob.i, collapse=",")) + } + err.names.unique = function(err.what, name.vec) { + name.tab = table(name.vec) + bad.counts = name.tab[1 < name.tab] + if (length(bad.counts)) { + stop(err.what, " should be uniquely named, problems: ", paste(names(bad.counts), collapse=",")) + } + } + err.args.groups = function(type, N){ + if (N != length(fun.list)) { + stop("number of ", group.desc, " =", length(fun.list), " must be same as ", type, " =", N) + } + } + err.names.unique(group.desc, names(fun.list)) + # 2. compute initial group data table, used as variable_table attribute. 
+ group.mat = if (!missing(pattern)) { + if (!is.character(pattern)) { + stop("pattern must be character string") + } + match.vec = regexpr(pattern, cols, perl=TRUE) + measure.vec = which(0 < match.vec) + if (length(measure.vec) == 0L) { + stop("pattern did not match any cols, so nothing would be melted; fix by changing pattern") + } + start = attr(match.vec, "capture.start")[measure.vec, , drop=FALSE] + if (is.null(start)) { + stop("pattern must contain at least one capture group (parenthesized sub-pattern)") + } + err.args.groups("number of capture groups in pattern", ncol(start)) + end = attr(match.vec, "capture.length")[measure.vec,]+start-1L + names.mat = matrix(cols[measure.vec], nrow(start), ncol(start)) + substr(names.mat, start, end) + } else { #pattern not specified, so split using sep. + if (!is.character(sep)) { + stop("sep must be character string") + } + list.of.vectors = strsplit(cols, sep, fixed=TRUE) + vector.lengths = sapply(list.of.vectors, length) + n.groups = max(vector.lengths) + if (n.groups == 1) { + stop("each column name results in only one item after splitting using sep, which means that all columns would be melted; to fix please either specify melt on all columns directly without using measure, or use a different sep/pattern specification") + } + err.args.groups("max number of items after splitting column names", n.groups) + measure.vec = which(vector.lengths==n.groups) + do.call(rbind, list.of.vectors[measure.vec]) + } + err.names.unique("measured columns", cols[measure.vec]) + uniq.mat = unique(group.mat) + if (nrow(uniq.mat) < nrow(group.mat)) { + stop("number of unique column IDs =", nrow(uniq.mat), " is less than number of melted columns =", nrow(group.mat), "; fix by changing pattern/sep") + } + colnames(group.mat) = names(fun.list) + group.dt = data.table(group.mat) + # 3. apply conversion functions to group data table. 
+ fun.i.vec = which(!sapply(fun.list, is.null)) + for (group.i in fun.i.vec) { + group.name = names(fun.list)[[group.i]] + fun = fun.list[[group.i]] + if (!is.function(fun) || length(formals(args(fun)))==0) { + stop("in the measurev fun.list, each non-NULL element must be a function with at least one argument, problem: ", group.name) + } + group.val = fun(group.dt[[group.name]]) + if (!(is.atomic(group.val) && length(group.val)==nrow(group.dt))) { + stop("each conversion function must return an atomic vector with same length as its first argument, problem: ", group.name) + } + if (all(is.na(group.val))) { + stop(group.name, " conversion function returned vector of all NA") + } + set(group.dt, j=group.name, value=group.val) + } + group.uniq = unique(group.dt) + if (nrow(group.uniq) < nrow(group.dt)) { + stop("number of unique groups after applying type conversion functions less than number of groups, change type conversion") + } + # 4. compute measure.vars list or vector. + if (multiple.keyword %in% names(fun.list)) {# multiple output columns. 
+ if (!is.character(group.dt[[multiple.keyword]])) { + stop(multiple.keyword, " column class=", class(group.dt[[multiple.keyword]])[[1L]], " after applying conversion function, but must be character") + } + is.other = names(group.dt) != multiple.keyword + if (!any(is.other)) { + stop(multiple.keyword, " is the only group; fix by creating at least one more group") + } + other.values = lapply(group.dt[, is.other, with=FALSE], unique) + other.values$stringsAsFactors = FALSE + other.dt = data.table(do.call(expand.grid, other.values)) + measure.list = structure(list(), variable_table=other.dt) + column.values = unique(group.dt[[multiple.keyword]]) + for (column.val in column.values) { + select.dt = data.table(other.dt) + set(select.dt, j=multiple.keyword, value=column.val) + measure.list[[column.val]] = data.table( + measure.vec, group.dt + )[select.dt, measure.vec, on=names(select.dt)] + } + measure.list + } else {# single output column. + structure(measure.vec, variable_table=group.dt) + } +} + melt.data.table = function(data, id.vars, measure.vars, variable.name = "variable", value.name = "value", ..., na.rm = FALSE, variable.factor = TRUE, value.factor = FALSE, verbose = getOption("datatable.verbose")) { @@ -35,8 +195,11 @@ melt.data.table = function(data, id.vars, measure.vars, variable.name = "variabl if (missing(id.vars)) id.vars=NULL if (missing(measure.vars)) measure.vars = NULL measure.sub = substitute(measure.vars) - if (measure.sub %iscall% "patterns") { - measure.vars = do_patterns(measure.sub, names(data)) + if (is.call(measure.sub)) { + eval.result = eval_with_cols(measure.sub, names(data)) + if (!is.null(eval.result)) { + measure.vars = eval.result + } } if (is.list(measure.vars) && length(measure.vars) > 1L) { meas.nm = names(measure.vars) @@ -62,8 +225,8 @@ melt.data.table = function(data, id.vars, measure.vars, variable.name = "variabl variable.name, value.name, as.logical(na.rm), as.logical(verbose)) setDT(ans) - if (any(duplicated(names(ans)))) { - 
cat("Duplicate column names found in molten data.table. Setting unique names using 'make.names'\n") + if (anyDuplicated(names(ans))) { + catf("Duplicate column names found in molten data.table. Setting unique names using 'make.names'\n") setnames(ans, make.unique(names(ans))) } setattr(ans, 'sorted', NULL) diff --git a/R/foverlaps.R b/R/foverlaps.R index 8028482abb..fc0b706ccd 100644 --- a/R/foverlaps.R +++ b/R/foverlaps.R @@ -128,7 +128,7 @@ foverlaps = function(x, y, by.x=if (!is.null(key(x))) key(x) else key(y), by.y=k end = yintervals[2L], any =, within =, equal = yintervals) call = construct(head(ynames, -2L), uycols, type) - if (verbose) {last.started.at=proc.time();cat("unique() + setkey() operations done in ...");flush.console()} + if (verbose) {last.started.at=proc.time();catf("unique() + setkey() operations done in ...");flush.console()} uy = unique(y[, eval(call)]) # this started to fail from R 4.1 due to c(POSIXct, numeric) setkey(uy)[, `:=`(lookup = list(list(integer(0L))), type_lookup = list(list(integer(0L))), count=0L, type_count=0L)] if (verbose) {cat(timetaken(last.started.at),"\n"); flush.console()} @@ -154,7 +154,7 @@ foverlaps = function(x, y, by.x=if (!is.null(key(x))) key(x) else key(y), by.y=k .Call(Clookup, uy, nrow(y), indices(uy, y, yintervals, nomatch=0L, roll=roll), maxgap, minoverlap, mult, type, verbose) if (maxgap == 0L && minoverlap == 1L) { # iintervals = tail(names(x), 2L) # iintervals not yet used so commented out for now - if (verbose) {last.started.at=proc.time();cat("binary search(es) done in ...");flush.console()} + if (verbose) {last.started.at=proc.time();catf("binary search(es) done in ...");flush.console()} xmatches = indices(uy, x, xintervals, nomatch=0L, roll=roll) if (verbose) {cat(timetaken(last.started.at),"\n");flush.console()} olaps = .Call(Coverlaps, uy, xmatches, mult, type, nomatch, verbose) diff --git a/R/frank.R b/R/frank.R index 763b8267e5..47e701c4cd 100644 --- a/R/frank.R +++ b/R/frank.R @@ -22,10 +22,13 @@ 
frankv = function(x, cols=seq_along(x), order=1L, na.last=TRUE, ties.method=c("a if (!length(cols)) stop("x is a list, 'cols' can not be 0-length") } - x = .shallow(x, cols) # shallow copy even if list.. + # need to unlock for #4429 + x = .shallow(x, cols, unlock = TRUE) # shallow copy even if list.. setDT(x) cols = seq_along(cols) if (is.na(na.last)) { + if ("..na_prefix.." %chin% names(x)) + stop("Input column '..na_prefix..' conflicts with data.table internal usage; please rename") set(x, j = "..na_prefix..", value = is_na(x, cols)) order = if (length(order) == 1L) c(1L, rep(order, length(cols))) else c(1L, order) cols = c(ncol(x), cols) @@ -39,6 +42,8 @@ frankv = function(x, cols=seq_along(x), order=1L, na.last=TRUE, ties.method=c("a idx = NULL n = nrow(x) } + if ('..stats_runif..' %chin% names(x)) + stop("Input column '..stats_runif..' conflicts with data.table internal usage; please rename") set(x, idx, '..stats_runif..', stats::runif(n)) order = if (length(order) == 1L) c(rep(order, length(cols)), 1L) else c(order, 1L) cols = c(cols, ncol(x)) diff --git a/R/fread.R b/R/fread.R index 0da96fe0e4..eb765fe639 100644 --- a/R/fread.R +++ b/R/fread.R @@ -21,21 +21,26 @@ yaml=FALSE, autostart=NA, tmpdir=tempdir(), tz="UTC") if (length(encoding) != 1L || !encoding %chin% c("unknown", "UTF-8", "Latin-1")) { stop("Argument 'encoding' must be 'unknown', 'UTF-8' or 'Latin-1'.") } - stopifnot( isTRUEorFALSE(strip.white), isTRUEorFALSE(blank.lines.skip), isTRUEorFALSE(fill), isTRUEorFALSE(showProgress), - isTRUEorFALSE(verbose), isTRUEorFALSE(check.names), isTRUEorFALSE(logical01), isTRUEorFALSE(keepLeadingZeros), isTRUEorFALSE(yaml) ) - stopifnot( isTRUEorFALSE(stringsAsFactors) || (is.double(stringsAsFactors) && length(stringsAsFactors)==1L && 0.0<=stringsAsFactors && stringsAsFactors<=1.0)) - stopifnot( is.numeric(nrows), length(nrows)==1L ) - if (is.na(nrows) || nrows<0L) nrows=Inf # accept -1 to mean Inf, as read.table does + stopifnot( + isTRUEorFALSE(strip.white), 
isTRUEorFALSE(blank.lines.skip), isTRUEorFALSE(fill), isTRUEorFALSE(showProgress), + isTRUEorFALSE(verbose), isTRUEorFALSE(check.names), isTRUEorFALSE(logical01), isTRUEorFALSE(keepLeadingZeros), isTRUEorFALSE(yaml), + isTRUEorFALSE(stringsAsFactors) || (is.double(stringsAsFactors) && length(stringsAsFactors)==1L && 0.0<=stringsAsFactors && stringsAsFactors<=1.0), + is.numeric(nrows), length(nrows)==1L + ) + nrows=as.double(nrows) #4686 + if (is.na(nrows) || nrows<0) nrows=Inf # accept -1 to mean Inf, as read.table does if (identical(header,"auto")) header=NA - stopifnot(is.logical(header) && length(header)==1L) # TRUE, FALSE or NA - stopifnot(is.numeric(nThread) && length(nThread)==1L) + stopifnot( + is.logical(header) && length(header)==1L, # TRUE, FALSE or NA + is.numeric(nThread) && length(nThread)==1L + ) nThread=as.integer(nThread) stopifnot(nThread>=1L) if (!is.null(text)) { if (!is.character(text)) stop("'text=' is type ", typeof(text), " but must be character.") if (!length(text)) return(data.table()) if (length(text) > 1L) { - cat(text, file=(tmpFile<-tempfile(tmpdir=tmpdir)), sep="\n") # avoid paste0() which could create a new very long single string in R's memory + writeLines(text, tmpFile<-tempfile(tmpdir=tmpdir)) # avoid paste0() which could create a new very long single string in R's memory file = tmpFile on.exit(unlink(tmpFile), add=TRUE) } else { @@ -50,13 +55,11 @@ yaml=FALSE, autostart=NA, tmpdir=tempdir(), tz="UTC") if (input=="" || length(grep('\\n|\\r', input))) { # input is data itself containing at least one \n or \r } else { - if (substring(input,1L,1L)==" ") { + if (startsWith(input, " ")) { stop("input= contains no \\n or \\r, but starts with a space. 
Please remove the leading space, or use text=, file= or cmd=") } - str6 = substring(input,1L,6L) # avoid grepl() for #2531 - str7 = substring(input,1L,7L) - str8 = substring(input,1L,8L) - if (str7=="ftps://" || str8=="https://") { + str7 = substr(input, 1L, 7L) # avoid grepl() for #2531 + if (str7=="ftps://" || startsWith(input, "https://")) { # nocov start if (!requireNamespace("curl", quietly = TRUE)) stop("Input URL requires https:// connection for which fread() requires 'curl' package which cannot be found. Please install 'curl' using 'install.packages('curl')'.") # nocov @@ -66,7 +69,7 @@ yaml=FALSE, autostart=NA, tmpdir=tempdir(), tz="UTC") on.exit(unlink(tmpFile), add=TRUE) # nocov end } - else if (str6=="ftp://" || str7== "http://" || str7=="file://") { + else if (startsWith(input, "ftp://") || str7== "http://" || str7=="file://") { # nocov start method = if (str7=="file://") "internal" else getOption("download.file.method", default="auto") # force "auto" when file:// to ensure we don't use an invalid option (e.g. wget), #1668 @@ -80,7 +83,7 @@ yaml=FALSE, autostart=NA, tmpdir=tempdir(), tz="UTC") else if (length(grep(' ', input, fixed = TRUE)) && !file.exists(input)) { # file name or path containing spaces is not a command cmd = input if (input_has_vars && getOption("datatable.fread.input.cmd.message", TRUE)) { - message("Taking input= as a system command ('",cmd,"') and a variable has been used in the expression passed to `input=`. Please use fread(cmd=...). There is a security concern if you are creating an app, and the app could have a malicious user, and the app is not running in a secure environment; e.g. the app is running as root. Please read item 5 in the NEWS file for v1.11.6 for more information and for the option to suppress this message.") + message("Taking input= as a system command because it contains a space ('",cmd,"'). If it's a filename please remove the space, or use file= explicitly. 
A variable is being passed to input= and when this is taken as a system command there is a security concern if you are creating an app, the app could have a malicious user, and the app is not running in a secure environment; e.g. the app is running as root. Please read item 5 in the NEWS file for v1.11.6 for more information and for the option to suppress this message.") } } else { @@ -102,12 +105,10 @@ yaml=FALSE, autostart=NA, tmpdir=tempdir(), tz="UTC") if (data.table) 'data.table' else 'data.frame', ".") return(if (data.table) data.table(NULL) else data.frame(NULL)) } - ext2 = substring(file, nchar(file)-2L, nchar(file)) # last 3 characters ".gz" - ext3 = substring(file, nchar(file)-3L, nchar(file)) # last 4 characters ".bz2" - if (ext2==".gz" || ext3==".bz2") { + if ((is_gz <- endsWith(file, ".gz")) || endsWith(file, ".bz2")) { if (!requireNamespace("R.utils", quietly = TRUE)) stop("To read gz and bz2 files directly, fread() requires 'R.utils' package which cannot be found. Please install 'R.utils' using 'install.packages('R.utils')'.") # nocov - FUN = if (ext2==".gz") gzfile else bzfile + FUN = if (is_gz) gzfile else bzfile R.utils::decompressFile(file, decompFile<-tempfile(tmpdir=tmpdir), ext=NULL, FUN=FUN, remove=FALSE) # ext is not used by decompressFile when destname is supplied, but isn't optional file = decompFile # don't use 'tmpFile' symbol again, as tmpFile might be the http://domain.org/file.csv.gz download on.exit(unlink(decompFile), add=TRUE) @@ -169,9 +170,10 @@ yaml=FALSE, autostart=NA, tmpdir=tempdir(), tz="UTC") yaml_border_re = '^#?---' if (!grepl(yaml_border_re, first_line)) { close(f) - stop('Encountered <', substring(first_line, 1L, 50L), if (nchar(first_line) > 50L) '...', '> at the first ', - 'unskipped line (', 1L+skip, '), which does not constitute the start to a valid YAML header ', - '(expecting something matching regex "', yaml_border_re, '"); please check your input and try again.') + stop(gettextf( + 'Encountered <%s%s> at the 
first unskipped line (%d), which does not constitute the start to a valid YAML header (expecting something matching regex "%s"); please check your input and try again.', + substr(first_line, 1L, 50L), if (nchar(first_line) > 50L) '...' else '', 1L+skip, yaml_border_re + )) } yaml_comment_re = '^#' @@ -193,7 +195,7 @@ yaml=FALSE, autostart=NA, tmpdir=tempdir(), tz="UTC") yaml_header = yaml::yaml.load(yaml_string) yaml_names = names(yaml_header) - if (verbose) cat('Processed', n_read, 'lines of YAML metadata with the following top-level fields:', brackify(yaml_names), '\n') + if (verbose) catf('Processed %d lines of YAML metadata with the following top-level fields: %s\n', n_read, brackify(yaml_names)) # process header first since it impacts how to handle colClasses if ('header' %chin% yaml_names) { if ('header' %chin% call_args) message("User-supplied 'header' will override that found in metadata.") @@ -201,7 +203,7 @@ yaml=FALSE, autostart=NA, tmpdir=tempdir(), tz="UTC") } if ('schema' %chin% yaml_names) { new_types = sapply(yaml_header$schema$fields, `[[`, 'type') - if (any(null_idx <- sapply(new_types, is.null))) + if (any(null_idx <- vapply_1b(new_types, is.null))) new_types = do.call(c, new_types) synonms = rbindlist(list( character = list(syn = c('character', 'string')), @@ -325,7 +327,7 @@ yaml=FALSE, autostart=NA, tmpdir=tempdir(), tz="UTC") } else { cols_to_factor = which(vapply_1b(ans, is.character)) } - if (verbose) cat("stringsAsFactors=", stringsAsFactors, " converted ", length(cols_to_factor), " column(s): ", brackify(names(ans)[cols_to_factor]), "\n", sep="") + if (verbose) catf("stringsAsFactors=%s converted %d column(s): %s\n", stringsAsFactors, length(cols_to_factor), brackify(names(ans)[cols_to_factor])) for (j in cols_to_factor) set(ans, j=j, value=as_factor(.subset2(ans, j))) } @@ -341,10 +343,10 @@ yaml=FALSE, autostart=NA, tmpdir=tempdir(), tz="UTC") } if (yaml) setattr(ans, 'yaml_metadata', yaml_header) if (!is.null(index) && data.table) { - 
if (!all(sapply(index, is.character))) + if (!all(vapply_1b(index, is.character))) stop("index argument of data.table() must be a character vector naming columns (NB: col.names are applied before this)") if (is.list(index)) { - to_split = sapply(index, length) == 1L + to_split = vapply_1i(index, length) == 1L if (any(to_split)) index[to_split] = sapply(index[to_split], strsplit, split = ",", fixed = TRUE) } else { diff --git a/R/fwrite.R b/R/fwrite.R index 1971c0e4ea..8325f137d3 100644 --- a/R/fwrite.R +++ b/R/fwrite.R @@ -1,5 +1,6 @@ fwrite = function(x, file="", append=FALSE, quote="auto", - sep=",", sep2=c("","|",""), eol=if (.Platform$OS.type=="windows") "\r\n" else "\n", + sep=getOption("datatable.fwrite.sep", ","), + sep2=c("","|",""), eol=if (.Platform$OS.type=="windows") "\r\n" else "\n", na="", dec=".", row.names=FALSE, col.names=TRUE, qmethod=c("double","escape"), logical01=getOption("datatable.logical01", FALSE), # due to change to TRUE; see NEWS @@ -11,8 +12,12 @@ fwrite = function(x, file="", append=FALSE, quote="auto", compress = c("auto", "none", "gzip"), yaml = FALSE, bom = FALSE, - verbose=getOption("datatable.verbose", FALSE)) { + verbose=getOption("datatable.verbose", FALSE), + encoding = "") { na = as.character(na[1L]) # fix for #1725 + if (length(encoding) != 1L || !encoding %chin% c("", "UTF-8", "native")) { + stop("Argument 'encoding' must be '', 'UTF-8' or 'native'.") + } if (missing(qmethod)) qmethod = qmethod[1L] if (missing(compress)) compress = compress[1L] if (missing(dateTimeAs)) { dateTimeAs = dateTimeAs[1L] } @@ -58,7 +63,7 @@ fwrite = function(x, file="", append=FALSE, quote="auto", file = path.expand(file) # "~/foo/bar" if (append && (file=="" || file.exists(file))) { if (missing(col.names)) col.names = FALSE - if (verbose) cat("Appending to existing file so setting bom=FALSE and yaml=FALSE\n") + if (verbose) catf("Appending to existing file so setting bom=FALSE and yaml=FALSE\n") bom = FALSE yaml = FALSE } @@ -108,7 +113,7 @@ 
fwrite = function(x, file="", append=FALSE, quote="auto", file = enc2native(file) # CfwriteR cannot handle UTF-8 if that is not the native encoding, see #3078. .Call(CfwriteR, x, file, sep, sep2, eol, na, dec, quote, qmethod=="escape", append, row.names, col.names, logical01, scipen, dateTimeAs, buffMB, nThread, - showProgress, is_gzip, bom, yaml, verbose) + showProgress, is_gzip, bom, yaml, verbose, encoding) invisible() } diff --git a/R/groupingsets.R b/R/groupingsets.R index 6281615dd5..2300d09da0 100644 --- a/R/groupingsets.R +++ b/R/groupingsets.R @@ -27,10 +27,12 @@ cube.data.table = function(x, j, by, .SDcols, id = FALSE, ...) { stop("Argument 'by' must be a character vector of column names used in grouping.") if (!is.logical(id)) stop("Argument 'id' must be a logical scalar.") + if (missing(j)) + stop("Argument 'j' is required") # generate grouping sets for cube - power set: http://stackoverflow.com/a/32187892/2490497 n = length(by) keepBool = sapply(2L^(seq_len(n)-1L), function(k) rep(c(FALSE, TRUE), times=k, each=((2L^n)/(2L*k)))) - sets = lapply((2L^n):1L, function(j) by[keepBool[j, ]]) + sets = lapply((2L^n):1L, function(jj) by[keepBool[jj, ]]) # redirect to workhorse function jj = substitute(j) groupingsets.data.table(x, by=by, sets=sets, .SDcols=.SDcols, id=id, jj=jj) @@ -51,7 +53,7 @@ groupingsets.data.table = function(x, j, by, sets, .SDcols, id = FALSE, jj, ...) stop("Argument 'by' must be a character vector of column names used in grouping.") if (anyDuplicated(by) > 0L) stop("Argument 'by' must have unique column names for grouping.") - if (!is.list(sets) || !all(sapply(sets, is.character))) + if (!is.list(sets) || !all(vapply_1b(sets, is.character))) stop("Argument 'sets' must be a list of character vectors.") if (!is.logical(id)) stop("Argument 'id' must be a logical scalar.") @@ -60,7 +62,7 @@ groupingsets.data.table = function(x, j, by, sets, .SDcols, id = FALSE, jj, ...) stop("All columns used in 'sets' argument must be in 'by' too. 
Columns used in 'sets' but not present in 'by': ", brackify(setdiff(sets.all.by, by))) if (id && "grouping" %chin% names(x)) stop("When using `id=TRUE` the 'x' data.table must not have a column named 'grouping'.") - if (any(sapply(sets, anyDuplicated))) + if (any(vapply_1i(sets, anyDuplicated))) # anyDuplicated returns index of first duplicate, otherwise 0L stop("Character vectors in 'sets' list must not have duplicated column names within a single grouping set.") if (length(sets) > 1L && (idx<-anyDuplicated(lapply(sets, sort)))) warning("'sets' contains a duplicate (i.e., equivalent up to sorting) element at index ", idx, "; as such, there will be duplicate rows in the output -- note that grouping by A,B and B,A will produce the same aggregations. Use `sets=unique(lapply(sets, sort))` to eliminate duplicates.") diff --git a/R/last.R b/R/last.R index abf4050b40..8dff3271a1 100644 --- a/R/last.R +++ b/R/last.R @@ -7,12 +7,12 @@ last = function(x, n=1L, ...) { if (nargs()>1L) { if ("package:xts" %chin% search()) { if (verbose) - cat("last: using xts::last: !is.xts(x) & nargs>1 & 'package:xts'%in%search()\n") + catf("%s: using %s: %s\n", "last", "xts::last", "!is.xts(x) & nargs>1 & 'package:xts'%in%search()") xts::last(x, n=n, ...) } else { # nocov start if (verbose) - cat("last: using utils::tail: !is.xts(x) & nargs>1 & !'package:xts'%in%search()\n") + catf("%s: using %s: %s\n", "last", "utils::tail", "!is.xts(x) & nargs>1 & !'package:xts'%in%search()") utils::tail(x, n=n, ...) # nocov end } @@ -20,24 +20,24 @@ last = function(x, n=1L, ...) 
{ dx = dim(x) if (is.null(dx)) { if (verbose) - cat("last: using 'x[[length(x)]]': !is.xts(x) & !nargs>1 & is.null(dim(x))\n") + catf("%s: using %s: %s\n", "last", "'x[[length(x)]]'", "!is.xts(x) & !nargs>1 & is.null(dim(x))") lx = length(x) if (!lx) x else x[[lx]] } else if (is.data.frame(x)) { if (verbose) - cat("last: using 'x[nrow(x),]': !is.xts(x) & !nargs>1 & is.data.frame(x)\n") + catf("%s: using %s: %s\n", "last", "'x[nrow(x),]'", "!is.xts(x) & !nargs>1 & is.data.frame(x)") x[dx[1L], , drop=FALSE] } else { if (verbose) - cat("last: using utils::tail: !is.xts(x) & !nargs>1 & !is.null(dim(x)) & !is.data.frame(x)\n") + catf("%s: using %s: %s\n", "last", "utils::tail", "!is.xts(x) & !nargs>1 & !is.null(dim(x)) & !is.data.frame(x)") utils::tail(x, n=n, ...) } } } else { if (!requireNamespace("xts", quietly=TRUE)) - stop(gettextf("'xts' class passed to %s function but 'xts' is not available, you should have 'xts' installed already", "data.table::last", domain="R-data.table")) # nocov + stop(domain=NA, gettextf("'xts' class passed to %s function but 'xts' is not available, you should have 'xts' installed already", "data.table::last")) # nocov if (verbose) - cat("last: using xts::last: is.xts(x)\n") + catf("%s: using %s: %s\n", "last", "xts::last", "is.xts(x)") xts::last(x, n=n, ...) } } @@ -48,12 +48,12 @@ first = function(x, n=1L, ...) { if (nargs()>1L) { if ("package:xts" %chin% search()) { if (verbose) - cat("first: using xts::first: !is.xts(x) & nargs>1 & 'package:xts'%in%search()\n") + catf("%s: using %s: %s\n", "first", "xts::first", "!is.xts(x) & nargs>1 & 'package:xts'%in%search()") xts::first(x, n=n, ...) } else { # nocov start if (verbose) - cat("first: using utils::head: !is.xts(x) & nargs>1 & !'package:xts'%in%search()\n") + catf("%s: using %s: %s\n", "first", "utils::head", "!is.xts(x) & nargs>1 & !'package:xts'%in%search()") utils::head(x, n=n, ...) # nocov end } @@ -61,24 +61,24 @@ first = function(x, n=1L, ...) 
{ dx = dim(x) if (is.null(dx)) { if (verbose) - cat("first: using 'x[[1L]]': !is.xts(x) & !nargs>1 & is.null(dim(x))\n") + catf("%s: using %s: %s\n", "first", "'x[[1L]]'", "!is.xts(x) & !nargs>1 & is.null(dim(x))") lx = length(x) if (!lx) x else x[[1L]] } else if (is.data.frame(x)) { if (verbose) - cat("first: using 'x[1L,]': !is.xts(x) & !nargs>1 & is.data.frame(x)\n") + catf("%s: using %s: %s\n", "first", "'x[1L,]'", "!is.xts(x) & !nargs>1 & is.data.frame(x)") if (!dx[1L]) x else x[1L, , drop=FALSE] } else { if (verbose) - cat("first: using utils::head: !is.xts(x) & !nargs>1 & !is.null(dim(x)) & !is.data.frame(x)\n") + catf("%s: using %s: %s\n", "first", "utils::head", "!is.xts(x) & !nargs>1 & !is.null(dim(x)) & !is.data.frame(x)") utils::head(x, n=n, ...) } } } else { if (!requireNamespace("xts", quietly=TRUE)) - stop(gettextf("'xts' class passed to %s function but 'xts' is not available, you should have 'xts' installed already", "data.table::first", domain="R-data.table")) # nocov + stop(domain=NA, gettextf("'xts' class passed to %s function but 'xts' is not available, you should have 'xts' installed already", "data.table::first")) # nocov if (verbose) - cat("first: using xts::first: is.xts(x)\n") + catf("%s: using %s: %s\n", "first", "xts::first", "is.xts(x)") xts::first(x, n=n, ...) } } diff --git a/R/like.R b/R/like.R index c66678c643..dd2a8c5b59 100644 --- a/R/like.R +++ b/R/like.R @@ -3,7 +3,10 @@ # returns 'logical' so can be combined with other where clauses. 
like = function(vector, pattern, ignore.case = FALSE, fixed = FALSE) { if (is.factor(vector)) { - as.integer(vector) %in% grep(pattern, levels(vector), ignore.case = ignore.case, fixed = fixed) + # indexing by factors is equivalent to indexing by the numeric codes, see ?`[` #4748 + ret = grepl(pattern, levels(vector), ignore.case = ignore.case, fixed = fixed)[vector] + ret[is.na(ret)] = FALSE + ret } else { # most usually character, but integer and numerics will be silently coerced by grepl grepl(pattern, vector, ignore.case = ignore.case, fixed = fixed) diff --git a/R/merge.R b/R/merge.R index fe3bdb4549..8dc59e018b 100644 --- a/R/merge.R +++ b/R/merge.R @@ -11,9 +11,17 @@ merge.data.table = function(x, y, by = NULL, by.x = NULL, by.y = NULL, all = FAL by = key(x) } } - if ((x0 <- length(x)==0L) | (y0 <- length(y)==0L)) warning("You are trying to join data.tables where ", if(x0 & y0) "'x' and 'y' arguments are" else if(x0 & !y0) "'x' argument is" else if(!x0 & y0) "'y' argument is", " 0 columns data.table.") - if (any(duplicated(names(x)))) stop("x has some duplicated column name(s): ",paste(names(x)[duplicated(names(x))],collapse=","),". Please remove or rename the duplicate(s) and try again.") - if (any(duplicated(names(y)))) stop("y has some duplicated column name(s): ",paste(names(y)[duplicated(names(y))],collapse=","),". Please remove or rename the duplicate(s) and try again.") + x0 = length(x)==0L + y0 = length(y)==0L + if (x0 || y0) warning(sprintf(ngettext(x0+y0, + "You are trying to join data.tables where %s has 0 columns.", + "You are trying to join data.tables where %s have 0 columns."), + if (x0 && y0) "'x' and 'y'" else if (x0) "'x'" else "'y'" + )) + nm_x = names(x) + nm_y = names(y) + if (anyDuplicated(nm_x)) stop(gettextf("%s has some duplicated column name(s): %s. Please remove or rename the duplicate(s) and try again.", "x", brackify(nm_x[duplicated(nm_x)]))) + if (anyDuplicated(nm_y)) stop(gettextf("%s has some duplicated column name(s): %s. 
Please remove or rename the duplicate(s) and try again.", "y", brackify(nm_y[duplicated(nm_y)]))) ## set up 'by'/'by.x'/'by.y' if ( (!is.null(by.x) || !is.null(by.y)) && length(by.x)!=length(by.y) ) @@ -21,11 +29,11 @@ merge.data.table = function(x, y, by = NULL, by.x = NULL, by.y = NULL, all = FAL if (!missing(by) && !missing(by.x)) warning("Supplied both `by` and `by.x/by.y`. `by` argument will be ignored.") if (!is.null(by.x)) { - if (length(by.x) == 0L || !is.character(by.x) || !is.character(by.y)) + if (length(by.x)==0L || !is.character(by.x) || !is.character(by.y)) stop("A non-empty vector of column names is required for `by.x` and `by.y`.") - if (!all(by.x %chin% names(x))) + if (!all(by.x %chin% nm_x)) stop("Elements listed in `by.x` must be valid column names in x.") - if (!all(by.y %chin% names(y))) + if (!all(by.y %chin% nm_y)) stop("Elements listed in `by.y` must be valid column names in y.") by = by.x names(by) = by.y @@ -35,10 +43,10 @@ merge.data.table = function(x, y, by = NULL, by.x = NULL, by.y = NULL, all = FAL if (is.null(by)) by = key(x) if (is.null(by)) - by = intersect(names(x), names(y)) + by = intersect(nm_x, nm_y) if (length(by) == 0L || !is.character(by)) stop("A non-empty vector of column names for `by` is required.") - if (!all(by %chin% intersect(colnames(x), colnames(y)))) + if (!all(by %chin% intersect(nm_x, nm_y))) stop("Elements listed in `by` must be valid column names in x and y") by = unname(by) by.x = by.y = by @@ -47,8 +55,8 @@ merge.data.table = function(x, y, by = NULL, by.x = NULL, by.y = NULL, all = FAL ## sidestep the auto-increment column number feature-leading-to-bug by ## ensuring no names end in ".1", see unit test ## "merge and auto-increment columns in y[x]" in test-data.frame.like.R - start = setdiff(names(x), by.x) - end = setdiff(names(y), by.y) + start = setdiff(nm_x, by.x) + end = setdiff(nm_y, by.y) dupnames = intersect(start, end) if (length(dupnames)) { start[chmatch(dupnames, start, 0L)] = paste0(dupnames, 
suffixes[1L]) @@ -68,7 +76,7 @@ merge.data.table = function(x, y, by = NULL, by.x = NULL, by.y = NULL, all = FAL missingyidx = y[!x, which=TRUE, on=by, allow.cartesian=allow.cartesian] if (length(missingyidx)) { yy = y[missingyidx] - othercolsx = setdiff(names(x), by) + othercolsx = setdiff(nm_x, by) if (length(othercolsx)) { tmp = rep.int(NA_integer_, length(missingyidx)) # TO DO: use set() here instead.. @@ -80,7 +88,7 @@ merge.data.table = function(x, y, by = NULL, by.x = NULL, by.y = NULL, all = FAL } } # X[Y] syntax puts JIS i columns at the end, merge likes them alongside i. - newend = setdiff(names(y), by.y) + newend = setdiff(nm_y, by.y) # fix for #1290, make sure by.y order is set properly before naming setcolorder(dt, c(by.y, setdiff(names(dt), c(by.y, newend)), newend)) setnames(dt, c(by.x, start, end)) diff --git a/R/onAttach.R b/R/onAttach.R index 75b48eb394..3e93187e2e 100644 --- a/R/onAttach.R +++ b/R/onAttach.R @@ -19,13 +19,14 @@ dev = as.integer(v[1L, 3L]) %% 2L == 1L # version number odd => dev if (!isTRUE(getOption("datatable.quiet"))) { # new option in v1.12.4, #3489 packageStartupMessage("data.table ", v, if(dev)paste0(" IN DEVELOPMENT built ",d,g), - " using ", getDTthreads(verbose=FALSE), " threads (see ?getDTthreads). Latest news: r-datatable.com") - if (gettext("TRANSLATION CHECK", domain='R-data.table') != "TRANSLATION CHECK") - packageStartupMessage(gettext("**********\nRunning data.table in English; package support is available in English only. When searching for online help, be sure to also check for the English error message. This can be obtained by looking at the po/R-.po and po/.po files in the package source, where the native language and English error messages can be found side-by-side\n**********", domain="R-data.table")) + " using ", getDTthreads(verbose=FALSE), " threads (see ?getDTthreads). 
Latest news: r-datatable.com", domain="R-data.table") + # NB: domain= is necessary in .onAttach and .onLoad, see ?gettext and https://bugs.r-project.org/bugzilla/show_bug.cgi?id=18092. + if (gettext(domain="R-data.table", "TRANSLATION CHECK") != "TRANSLATION CHECK") + packageStartupMessage(domain="R-data.table", "**********\nRunning data.table in English; package support is available in English only. When searching for online help, be sure to also check for the English error message. This can be obtained by looking at the po/R-.po and po/.po files in the package source, where the native language and English error messages can be found side-by-side\n**********") if (dev && (Sys.Date() - as.Date(d))>28L) - packageStartupMessage("**********\nThis development version of data.table was built more than 4 weeks ago. Please update: data.table::update.dev.pkg()\n**********") + packageStartupMessage(domain="R-data.table", "**********\nThis development version of data.table was built more than 4 weeks ago. Please update: data.table::update.dev.pkg()\n**********") if (!.Call(ChasOpenMP)) - packageStartupMessage("**********\n", + packageStartupMessage(domain="R-data.table", "**********\n", "This installation of data.table has not detected OpenMP support. It should still work but in single-threaded mode.\n", if (Sys.info()["sysname"]=="Darwin") "This is a Mac. Please read https://mac.r-project.org/openmp/. Please engage with Apple and ask them for support. Check r-datatable.com for updates, and our Mac instructions here: https://github.com/Rdatatable/data.table/wiki/Installation. After several years of many reports of installation problems on Mac, it's time to gingerly point out that there have been no similar problems on Windows or Linux." 
diff --git a/R/onLoad.R b/R/onLoad.R index 230929c4b6..3750510ece 100644 --- a/R/onLoad.R +++ b/R/onLoad.R @@ -25,11 +25,12 @@ if (dllV != RV) { dll = if (.Platform$OS.type=="windows") "dll" else "so" # https://bugs.r-project.org/bugzilla/show_bug.cgi?id=17478 - stop("The datatable.",dll," version (",dllV,") does not match the package (",RV,"). Please close all R sessions to release the old ",toupper(dll)," and reinstall data.table in a fresh R session. The root cause is that R's package installer can in some unconfirmed circumstances leave a package in a state that is apparently functional but where new R code is calling old C code silently: https://bugs.r-project.org/bugzilla/show_bug.cgi?id=17478. Once a package is in this mismatch state it may produce wrong results silently until you next upgrade the package. Please help by adding precise circumstances to 17478 to move the status to confirmed. This mismatch between R and C code can happen with any package not just data.table. It is just that data.table has added this check.") + # NB: domain= is necessary in .onAttach and .onLoad, see ?gettext and https://bugs.r-project.org/bugzilla/show_bug.cgi?id=18092. + stop(domain="R-data.table", "The datatable.",dll," version (",dllV,") does not match the package (",RV,"). Please close all R sessions to release the old ",toupper(dll)," and reinstall data.table in a fresh R session. The root cause is that R's package installer can in some unconfirmed circumstances leave a package in a state that is apparently functional but where new R code is calling old C code silently: https://bugs.r-project.org/bugzilla/show_bug.cgi?id=17478. Once a package is in this mismatch state it may produce wrong results silently until you next upgrade the package. Please help by adding precise circumstances to 17478 to move the status to confirmed. This mismatch between R and C code can happen with any package not just data.table. 
It is just that data.table has added this check.") } builtUsing = readRDS(system.file("Meta/package.rds",package="data.table"))$Built$R if (!identical(base::getRversion()>="4.0.0", builtUsing>="4.0.0")) { - stop("This is R ", base::getRversion(), " but data.table has been installed using R ",builtUsing,". The major version must match. Please reinstall data.table.") + stop(domain="R-data.table", "This is R ", base::getRversion(), " but data.table has been installed using R ",builtUsing,". The major version must match. Please reinstall data.table.") # the if(R>=4.0.0) in NAMESPACE when registering S3 methods rbind.data.table and cbind.data.table happens on install; #3968 } } @@ -93,14 +94,14 @@ } if (!is.null(getOption("datatable.old.bywithoutby"))) - warning("Option 'datatable.old.bywithoutby' has been removed as warned for 2 years. It is now ignored. Please use by=.EACHI instead and stop using this option.") + warning(domain="R-data.table", "Option 'datatable.old.bywithoutby' has been removed as warned for 2 years. It is now ignored. Please use by=.EACHI instead and stop using this option.") if (!is.null(getOption("datatable.old.unique.by.key"))) - warning("Option 'datatable.old.unique.by.key' has been removed as warned for 4 years. It is now ignored. Please use by=key(DT) instead and stop using this option.") + warning(domain="R-data.table", "Option 'datatable.old.unique.by.key' has been removed as warned for 4 years. It is now ignored. 
Please use by=key(DT) instead and stop using this option.") # Test R behaviour that changed in v3.1 and is now depended on x = 1L:3L y = list(x) - if (address(x) != address(y[[1L]])) stop("Unexpected base R behaviour: list(x) has copied x") + if (address(x) != address(y[[1L]])) stop(domain="R-data.table", "Unexpected base R behaviour: list(x) has copied x") DF = data.frame(a=1:3, b=4:6) add1 = address(DF$a) @@ -108,7 +109,7 @@ names(DF) = c("A","B") add3 = address(DF$A) add4 = address(DF$B) - if (add1!=add3 || add2!=add4) stop("Unexpected base R behaviour: names<- has copied column contents") + if (add1!=add3 || add2!=add4) stop(domain="R-data.table", "Unexpected base R behaviour: names<- has copied column contents") DF = data.frame(a=1:3, b=4:6) add1 = address(DF$a) @@ -118,10 +119,10 @@ add4 = address(DF$a) add5 = address(DF$b) add6 = address(DF) - if (add2==add5) stop("Unexpected base R behaviour: DF[2,2]<- did not copy column 2 which was assigned to") - if (add1!=add4) stop("Unexpected base R behaviour: DF[2,2]<- copied the first column which was not assigned to, too") + if (add2==add5) stop(domain="R-data.table", "Unexpected base R behaviour: DF[2,2]<- did not copy column 2 which was assigned to") + if (add1!=add4) stop(domain="R-data.table", "Unexpected base R behaviour: DF[2,2]<- copied the first column which was not assigned to, too") - if (add3==add6) warning("Unexpected base R behaviour: DF[2,2]<- has not copied address(DF)") + if (add3==add6) warning(domain="R-data.table", "Unexpected base R behaviour: DF[2,2]<- has not copied address(DF)") # R could feasibly in future not copy DF's vecsxp in this case. If that changes in R, we'd like to know via the warning # because tests will likely break too. The warning will quickly tell R-core and us why, so we can then update. 
diff --git a/R/print.data.table.R b/R/print.data.table.R index 31a009d5b4..4e666ca22e 100644 --- a/R/print.data.table.R +++ b/R/print.data.table.R @@ -15,6 +15,8 @@ print.data.table = function(x, topn=getOption("datatable.print.topn"), # trunc.cols - should only the columns be printed that can fit in the console? (FALSE) if (!col.names %chin% c("auto", "top", "none")) stop("Valid options for col.names are 'auto', 'top', and 'none'") + if (length(trunc.cols) != 1L || !is.logical(trunc.cols) || is.na(trunc.cols)) + stop("Valid options for trunc.cols are TRUE and FALSE") if (col.names == "none" && class) warning("Column classes will be suppressed when col.names is 'none'") if (!shouldPrint(x)) { @@ -41,31 +43,34 @@ print.data.table = function(x, topn=getOption("datatable.print.topn"), if (!is.numeric(topn)) topn = 5L topnmiss = missing(topn) topn = max(as.integer(topn),1L) - if (print.keys){ + if (print.keys) { if (!is.null(ky <- key(x))) - cat("Key: <", paste(ky, collapse=", "), ">\n", sep="") + catf("Key: <%s>\n", toString(ky)) if (!is.null(ixs <- indices(x))) - cat("Ind", if (length(ixs) > 1L) "ices" else "ex", ": <", - paste(ixs, collapse=">, <"), ">\n", sep="") + cat(sprintf( + ngettext(length(ixs), "Index: %s\n", "Indices: %s\n"), + paste0("<", ixs, ">", collapse = ", ") + )) } if (any(dim(x)==0L)) { class = if (is.data.table(x)) "table" else "frame" # a data.frame could be passed to print.data.table() directly, #3363 if (all(dim(x)==0L)) { - cat("Null data.",class," (0 rows and 0 cols)\n", sep="") # See FAQ 2.5 and NEWS item in v1.8.9 + catf("Null data.%s (0 rows and 0 cols)\n", class) # See FAQ 2.5 and NEWS item in v1.8.9 } else { - cat("Empty data.",class," (", dim(x)[1L], " rows and ",length(x)," cols)", sep="") + catf("Empty data.%s (%d rows and %d cols)", class, NROW(x), NCOL(x)) if (length(x)>0L) cat(": ",paste(head(names(x),6L),collapse=","),if(length(x)>6L)"...",sep="") cat("\n") } return(invisible(x)) } - if ((topn*2L+1L)nrows || !topnmiss)) { + n_x = 
nrow(x) + if ((topn*2L+1L)nrows || !topnmiss)) { toprint = rbindlist(list(head(x, topn), tail(x, topn)), use.names=FALSE) # no need to match names because head and tail of same x, and #3306 - rn = c(seq_len(topn), seq.int(to=nrow(x), length.out=topn)) + rn = c(seq_len(topn), seq.int(to=n_x, length.out=topn)) printdots = TRUE } else { toprint = x - rn = seq_len(nrow(x)) + rn = seq_len(n_x) printdots = FALSE } toprint=format.data.table(toprint, na.encode=FALSE, timezone = timezone, ...) # na.encode=FALSE so that NA in character cols print as @@ -93,7 +98,7 @@ print.data.table = function(x, topn=getOption("datatable.print.topn"), if (quote) colnames(toprint) <- paste0('"', old <- colnames(toprint), '"') if (isTRUE(trunc.cols)) { # allow truncation of columns to print only what will fit in console PR #4074 - widths = dt_width(toprint, class, row.names, col.names) + widths = dt_width(toprint, n_x, class, row.names, col.names) cons_width = getOption("width") cols_to_print = widths < cons_width not_printed = colnames(toprint)[!cols_to_print] @@ -109,7 +114,7 @@ print.data.table = function(x, topn=getOption("datatable.print.topn"), toprint = rbind(head(toprint, topn + isTRUE(class)), "---"="", tail(toprint, topn)) rownames(toprint) = format(rownames(toprint), justify="right") if (col.names == "none") { - cut_top(print(toprint, right=TRUE, quote=quote)) + cut_colnames(print(toprint, right=TRUE, quote=quote)) } else { print(toprint, right=TRUE, quote=quote) } @@ -124,7 +129,7 @@ print.data.table = function(x, topn=getOption("datatable.print.topn"), # option to shut this off per request of Oleg Bondar on SO, #1482 toprint=rbind(toprint, matrix(if (quote) old else colnames(toprint), nrow=1L)) # fixes bug #97 if (col.names == "none") { - cut_top(print(toprint, right=TRUE, quote=quote)) + cut_colnames(print(toprint, right=TRUE, quote=quote)) } else { print(toprint, right=TRUE, quote=quote) } @@ -187,7 +192,8 @@ shouldPrint = function(x) { # for removing the head (column names) 
of matrix output entirely, # as opposed to printing a blank line, for excluding col.names per PR #1483 -cut_top = function(x) cat(capture.output(x)[-1L], sep = '\n') +# be sure to remove colnames from any row where they exist, #4270 +cut_colnames = function(x) writeLines(grep("^\\s*(?:[0-9]+:|---)", capture.output(x), value=TRUE)) # for printing the dims for list columns #3671; used by format.data.table() paste_dims = function(x) { @@ -202,12 +208,13 @@ paste_dims = function(x) { # to calculate widths of data.table for PR #4074 # gets the width of the data.table at each column # and compares it to the console width -dt_width = function(x, class, row.names, col.names) { +# pass nrow because x is the head/tail only so nrow(x) is wrong, #4266 +dt_width = function(x, nrow, class, row.names, col.names) { widths = apply(nchar(x, type='width'), 2L, max) if (class) widths = pmax(widths, 6L) - if (col.names != "none") names = sapply(colnames(x), nchar, type = "width") else names = 0L + if (col.names != "none") names = sapply(colnames(x), nchar, type="width") else names = 0L dt_widths = pmax(widths, names) - rownum_width = if (row.names) as.integer(ceiling(log10(nrow(x)))+2) else 0L + rownum_width = if (row.names) as.integer(ceiling(log10(nrow))+2) else 0L cumsum(dt_widths + 1L) + rownum_width } # keeps the dim and dimnames attributes diff --git a/R/setkey.R b/R/setkey.R index 1f3763b1f6..e9f18398ab 100644 --- a/R/setkey.R +++ b/R/setkey.R @@ -88,12 +88,12 @@ setkeyv = function(x, cols, verbose=getOption("datatable.verbose"), physical=TRU if (verbose) { tt = suppressMessages(system.time(o <- forderv(x, cols, sort=TRUE, retGrp=FALSE))) # system.time does a gc, so we don't want this always on, until refcnt is on by default in R # suppress needed for tests 644 and 645 in verbose mode - cat("forder took", tt["user.self"]+tt["sys.self"], "sec\n") + catf("forder took %.03f sec\n", tt["user.self"]+tt["sys.self"]) } else { o = forderv(x, cols, sort=TRUE, retGrp=FALSE) } } else { - 
if (verbose) cat("setkey on columns ", brackify(cols), " using existing index '", newkey, "'\n", sep="") + if (verbose) catf("setkey on columns %s using existing index '%s'\n", brackify(cols), newkey) o = getindex(x, newkey) } if (!physical) { @@ -105,9 +105,9 @@ setkeyv = function(x, cols, verbose=getOption("datatable.verbose"), physical=TRU if (length(o)) { if (verbose) { last.started.at = proc.time() } .Call(Creorder,x,o) - if (verbose) { cat("reorder took", timetaken(last.started.at), "\n"); flush.console() } + if (verbose) { catf("reorder took %s\n", timetaken(last.started.at)); flush.console() } } else { - if (verbose) cat("x is already ordered by these columns, no need to call reorder\n") + if (verbose) catf("x is already ordered by these columns, no need to call reorder\n") } # else empty integer() from forderv means x is already ordered by those cols, nothing to do. setattr(x,"sorted",cols) invisible(x) @@ -184,7 +184,7 @@ forderv = function(x, by=seq_along(x), retGrp=FALSE, sort=TRUE, order=1L, na.las forder = function(..., na.last=TRUE, decreasing=FALSE) { sub = substitute(list(...)) - tt = sapply(sub, function(x) is.null(x) || (is.symbol(x) && !nzchar(x))) + tt = vapply_1b(sub, function(x) is.null(x) || (is.symbol(x) && !nzchar(x))) if (any(tt)) sub[tt] = NULL # remove any NULL or empty arguments; e.g. 
test 1962.052: forder(DT, NULL) and forder(DT, ) if (length(sub)<2L) return(NULL) # forder() with no arguments returns NULL consistent with base::order asc = rep.int(1L, length(sub)-1L) # ascending (1) or descending (-1) per column @@ -295,7 +295,7 @@ setorderv = function(x, cols = colnames(x), order=1L, na.last=FALSE) o = forderv(x, cols, sort=TRUE, retGrp=FALSE, order=order, na.last=na.last) if (length(o)) { .Call(Creorder, x, o) - if (is.data.frame(x) & !is.data.table(x)) { + if (is.data.frame(x) && !is.data.table(x)) { setattr(x, 'row.names', rownames(x)[o]) } k = key(x) @@ -352,7 +352,7 @@ CJ = function(..., sorted = TRUE, unique = FALSE) } } nrow = prod( vapply_1i(l, length) ) # lengths(l) will work from R 3.2.0 - if (nrow > .Machine$integer.max) stop(gettextf("Cross product of elements provided to CJ() would result in %.0f rows which exceeds .Machine$integer.max == %d", nrow, .Machine$integer.max, domain='R-data.table')) + if (nrow > .Machine$integer.max) stop(domain=NA, gettextf("Cross product of elements provided to CJ() would result in %.0f rows which exceeds .Machine$integer.max == %d", nrow, .Machine$integer.max)) l = .Call(Ccj, l) setDT(l) l = setalloccol(l) # a tiny bit wasteful to over-allocate a fixed join table (column slots only), doing it anyway for consistency since diff --git a/R/setops.R b/R/setops.R index b6dcd7b0b2..d8fcb9dfcf 100644 --- a/R/setops.R +++ b/R/setops.R @@ -63,7 +63,7 @@ fintersect = function(x, y, all=FALSE) { x = shallow(x)[, ".seqn" := rowidv(x)] y = shallow(y)[, ".seqn" := rowidv(y)] jn.on = c(".seqn",setdiff(names(y),".seqn")) - # fixes #4716 by preserving order of 1st (uses y[x] join) argument instead of 2nd (uses x[y] join) + # fixes #4716 by preserving order of 1st (uses y[x] join) argument instead of 2nd (uses x[y] join) y[x, .SD, .SDcols=setdiff(names(y),".seqn"), nomatch=NULL, on=jn.on] } else { z = funique(x) # fixes #3034. When .. 
prefix in i= is implemented (TODO), this can be x[funique(..y), on=, multi=] @@ -154,17 +154,23 @@ all.equal.data.table = function(target, current, trim.levels=TRUE, check.attribu k1 = key(target) k2 = key(current) if (!identical(k1, k2)) { - return(sprintf("Datasets has different keys. 'target'%s. 'current'%s.", - if(length(k1)) paste0(": ", paste(k1, collapse=", ")) else " has no key", - if(length(k2)) paste0(": ", paste(k2, collapse=", ")) else " has no key")) + return(gettextf( + "Datasets have different %s. 'target': %s. 'current': %s.", + "keys", + if(length(k1)) brackify(k1) else gettextf("has no key"), + if(length(k2)) brackify(k2) else gettextf("has no key") + )) } # check index i1 = indices(target) i2 = indices(current) if (!identical(i1, i2)) { - return(sprintf("Datasets has different indexes. 'target'%s. 'current'%s.", - if(length(i1)) paste0(": ", paste(i1, collapse=", ")) else " has no index", - if(length(i2)) paste0(": ", paste(i2, collapse=", ")) else " has no index")) + return(gettextf( + "Datasets have different %s. 'target': %s. 
'current': %s.", + "indices", + if(length(i1)) brackify(i1) else gettextf("has no index"), + if(length(i2)) brackify(i2) else gettextf("has no index") + )) } # Trim any extra row.names attributes that came from some inheritance @@ -173,7 +179,7 @@ all.equal.data.table = function(target, current, trim.levels=TRUE, check.attribu a1 = exclude.attrs(attributes(target)) a2 = exclude.attrs(attributes(current)) if (length(a1) != length(a2)) return(sprintf("Datasets has different number of (non-excluded) attributes: target %s, current %s", length(a1), length(a2))) - if (!identical(nm1 <- sort(names(a1)), nm2 <- sort(names(a2)))) return(sprintf("Datasets has attributes with different names: %s", paste(setdiff(union(names(a1), names(a2)), intersect(names(a1), names(a2))), collapse=", "))) + if (!identical(nm1 <- sort(names(a1)), nm2 <- sort(names(a2)))) return(sprintf("Datasets has attributes with different names: %s", brackify(setdiff(union(names(a1), names(a2)), intersect(names(a1), names(a2)))))) attrs.r = all.equal(a1[nm1], a2[nm2], ..., check.attributes = check.attributes) if (is.character(attrs.r)) return(paste("Attributes: <", attrs.r, ">")) # skip further heavy processing } diff --git a/R/tables.R b/R/tables.R index bcfab0c674..b94441c626 100644 --- a/R/tables.R +++ b/R/tables.R @@ -8,7 +8,7 @@ tables = function(mb=TRUE, order.col="NAME", width=80, all_obj = objects(envir=env, all.names=TRUE) is_DT = which(vapply_1b(all_obj, function(x) is.data.table(get(x, envir=env)))) if (!length(is_DT)) { - if (!silent) cat("No objects of class data.table exist in", if (identical(env,.GlobalEnv)) ".GlobalEnv" else format(env), "\n") + if (!silent) catf("No objects of class data.table exist in %s\n", if (identical(env, .GlobalEnv)) ".GlobalEnv" else format(env)) return(invisible(data.table(NULL))) } DT_names = all_obj[is_DT] @@ -36,7 +36,7 @@ tables = function(mb=TRUE, order.col="NAME", width=80, tt[ , NCOL := pretty_format(NCOL, width=4L)] if (mb) tt[ , MB := pretty_format(MB, 
width=2L)] print(tt, class=FALSE, nrows=Inf) - if (mb) cat("Total: ", prettyNum(sum(info$MB), big.mark=","), "MB\n", sep="") + if (mb) catf("Total: %sMB\n", prettyNum(sum(info$MB), big.mark=",")) } invisible(info) } diff --git a/R/test.data.table.R b/R/test.data.table.R index c5da3e0bac..cf778c68b6 100644 --- a/R/test.data.table.R +++ b/R/test.data.table.R @@ -46,7 +46,7 @@ test.data.table = function(script="tests.Rraw", verbose=FALSE, pkg=".", silent=F # nocov start fn2 = paste0(fn,".bz2") if (!file.exists(file.path(fulldir, fn2))) - stop(gettextf("Neither %s nor %s exist in %s",fn, fn2, fulldir, domain="R-data.table")) + stop(domain=NA, gettextf("Neither %s nor %s exist in %s",fn, fn2, fulldir)) fn = fn2 # nocov end # sys.source() below accepts .bz2 directly. @@ -92,17 +92,15 @@ test.data.table = function(script="tests.Rraw", verbose=FALSE, pkg=".", silent=F cat("getDTthreads(verbose=TRUE):\n") # for tracing on CRAN; output to log before anything is attempted getDTthreads(verbose=TRUE) # includes the returned value in the verbose output (rather than dangling '[1] 4'); e.g. "data.table is using 4 threads" - cat("test.data.table() running:", fn, "\n") # print fn to log before attempting anything on it (in case it is missing); on same line for slightly easier grep + catf("test.data.table() running: %s\n", fn) # print fn to log before attempting anything on it (in case it is missing); on same line for slightly easier grep env = new.env(parent=.GlobalEnv) assign("testDir", function(x) file.path(fulldir, x), envir=env) # are R's messages being translated to a foreign language? #3039, #630 - txt = eval(parse(text="tryCatch(mean(not__exist__), error = function(e) e$message)"), envir=.GlobalEnv) - foreign = txt != "object 'not__exist__' not found" + foreign = gettext("object '%s' not found", domain="R") != "object '%s' not found" if (foreign) { # nocov start - cat("\n**** This R session's language is not English. 
Each test will still check that the correct number of errors and/or\n", - "**** warnings are produced. However, to test the text of each error/warning too, please restart R with LANGUAGE=en\n\n", sep="") + catf("\n**** This R session's language is not English. Each test will still check that the correct number of errors and/or\n**** warnings are produced. However, to test the text of each error/warning too, please restart R with LANGUAGE=en\n\n") # nocov end } assign("foreign", foreign, envir=env) @@ -162,8 +160,14 @@ test.data.table = function(script="tests.Rraw", verbose=FALSE, pkg=".", silent=F ntest = env$ntest if (nfail > 0L) { # nocov start - if (nfail > 1L) {s1="s";s2="s: "} else {s1="";s2=" "} - stop(nfail," error",s1," out of ",ntest,". Search ",names(fn)," for test number",s2,paste(env$whichfail,collapse=", "),".") + # domain=NA since it's already translated by then + stop(domain = NA, sprintf( + ngettext( + nfail, + "%d error out of %d. Search %s for test number %s", + "%d errors out of %d. 
Search %s for test numbers %s" + ), nfail, ntest, names(fn), paste(env$whichfail, collapse=", ") + )) # important to stop() here, so that 'R CMD check' fails # nocov end } @@ -172,12 +176,12 @@ test.data.table = function(script="tests.Rraw", verbose=FALSE, pkg=".", silent=F timings = env$timings DT = head(timings[-1L][order(-time)], 10L) # exclude id 1 as in dev that includes JIT if ((x<-sum(timings[["nTest"]])) != ntest) { - warning("Timings count mismatch:",x,"vs",ntest) # nocov + warning("Timings count mismatch: ",x," vs ",ntest) # nocov } - cat("10 longest running tests took ", as.integer(tt<-DT[, sum(time)]), "s (", as.integer(100*tt/(ss<-timings[,sum(time)])), "% of ", as.integer(ss), "s)\n", sep="") + catf("10 longest running tests took %ds (%d%% of %ds)\n", as.integer(tt<-DT[, sum(time)]), as.integer(100*tt/(ss<-timings[,sum(time)])), as.integer(ss)) print(DT, class=FALSE) - cat("All ",ntest," tests (last ",env$prevtest,") in ",names(fn)," completed ok in ",timetaken(env$started.at),"\n",sep="") + catf("All %d tests (last %s) in %s completed ok in %s\n", ntest, env$prevtest, names(fn), timetaken(env$started.at)) ## this chunk requires to include new suggested deps: graphics, grDevices #memtest.plot = function(.inittime) { @@ -211,10 +215,10 @@ test.data.table = function(script="tests.Rraw", verbose=FALSE, pkg=".", silent=F compactprint = function(DT, topn=2L) { tt = vapply_1c(DT,function(x)class(x)[1L]) tt[tt=="integer64"] = "i64" - tt = substring(tt, 1L, 3L) + tt = substr(tt, 1L, 3L) makeString = function(x) paste(x, collapse = ",") # essentially toString.default cn = paste0(" [Key=",makeString(key(DT)), - " Types=", makeString(substring(sapply(DT, typeof), 1L, 3L)), + " Types=", makeString(substr(sapply(DT, typeof), 1L, 3L)), " Classes=", makeString(tt), "]") if (nrow(DT)) { print(copy(DT)[,(cn):="",verbose=FALSE], topn=topn, class=FALSE) @@ -255,6 +259,7 @@ test = function(num,x,y=TRUE,error=NULL,warning=NULL,message=NULL,output=NULL,no # iv) if warning 
is supplied, y is checked to equal x, and x should result in a warning message matching the pattern # v) if output is supplied, x is evaluated and printed and the output is checked to match the pattern # num just needs to be numeric and unique. We normally increment integers at the end, but inserts can be made using decimals e.g. 10,11,11.1,11.2,12,13,... + # num=0 to escape global failure tracking so we can test behaviour of test function itself: test(1.1, test(0, TRUE, FALSE), FALSE, output="1 element mismatch") # Motivations: # 1) we'd like to know all tests that fail not just stop at the first. This often helps by revealing a common feature across a set of # failing tests @@ -268,7 +273,7 @@ test = function(num,x,y=TRUE,error=NULL,warning=NULL,message=NULL,output=NULL,no prevtest = get("prevtest", parent.frame()) nfail = get("nfail", parent.frame()) # to cater for both test.data.table() and stepping through tests in dev whichfail = get("whichfail", parent.frame()) - assign("ntest", get("ntest", parent.frame()) + 1L, parent.frame(), inherits=TRUE) # bump number of tests run + assign("ntest", get("ntest", parent.frame()) + if (num>0) 1L else 0L, parent.frame(), inherits=TRUE) # bump number of tests run lasttime = get("lasttime", parent.frame()) timings = get("timings", parent.frame()) memtest = get("memtest", parent.frame()) @@ -277,14 +282,15 @@ test = function(num,x,y=TRUE,error=NULL,warning=NULL,message=NULL,output=NULL,no foreign = get("foreign", parent.frame()) showProgress = get("showProgress", parent.frame()) time = nTest = NULL # to avoid 'no visible binding' note - on.exit( { + if (num>0) on.exit( { now = proc.time()[3L] took = now-lasttime # so that prep time between tests is attributed to the following test assign("lasttime", now, parent.frame(), inherits=TRUE) timings[ as.integer(num), `:=`(time=time+took, nTest=nTest+1L), verbose=FALSE ] } ) if (showProgress) - cat("\rRunning test id", numStr, " ") # nocov. 
+ # \r can't be in gettextf msg + cat("\rRunning test id", numStr, " ") # nocov. # See PR #4090 for comments about change here in Dec 2019. # If a segfault error occurs in future and we'd like to know after which test, then arrange for the # try(sys.source()) in test.data.table() to be run in a separate R process. That process could write out @@ -338,10 +344,10 @@ test = function(num,x,y=TRUE,error=NULL,warning=NULL,message=NULL,output=NULL,no fwrite(mem, "memtest.csv", append=TRUE, verbose=FALSE) # nocov } fail = FALSE - if (.test.data.table) { + if (.test.data.table && num>0) { if (num>\n",sep="") # \n printed as '\\n' so the two lines of output can be compared vertically - cat("Observed: <<",gsub("\n","\\\\n",out),">>\n",sep="") + catf("Test %s did not produce correct output:\n", numStr) + catf("Expected: <<%s>>\n", encodeString(output)) # \n printed as '\\n' so the two lines of output can be compared vertically + catf("Observed: <<%s>>\n", encodeString(out)) fail = TRUE # nocov end } if (length(notOutput) && string_match(notOutput, out, ignore.case=TRUE)) { # nocov start - cat("Test",numStr,"produced output but should not have:\n") - cat("Expected absent (case insensitive): <<",gsub("\n","\\\\n",notOutput),">>\n",sep="") - cat("Observed: <<",gsub("\n","\\\\n",out),">>\n",sep="") + catf("Test %s produced output but should not have:\n", numStr) + catf("Expected absent (case insensitive): <<%s>>\n", encodeString(notOutput)) + catf("Observed: <<%s>>\n", encodeString(out)) fail = TRUE # nocov end } @@ -411,7 +413,7 @@ test = function(num,x,y=TRUE,error=NULL,warning=NULL,message=NULL,output=NULL,no if (is.data.table(x) && is.data.table(y)) { if (!selfrefok(x) || !selfrefok(y)) { # nocov start - cat("Test ",numStr," ran without errors but selfrefok(", if(!selfrefok(x))"x"else"y", ") is FALSE\n", sep="") + catf("Test %s ran without errors but selfrefok(%s) is FALSE\n", numStr, if (selfrefok(x)) "y" else "x") fail = TRUE # nocov end } else { @@ -434,12 +436,12 @@ test = 
function(num,x,y=TRUE,error=NULL,warning=NULL,message=NULL,output=NULL,no # For test 617 on r-prerel-solaris-sparc on 7 Mar 2013 # nocov start if (!fail) { - cat("Test", numStr, "ran without errors but failed check that x equals y:\n") + catf("Test %s ran without errors but failed check that x equals y:\n", numStr) failPrint = function(x, xsub) { cat(">", substitute(x), "=", xsub, "\n") if (is.data.table(x)) compactprint(x) else { nn = length(x) - cat(sprintf("First %d of %d (type '%s'): \n", min(nn, 6L), length(x), typeof(x))) + catf("First %d of %d (type '%s'): \n", min(nn, 6L), length(x), typeof(x)) # head.matrix doesn't restrict columns if (length(d <- dim(x))) do.call(`[`, c(list(x, drop = FALSE), lapply(pmin(d, 6L), seq_len))) else print(head(x)) @@ -452,7 +454,7 @@ test = function(num,x,y=TRUE,error=NULL,warning=NULL,message=NULL,output=NULL,no } # nocov end } - if (fail && .test.data.table) { + if (fail && .test.data.table && num>0) { # nocov start assign("nfail", nfail+1L, parent.frame(), inherits=TRUE) assign("whichfail", c(whichfail, numStr), parent.frame(), inherits=TRUE) diff --git a/R/utils.R b/R/utils.R index 42e67ea8de..7a698131c6 100644 --- a/R/utils.R +++ b/R/utils.R @@ -25,6 +25,13 @@ if (base::getRversion() < "3.2.0") { # Apr 2015 isNamespaceLoaded = function(x) x %chin% loadedNamespaces() } +if (!exists('startsWith', 'package:base', inherits=FALSE)) { # R 3.3.0; Apr 2016 + startsWith = function(x, stub) substr(x, 1L, nchar(stub))==stub +} +if (!exists('endsWith', 'package:base', inherits=FALSE)) { + endsWith = function(x, stub) {n=nchar(x); substr(x, n-nchar(stub)+1L, n)==stub} +} + # which.first which.first = function(x) { @@ -45,7 +52,7 @@ which.last = function(x) require_bit64_if_needed = function(DT) { # called in fread and print.data.table - if (!isNamespaceLoaded("bit64") && any(sapply(DT,inherits,"integer64"))) { + if (!isNamespaceLoaded("bit64") && any(vapply_1b(DT, inherits, "integer64"))) { # nocov start # a test was attempted to 
cover the requireNamespace() by using unloadNamespace() first, but that fails when nanotime is loaded because nanotime also uses bit64 if (!requireNamespace("bit64",quietly=TRUE)) { @@ -84,7 +91,7 @@ name_dots = function(...) { } notnamed = vnames=="" if (any(notnamed)) { - syms = sapply(dot_sub, is.symbol) # save the deparse() in most cases of plain symbol + syms = vapply_1b(dot_sub, is.symbol) # save the deparse() in most cases of plain symbol for (i in which(notnamed)) { tmp = if (syms[i]) as.character(dot_sub[[i]]) else deparse(dot_sub[[i]])[1L] if (tmp == make.names(tmp)) vnames[i]=tmp @@ -101,27 +108,32 @@ brackify = function(x, quote=FALSE) { # keep one more than needed to trigger dots if needed if (quote && is.character(x)) x = paste0("'",head(x,CUTOFF+1L),"'") if (length(x) > CUTOFF) x = c(x[1:CUTOFF], '...') - sprintf('[%s]', paste(x, collapse = ', ')) + sprintf('[%s]', toString(x)) } # patterns done via NSE in melt.data.table and .SDcols in `[.data.table` -do_patterns = function(pat_sub, all_cols) { - # received as substitute(patterns(...)) - pat_sub = as.list(pat_sub)[-1L] - # identify cols = argument if present - idx = which(names(pat_sub) == "cols") - if (length(idx)) { - cols = eval(pat_sub[["cols"]], parent.frame(2L)) - pat_sub = pat_sub[-idx] - } else cols = all_cols - pats = lapply(pat_sub, eval, parent.frame(2L)) - matched = patterns(pats, cols=cols) - # replace with lengths when R 3.2.0 dependency arrives - if (length(idx <- which(sapply(matched, length) == 0L))) - stop('Pattern', if (length(idx) > 1L) 's', ' not found: [', - paste(pats[idx], collapse = ', '), ']') - - return(matched) +# was called do_patterns() before PR#4731 +eval_with_cols = function(orig_call, all_cols) { + parent = parent.frame(2L) + fun_uneval = orig_call[[1L]] + # take fun from either calling env (parent) or from data.table + fun = tryCatch({ + maybe_fun = eval(fun_uneval, parent) + # parent env could have a non-function with this name, which we + # should ignore. 
+ stopifnot(is.function(maybe_fun)) + maybe_fun + }, error=function(e) { + eval(fun_uneval)#take function from data.table namespace. + }) + if (!is.primitive(fun)) { + named_call = match.call(fun, orig_call) + if ("cols" %in% names(formals(fun)) && !"cols" %in% names(named_call)) { + named_call[["cols"]] = all_cols + } + named_call[[1L]] = fun + eval(named_call, parent) + } } # check UTC status @@ -140,3 +152,8 @@ edit.data.table = function(name, ...) { setDT(NextMethod('edit', name))[] } # nocov end + +catf = function(fmt, ...) { + cat(gettextf(fmt, ...)) +} + diff --git a/R/xts.R b/R/xts.R index bfb6f813a7..fce6aad3b5 100644 --- a/R/xts.R +++ b/R/xts.R @@ -7,8 +7,8 @@ as.data.table.xts = function(x, keep.rownames = TRUE, key=NULL, ...) { r = setDT(as.data.frame(x, row.names=NULL)) if (identical(keep.rownames, FALSE)) return(r[]) index_nm = if (is.character(keep.rownames)) keep.rownames else "index" - if (index_nm %chin% names(x)) stop(gettextf("Input xts object should not have '%s' column because it would result in duplicate column names. Rename '%s' column in xts or use `keep.rownames` to change the index column name.", index_nm, index_nm, domain="R-data.table"), domain=NA) - r[, c(index_nm) := zoo::index(x)] + if (index_nm %chin% names(x)) stop(domain=NA, gettextf("Input xts object should not have '%s' column because it would result in duplicate column names. Rename '%s' column in xts or use `keep.rownames` to change the index column name.", index_nm, index_nm)) + r[, c(index_nm) := zoo::index(x), env=list(x=x)] setcolorder(r, c(index_nm, setdiff(names(r), index_nm))) # save to end to allow for key=index_nm setkeyv(r, key) @@ -19,7 +19,7 @@ as.xts.data.table = function(x, ...) 
{ stopifnot(requireNamespace("xts"), !missing(x), is.data.table(x)) if (!xts::is.timeBased(x[[1L]])) stop("data.table must have a time based column in first position, use `setcolorder` function to change the order, or see ?timeBased for supported types") colsNumeric = vapply_1b(x, is.numeric)[-1L] # exclude first col, xts index - if (any(!colsNumeric)) warning("Following columns are not numeric and will be omitted: ", brackify(names(colsNumeric)[!colsNumeric])) + if (!all(colsNumeric)) warning("Following columns are not numeric and will be omitted: ", brackify(names(colsNumeric)[!colsNumeric])) r = setDF(x[, .SD, .SDcols = names(colsNumeric)[colsNumeric]]) return(xts::as.xts(r, order.by = if ("IDate" %chin% class(x[[1L]])) as.Date(x[[1L]]) else x[[1L]])) } diff --git a/_pkgdown.yml b/_pkgdown.yml index 6d2ef397d3..4b02b39491 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -36,8 +36,10 @@ navbar: href: articles/datatable-keys-fast-subset.html - text: "Secondary indices and auto indexing" href: articles/datatable-secondary-indices-and-auto-indexing.html - - text: "Efficient reshaping using data.tables" + - text: "Efficient reshaping using data.table" href: articles/datatable-reshape.html + - text: "Programming on data.table" + href: articles/datatable-programming.html - text: "Frequently asked questions" href: articles/datatable-faq.html - text: "Importing data.table" diff --git a/inst/include/datatableAPI.h b/inst/include/datatableAPI.h index 44f52018f4..e2a1b2fd32 100644 --- a/inst/include/datatableAPI.h +++ b/inst/include/datatableAPI.h @@ -21,11 +21,14 @@ extern "C" { /* provided the interface for the function exported in ../src/init.c via R_RegisterCCallable() */ +// subsetDT #3751 inline SEXP attribute_hidden DT_subsetDT(SEXP x, SEXP rows, SEXP cols) { static SEXP(*fun)(SEXP, SEXP, SEXP) = - (SEXP(*)(SEXP,SEXP,SEXP)) R_GetCCallable("data.table", "CsubsetDT"); + (SEXP(*)(SEXP,SEXP,SEXP)) R_GetCCallable("data.table", "DT_subsetDT"); return fun(x,rows,cols); } +// 
forder #4015 +// setalloccol alloccolwrapper setDT #4439 /* permit opt-in to redefine shorter identifiers */ #if defined(DATATABLE_REMAP_API) diff --git a/inst/tests/benchmark.Rraw b/inst/tests/benchmark.Rraw index 1c8bf146a6..bf0bf77e9f 100644 --- a/inst/tests/benchmark.Rraw +++ b/inst/tests/benchmark.Rraw @@ -161,10 +161,10 @@ set.seed(1) L = lapply(1:1e6, sample, x=100, size=2) x = capture.output(fwrite(L)) test(1742.1, nchar(x), c(2919861L, 2919774L)) # tests 2 very long lines, too -test(1742.2, substring(x,1,10), c("27,58,21,9","38,91,90,6")) +test(1742.2, substr(x, 1L, 10L), c("27,58,21,9", "38,91,90,6")) test(1742.3, L[[1L]], c(27L,38L)) test(1742.4, L[[1000000L]], c(76L, 40L)) -test(1742.5, substring(x,nchar(x)-10,nchar(x)), c("50,28,95,76","62,87,23,40")) +test(1742.5, substr(x, nchar(x)-10L, nchar(x)), c("50,28,95,76","62,87,23,40")) # Add scaled-up non-ASCII forder test 1896 diff --git a/inst/tests/other.Rraw b/inst/tests/other.Rraw index 1bd91286f9..03d62b4389 100644 --- a/inst/tests/other.Rraw +++ b/inst/tests/other.Rraw @@ -186,8 +186,6 @@ if (loaded[["parallel"]]) { } # example(":=", local=TRUE) triggered cedta==FALSE and then error, #2972 -res = tryCatch(example(':=', package='data.table', local=TRUE)) -test(14.1, !inherits(res, 'error')) -res = tryCatch(example('CJ', package='data.table', local=TRUE)) -test(14.2, !inherits(res, 'error')) +test(14.1, {example(':=', package='data.table', local=TRUE); TRUE}) +test(14.2, {example('CJ', package='data.table', local=TRUE); TRUE}) diff --git a/inst/tests/programming.Rraw b/inst/tests/programming.Rraw new file mode 100644 index 0000000000..88c6a99e6f --- /dev/null +++ b/inst/tests/programming.Rraw @@ -0,0 +1,600 @@ +require(methods) +if (exists("test.data.table", .GlobalEnv, inherits=FALSE)) { + if ((tt<-compiler::enableJIT(-1))>0) + cat("This is dev mode and JIT is enabled (level ", tt, ") so there will be a brief pause around the first test.\n", sep="") +} else { + require(data.table) + test = 
data.table:::test + is.AsIs = data.table:::is.AsIs + rm.AsIs = data.table:::rm.AsIs + enlist = data.table:::enlist + list2lang = data.table:::list2lang +} + +# test that 'test' catches the difference in language object +cl1 = substitute(f(1L, list(2L))) +cl2 = substitute(f(1L, .v), list(.v=list(2L))) +test(1.01, all.equal(cl1, cl2), TRUE) +test(1.02, identical(cl1, cl2), FALSE) +test(1.03, test(0, cl1, cl2), FALSE, output="f(1L, list(2L))") +# AsIs +test(1.11, is.AsIs(1L), FALSE) +test(1.12, is.AsIs(I(1L)), TRUE) +test(1.13, is.AsIs("a"), FALSE) +test(1.14, is.AsIs(I("a")), TRUE) +test(1.15, is.AsIs(list(1L)), FALSE) +test(1.16, is.AsIs(I(list(1L))), TRUE) +test(1.17, is.AsIs(structure(list(NULL), class="an_S3")), FALSE) ## S3 +test(1.18, is.AsIs(I(structure(list(NULL), class="an_S3"))), TRUE) +test(1.19, is.AsIs(getClass("MethodDefinition")), FALSE) ## S4 +test(1.20, is.AsIs(suppressWarnings(I(getClass("MethodDefinition")))), TRUE) ## suppressWarnings due new warning in R 4.1 +test(1.21, is.AsIs(rm.AsIs(1L)), FALSE) +test(1.22, is.AsIs(rm.AsIs(I(1L))), FALSE) +test(1.23, is.AsIs(rm.AsIs(list(1L))), FALSE) +test(1.24, is.AsIs(rm.AsIs(I(list(1L)))), FALSE) + +# substitute2 simple +test(2.01, substitute2(list(var = val), env = list(var="my_var", val=5L)), quote(list(my_var = 5L))) +# substitute2 + I to handle char and symbol +test(2.02, substitute2(list(var = val), env = list(var="my_var", val=I("my_val"))), quote(list(my_var="my_val"))) +test(2.03, substitute2(list(var = val), env = I(list(var=as.name("my_var"), val="my_val"))), quote(list(my_var="my_val"))) +# substitute2 handle symbol anyway +test(2.04, substitute2(list(var = val), env = list(var=as.name("my_var"), val=I("my_val"))), quote(list(my_var="my_val"))) +# substitute2 complex use case +test(2.11, substitute2( + .(fun_ans_var = fun(farg1, farg2=farg2val), timestamp=Sys.time(), col_head = head(head_arg, n=1L)), + list( + fun_ans_var = "my_mean_res", + fun = "mean", + farg1 = "my_x_col", + farg2 = "na.rm", 
+ farg2val = TRUE, + col_head = "first_y", + head_arg = "y" + ) +), quote(.(my_mean_res=mean(my_x_col, na.rm=TRUE), timestamp=Sys.time(), first_y=head(y, n=1L)))) +# substitute2 PR example +test(2.12, substitute2( + .(out_col_name = fun(in_col_name, fun_arg1=fun_arg1val)), + env = list( + in_col_name = "x", + fun = "sum", + fun_arg1 = "na.rm", + fun_arg1val = TRUE, + out_col_name = "sum_x" + ) +), quote(.(sum_x = sum(x, na.rm=TRUE)))) +# substitute2 nested calls argument names substitute +test(2.13, substitute2( + f1(a1 = f2(a2 = f3(a3 = f4(a4 = v1, extra=v2), v3, a3b = v4)), a1b=c("a","b")), + list(f1="fun1", f2="fun2", f3="fun3", f4="fun4", a1="arg1", a2="arg2", a3="arg3", a4="arg4", v1="col1", extra="n", v2=6L, v3="col2", a3b="arg3b", v4=c(3.5,4.5), a1b="arg1b") +), substitute( + fun1(arg1 = fun2(arg2 = fun3(arg3 = fun4(arg4 = col1, n=6L), col2, arg3b = v4)), arg1b=c("a","b")), + list(v4=c(3.5,4.5)) +)) +# calls of length 0 args +const1 = function() 1L +test(2.21, substitute2(list(nm = fun()), env=list(a="b", fun="const1", nm="int1")), quote(list(int1=const1()))) +test(2.22, substitute2(.(), env=list(a="b", fun="const1", nm="int1")), quote(.())) +test(2.23, identical(substitute2(), substitute())) +# substitute2 AsIs class properly removed or kept +test(2.31, class(substitute2(var3%in%values, list(var3="a", values=I(c("a","b","c"))))[[3L]]), "character") +test(2.32, class(substitute2(var3%in%values, I(list(var3=as.name("a"), values=c("a","b","c"))))[[3L]]), "character") +test(2.33, class(substitute2(var3%in%values, list(var3="a", values=I(1:3)))[[3L]]), "integer") +test(2.34, class(substitute2(var3%in%values, I(list(var3=as.name("a"), values=c(1:3))))[[3L]]), "integer") +cl = substitute2(var3%in%values, I(list(var3=as.name("a"), values=I(c("a","b","c"))))) ## keeping AsIs by extra I on whole env arg +test(2.35, cl, substitute(a %in% .v, list(.v=I(c("a","b","c"))))) +test(2.36, class(cl[[3L]]), "AsIs") +cl = substitute2(var3%in%values, I(list(var3="a", 
values=I(1:3)))) +test(2.37, cl, substitute("a" %in% .v, list(.v=I(1:3)))) +test(2.38, class(cl[[3L]]), "AsIs") +# substitute2 non-scalar char as name +test(2.41, substitute2(list(var = val), env = list(var="my_var", val=c("a","b"))), error="are not scalar") +test(2.42, substitute2(list(var = val), env = list(var="my_var", val=I(c("a","b")))), substitute(list(my_var=.v), list(.v=c("a","b")))) ## note that quote(list(my_var=c("a","b")))) will not work because 'c("a","b")' will be a 'language' class (a 'c()' call), but we need to have it as 'character' class instead +test(2.43, substitute2(list(var = val), env = I(list(var=as.name("my_var"), val=c("a","b")))), substitute(list(my_var=.v), list(.v=c("a","b")))) +# substitute2 non-symbol +test(2.44, substitute2(list(var = val), env = list(var=I("my_var"), val="my_val")), error="type 'character' but it has to be 'symbol'") +test(2.45, substitute2(list(var = val), env = I(list(var="my_var", val="my_val"))), error="type 'character' but it has to be 'symbol'") +test(2.46, substitute2(.(v1=v2), list(v1=1L, v2=2L)), error="type 'integer' but it has to be 'symbol'") +test(2.47, substitute2(.(v1=v2), list(v1=FALSE, v2=2L)), error="type 'logical' but it has to be 'symbol'") +# substitute2 NA_character_ becomes valid 'NA' name +test(2.48, substitute2(.(v1 = v2), list(v1 = NA_character_, v2 = NA_character_, "." = "list")), quote(list(`NA` = `NA`))) +cl = substitute2(.(v1 = v2), list(v1 = NA_character_, v2 = I(NA_character_), "." 
= "list")) +test(2.49, cl, quote(list(`NA` = NA_character_))) +test(2.50, eval(cl), list("NA" = NA_character_)) +# substitute2 duplicate matches +test(2.51, substitute2(list(v1=v2, v1=v2), env=list(v1="nm",v2=2L,v3=3L)), quote(list(nm = 2L, nm = 2L))) +test(2.52, substitute2(list(v1=v2, v1=v3), env=list(v1="nm",v2=2L,v3=3L)), quote(list(nm = 2L, nm = 3L))) +# substitute2 nested unnamed call +test(2.53, substitute2(c(list(v1=v2, v1=v2)), env=list(v1="nm",v2=2L,v3=3L)), quote(c(list(nm = 2L, nm = 2L)))) +test(2.54, substitute2(c(list(v1=v2, v1=v3)), env=list(v1="nm",v2=2L,v3=3L)), quote(c(list(nm = 2L, nm = 3L)))) + +# substitute2 env as environment class +e = as.environment(list(v=1L, .v=2L)) +test(2.81, substitute2(.(v, .v), e), quote(.(1L, 2L))) +# unline in base R substitute, the env arg is always evaluated +e = new.env() +delayedAssign("a_promise", stop("I am the error"), assign.env=e) +e$x = 5L +promises = function(env) { + f = function(x, env) eval(substitute(substitute(.x, env), list(.x=x))) + sym = lapply(setNames(nm=ls(env)), as.name) + lapply(sym, f, env) +} +test(2.820, promises(e), list(a_promise=quote(stop("I am the error")), x=5L)) +test(2.821, substitute(x + 1L, e), quote(5L + 1L)) +test(2.822, substitute2(x + 1L, e), error="I am the error", ignore.warning="restarting interrupted promise evaluation") +# substitute2 env various corner cases +test(2.901, substitute2(.(v), NULL), quote(.(v))) +test(2.902, substitute2(.(v), list()), quote(.(v))) +test(2.903, substitute2(.(v), emptyenv()), quote(.(v))) +test(2.91, substitute2(.()), error="'env' must not be missing") +test(2.92, substitute2(v, c(v=1L)), error="'env' must be a list or an environment") +test(2.93, substitute2(.(v), list(1L, 2L)), error="'env' argument does not have names") +test(2.94, substitute2(.(v), structure(list(1L,2L), names=c("","v"))), error="'env' argument has zero char names") +test(2.95, substitute2(.(v), structure(list(1,2), names=c(NA,"v"))), error="'env' argument has NA names") 
+test(2.96, substitute2(.(v), list(v=1,v=2)), error="'env' argument has duplicated names") + +# substitute2 re-use inside another function +f = function(expr, env) { + eval(substitute( + substitute2(.expr, env), + list(.expr = substitute(expr)) + )) +} +cl = f( + .(out_col_name = fun(in_col_name, fun_arg1=fun_arg1val)), + env = list( + in_col_name = "x", + fun = "sum", + fun_arg1 = "na.rm", + fun_arg1val = TRUE, + out_col_name = "sum_x" + ) +) +test(3.01, cl, quote(.(sum_x = sum(x, na.rm = TRUE)))) +# substitute2 nested re-use inside another function +cl = substitute2(list(nm = fun(.(out_col_name = fun(in_col_name, fun_arg1=fun_arg1val)), + env = list( + in_col_name = "x", + fun = "sum", + fun_arg1 = "na.rm", + fun_arg1val = tf_var, ## note a parameter here + out_col_name = "sum_x" +))), list(nm="my_call", fun="f", tf_var=FALSE)) +test(3.02, eval(cl), list(my_call = quote(.(sum_x = sum(x, na.rm = FALSE))))) + +# enlist +test(4.01, enlist(c("a")), error="'x' must be a list") +test(4.02, enlist(list("V1","V2")), quote(list(V1, V2))) +test(4.03, enlist(list(V1="V1", V2="V2")), quote(list(V1=V1, V2=V2))) +test(4.04, enlist(I(list(V1="V1", V2="V2"))), list(V1="V1", V2="V2")) +test(4.05, enlist(list(V1=I("V1"), V2=I("V2"))), quote(list(V1="V1", V2="V2"))) +test(4.06, enlist(list(V1="V1", V2=I("V2"))), quote(list(V1=V1, V2="V2"))) +test(4.07, enlist(list(V1="V1", V2=I("V2"), V3=list("X1", "X2"))), quote(list(V1=V1, V2="V2", V3=list(X1, X2)))) +test(4.08, enlist(list(V1="V1", V2=I("V2"), V3=list(X1="X1", X2=I("X2")))), quote(list(V1=V1, V2="V2", V3=list(X1=X1, X2="X2")))) +test(4.09, enlist(list(V1="V1", V2=I("V2"), V3=enlist(list("X1","X2")))), quote(list(V1 = V1, V2 = "V2", V3 = list(X1, X2)))) +test(4.10, enlist(list(V1="V1", V2=I("V2"), V3=I(enlist(list("X1","X2"))))), quote(list(V1 = V1, V2 = "V2", V3 = list(X1, X2)))) +test(4.11, enlist(list(V1="V1", V2=I("V2"), V3=enlist(I(list("X1","X2"))))), quote(list(V1 = V1, V2 = "V2", V3 = list(X1, X2)))) +test(4.12, 
enlist(list(V1="V1", V2=I("V2"), V3=I(enlist(I(list("X1","X2")))))), substitute(list(V1 = V1, V2 = "V2", V3 = lst), list(lst = list("X1", "X2")))) +test(4.13, enlist(list(V1="V1", V2=I("V2"), V3=I(enlist(list(I("X1"),I("X2")))))), quote(list(V1 = V1, V2 = "V2", V3 = list("X1", "X2")))) +test(4.14, enlist(I(list(V1="V1", V2=list("V2")))), list(V1="V1", V2=list("V2"))) +test(4.15, enlist(I(list(V1="V1", V2=I(list("V2"))))), list(V1="V1", V2=I(list("V2")))) + +# list2lang +test(5.01, list2lang(c("a")), error="'x' must be a list") +test(5.02, list2lang(list("a", 1L)), list(as.name("a"), 1L)) +test(5.03, list2lang(I(list("a", 1L))), list("a", 1L)) +test(5.04, list2lang(list(I("a"), 1L)), list("a", 1L)) +test(5.05, list2lang(list("a", 1L, list("b"))), list(as.name("a"), 1L, call("list", as.name("b")))) +test(5.06, list2lang(list("a", 1L, list(I("b")))), list(as.name("a"), 1L, call("list", "b"))) +test(5.07, list2lang(list("a", 1L, I(list("b")))), list(as.name("a"), 1L, list("b"))) +test(5.08, list2lang(I(list("a", 1L, list("b")))), list("a", 1L, list("b"))) +test(5.09, list2lang(I(list("a", 1L, I(list("b"))))), list("a", 1L, I(list("b")))) +test(5.10, list2lang(list("a", 1L, c(1L, 2L))), list(as.name("a"), 1L, c(1L,2L))) ## no 'enlist' like feature for 'c()' function, see next test +test(5.11, list2lang(list("a", 1L, call("c", 1L, 2L))), list(as.name("a"), 1L, quote(c(1L, 2L)))) + +# datatable.enlist +op = options(datatable.enlist=NULL) +test(6.01, + substitute2(list(v1 = v2, v3 = v4), list(v1 = "int", v2 = 1L, v3 = "lst", v4 = list("a", "b", list("c", "d")))), + quote(list(int = 1L, lst = list(a, b, list(c, d))))) +options(datatable.enlist=FALSE) +test(6.02, + substitute2(list(v1 = v2, v3 = v4), list(v1 = "int", v2 = 1L, v3 = "lst", v4 = list("a", "b", list("c", "d")))), + substitute(list(int = 1L, lst = lst), list(lst = list("a", "b", list("c", "d"))))) +options(datatable.enlist=NULL) +test(6.03, + enlist(list(v1 = 1L, v2 = list(v3 = "b", v4 = list(v5 = "c")))), + 
quote(list(v1 = 1L, v2 = list(v3 = b, v4 = list(v5 = c))))) +options(datatable.enlist=FALSE) +test(6.04, + enlist(list(v1 = 1L, v2 = list(v3 = "b", v4 = list(v5 = "c")))), + substitute(list(v1 = 1L, v2 = lst), list(lst=list(v3 = "b", v4 = list(v5 = "c"))))) +options(datatable.enlist=NULL) +test(6.05, + substitute2(list(v1, v2, v3), list(v1="V1", v2="V2", v3=enlist(list("V4","V5")))), + quote(list(V1, V2, list(V4, V5)))) +options(datatable.enlist=FALSE) +test(6.06, + substitute2(list(v1, v2, v3), list(v1="V1", v2="V2", v3=enlist(list("V4","V5")))), + quote(list(V1, V2, list(V4, V5)))) +test(6.07, + substitute2(list(v1, v2, v3), list(v1="V1", v2="V2", v3=enlist(list("V4","V5", list("V6"))))), + substitute(list(V1, V2, list(V4, V5, lst)), list(lst=list("V6")))) +test(6.08, + substitute2(list(v1, v2, v3), list(v1="V1", v2="V2", v3=enlist(list("V4","V5", enlist(list("V6")))))), + quote(list(V1, V2, list(V4, V5, list(V6))))) +options(op) + +# documentation examples +test(7.01, substitute2(list(var1 = var2), list(var1 = "c1", var2 = 5L)), quote(list(c1 = 5L))) ## works also on names +test(7.02, substitute2(var1, list(var1 = I("c1"))), "c1") ## enforce character with I +test(7.03, substitute2(var1, list(var1 = "c1")), quote(c1)) ## turn character into symbol, for convenience +test(7.04, substitute2(list(var1 = var2), list(var1 = "c1", var2 = I("some_character"))), quote(list(c1 = "some_character"))) ## mix symbols and characters +test(7.05, substitute2(list(var1 = var2), I(list(var1 = as.name("c1"), var2 = "some_character"))), quote(list(c1 = "some_character"))) +test(7.06, substitute2(f(lst), I(list(lst = list(1L, 2L)))), substitute(f(lst), list(lst=list(1L,2L)))) ## list elements are enlist'ed into list calls +test(7.07, substitute2(f(lst), list(lst = I(list(1L, 2L)))), substitute(f(lst), list(lst=list(1L,2L)))) +test(7.08, substitute2(f(lst), list(lst = call("list", 1L, 2L))), quote(f(list(1L, 2L)))) +test(7.09, substitute2(f(lst), list(lst = list(1L, 2L))), 
quote(f(list(1L, 2L)))) +test(7.10, substitute2(f(lst), list(lst = list(1L, list(2L)))), quote(f(list(1L, list(2L))))) ## character to name and list into list calls works recursively +test(7.11, substitute2(f(lst), I(list(lst = list(1L, list(2L))))), substitute(f(lst), list(lst=list(1L, list(2L))))) +f = function(expr, env) { ## using substitute2 from another function + eval(substitute( + substitute2(.expr, env), + list(.expr = substitute(expr)) + )) +} +test(7.12, f(list(var1 = var2), list(var1 = "c1", var2 = 5L)), quote(list(c1 = 5L))) + +# data.table i, j, by +d = data.table(a = 2:1, b = 1:4) +test(11.01, d[var3%in%values, .(var1 = f(var2)), by=var3, + env=list(var1="res", var2="b", f="sum", var3="a", values=0:3), + verbose=TRUE], data.table(a=c(2L,1L), res=c(4L,6L)), output=c("Argument 'by' after substitute: a","Argument 'j' after substitute: .(res = sum(b))","Argument 'i' after substitute: a %in% 0:3")) +# data.table symbols and chars +d = data.table(a = c("b","a"), b = 1:4) +out = capture.output(ans <- d[var3%in%values, .(var1 = f(var2)), keyby=var3, + env=list(var1="res", var2="b", f="sum", var3="a", values=I(c("a","b","c"))), + verbose=TRUE]) # could not use output arg in test, so test it manually +test(11.02, ans, data.table(a=c("a","b"), res=c(6L,4L), key="a")) +out = grep("Argument.*substitute", out, value=TRUE) +test(11.021, length(out), 3L) # we expect i, j, by only here, ensure about that +test(11.022, "Argument 'by' after substitute: a" %in% out, TRUE) +test(11.023, "Argument 'j' after substitute: .(res = sum(b))" %in% out, TRUE) +test(11.024, "Argument 'i' after substitute: a %in% c(\"a\", \"b\", \"c\")" %in% out, TRUE) +out = capture.output(ans <- d[var3%in%values, .(var1 = f(var2)), keyby=var3, + env=I(list(var1=as.name("res"), var2=as.name("b"), f=as.name("sum"), var3=as.name("a"), values=c("b","c"))), + verbose=TRUE]) +test(11.03, ans, data.table(a=c("b"), res=c(4L), key="a")) +out = grep("Argument.*substitute", out, value=TRUE) +test(11.031, 
length(out), 3L) +test(11.032, "Argument 'by' after substitute: a" %in% out, TRUE) +test(11.033, "Argument 'j' after substitute: .(res = sum(b))" %in% out, TRUE) +test(11.034, "Argument 'i' after substitute: a %in% c(\"b\", \"c\")" %in% out, TRUE) +# substitute2 during join +d1 = data.table(id1=1:4, v1=5) +d2 = data.table(id1=c(0L,2:3), v1=6) +out = capture.output(ans <- d1[d2, on="id1<=id1", .(c1, c2, c3, c4), env=list(c1="x.id1", c2="i.id1", c3="x.v1", c4="i.v1"), verbose=TRUE]) +test(11.041, ans, data.table(x.id1=c(NA,1:2,1:3), i.id1=c(0L,2L,2L,3L,3L,3L), x.v1=c(NA,rep(5,5)), i.v1=rep(6,6))) +out = grep("Argument.*substitute", out, value=TRUE) +test(11.042, length(out), 2L) ## 2L because i is non-missing attempt to substitute is made +test(11.043, "Argument 'j' after substitute: .(x.id1, i.id1, x.v1, i.v1)" %in% out, TRUE) +d1 = data.table(id1=c(2L,4L,2L,4L), v1=5) +d2 = data.table(id1=c(0L,2:3), v1=6) +out = capture.output(ans <- d1[dd, on="id1<=id1", .(sum(c3), sum(c4)), by=by, env=list(dd="d2", c3="x.v1", c4="i.v1", by=".EACHI"), verbose=TRUE]) +test(11.044, ans, data.table(id1=c(0L,2L,3L), V1=c(NA,10,10), V2=c(6,6,6))) +out = grep("Argument.*substitute", out, value=TRUE) +test(11.045, length(out), 3L) +test(11.046, "Argument 'by' after substitute: .EACHI" %in% out, TRUE) +test(11.047, "Argument 'j' after substitute: .(sum(x.v1), sum(i.v1))" %in% out, TRUE) +test(11.048, "Argument 'i' after substitute: d2" %in% out, TRUE) +dt1 = data.table(x = letters[1:5], y = 1:5) +dt2 = data.table(x = letters[1:3], y = 11:13) +target_v = "y" +source_v = paste0("i.", target_v) +on_v = "x" +out = capture.output(invisible(dt1[dt2, target_v := source_v, on = on_v, env = list(target_v = target_v, source_v = source_v), verbose=TRUE])) +out = grep("Argument.*substitute", out, value=TRUE) +test(11.049, length(out), 2L) +test(11.050, dt1, data.table(x = c("a", "b", "c", "d", "e"), y = c(11L, 12L, 13L, 4L, 5L))) +# substitute special symbols +d = data.table(V1=1:2, V2=1:4) 
+test(11.051, d[, j, by, env=list(j=".N", by="V1")], data.table(V1=c(1L,2L), N=c(2L,2L))) +test(11.052, d[, j, by, env=list(j=".SD", by="V1")], data.table(V1=c(1L,1L,2L,2L), V2=c(1L,3L,2L,4L))) +test(11.053, d[, j, env=I(list(j=as.name(".N")))], 4L) +test(11.054, d[, .(op, fun(col)), by=by, env=list(op=".N", fun="sum", col="V2", by="V1")], data.table(V1=1:2, N=c(2L,2L), V2=c(4L,6L))) +# get and mget use cases +d = as.data.table(lapply(1:5, rep, 2L)) +setnames(d, paste0("c",1:5)) +v1 = "c1"; v2 = "c2"; v3 = "c3"; v4 = "c4"; v5 = "c5" +test(11.061, d[, v1, env=list(v1=v1)], d[, get(v1)]) ## symbol c1 +test(11.062, d[, v1, env=list(v1=I(v1))], data.table(c1=c(1L,1L))) ## character "c1" +test(11.063, d[, list(v1), env=list(v1=v1)], d[, mget(v1)]) ## symbol c1 in list +test(11.064, d[, v1v2, env=list(v1v2=I(c(v1,v2)))], d[, mget(c(v1, v2))]) ## character c("c1","c2") +test(11.065, d[, v1v2, env=list(v1v2=as.list(c(v1,v2)))], d[, mget(c(v1, v2))]) ## call list(c1,c2) ## auto-enlist +test(11.066, d[, .(v1), env=list(v1=v1)], data.table(c1=c(1L,1L))) ## d[, .(get(v1))] - (m)get would return unnamed columns +test(11.067, d[, .(v1, v2), env=list(v1=v1, v2=v2)], data.table(c1=c(1L,1L),c2=c(2L,2L))) ## d[, .(get(v1), get(v2))] +test(11.068, d[, .(sum(v1)), env=list(v1=v1)], d[, .(sum(get(v1)))]) +test(11.069, d[, lapply(vN, sum), env=list(vN=as.list(setNames(nm = c(v1, v3))))], d[, lapply(mget(c(v1,v3)), sum)]) +test(11.070, d[, c(list(c1=c1, c2=c2), list(v3=v3), list(v4=v4, v5=v5)), env=list(v3=v3,v4=v4,v5=v5)], d) ## d[, c(list(c1, c2), list(get(v3)), mget(c(v4,v5)))] - some are unnamed +# empty input +d = data.table(x=1:2, y=1:4) +test(11.081, d[.i, env=list(.i=substitute()), verbose=TRUE], d, notOutput="after substitute") +test(11.082, d[.i, .j, .by, env=list(.i=substitute(), .j=substitute(), .by=substitute()), verbose=TRUE], d, notOutput="after substitute") +f = function(x, i, j, by) { + x[.i, .j, .by, env=list(.i=substitute(i), .j=substitute(j), .by=substitute(by)), 
verbose=TRUE] +} +test(11.083, f(d), d) +test(11.084, f(d, 1), d[1], output="Argument 'i' after substitute", notOutput="Argument 'j' after substitute") +test(11.085, f(d,, 1), d[,1], output="Argument 'j' after substitute", notOutput="Argument 'i' after substitute") +test(11.086, f(d, 1, 1), d[1, 1], output="Argument 'j' after substitute.*Argument 'i' after substitute") + +#1985 weird exception when by contains get +tb = data.table(x=c(1,2), y=c(3,4), z=c(5,6), w=c("a","b")) +test(11.101, tb[w != "b", .(x=sum(x)), by=.(y, zz=.z), env=list(.z="z")], data.table(y=3, zz=5, x=1)) +dtIris = as.data.table(iris) +speciesVar = "Species" +test(11.102, dtIris[Sepal.Length > 4, .N, by = .(var = .speciesVar, Petal.Width), env = list(.speciesVar = speciesVar)], dtIris[Sepal.Length > 4, .N, by = .(var = Species, Petal.Width)]) +#2589 Need an easier way to use dynamically determined symbols +dt = data.table(x1 = 1:10, x2 = 10:1, x3 = 1:10) +s1 = "x2"; s2 = "x3" +test(11.103, dt[, s1 * s2, env=list(s1=s1,s2=s2)], c(10L, 18L, 24L, 28L, 30L, 30L, 28L, 24L, 18L, 10L)) +#2884 Alternative way to dynamic symbol usage in `j` +dt = data.table(id = rep(1:2, 5), x1 = rnorm(10), x2 = rnorm(10), y1 = rnorm(10), y2 = rnorm(10)) +test(11.104, dt[, .(xsum = sum(x), ysum = sum(y)), by = id, env = list(x = "x1", y = "y2")], dt[, .(xsum=sum(x1), ysum=sum(y2)), by=id]) +#2816 Possible regression for programmatic use in `j` +dt = data.table(x=1:3) +var = "x" +dt[, var := var+1L, env=list(var="x")] +test(11.105, dt, data.table(x=2:4)) +# injecting quoted expressions +#750 `by=list(eval(as.name("colA")))` renames column +DT = data.table(colA=1:4, colB=5:8, colC=9:12) +test(11.106, DT[, sum(colA), by=list(grp_name=grp), env=list(grp_name="colA", grp="colA")], data.table(colA=1:4, V1=1:4)) +#2432 Add Programmable NSE +co2 = as.data.table(CO2) +Jexp1 = quote(max(conc)) +Jexp2 = quote(mean(conc)) +Jexp = substitute(list(Jexp1, round(Jexp2)), list(Jexp1=Jexp1, Jexp2=Jexp2)) +out = capture.output(ans <- co2[, 
j, by=Type, env=list(j=Jexp), verbose=TRUE]) +test(11.107, ans, data.table(Type=factor(c("Quebec","Mississippi"), levels=c("Quebec","Mississippi")), V1=c(1000,1000), V2=c(435,435))) +out = grep("Argument.*substitute", out, value=TRUE) +test(11.108, length(out), 2L) +test(11.109, "Argument 'by' after substitute: Type" %in% out, TRUE) +test(11.110, "Argument 'j' after substitute: list(max(conc), round(mean(conc)))" %in% out, TRUE) +#628 Change j=list(xout=eval(...))'s eval to eval within scope of DT +dat = data.table(x_one=1:10, x_two=1:10, y_one=1:10, y_two=1:10) +f = function(vars) as.call(c(quote(list), lapply(setNames(vars, paste(vars,"out",sep="_")), function(var) substitute2(one-two, list(one=paste(var,"one",sep="_"), two=paste(var,"two",sep="_")))))) +test(11.111, dat[, j, env=list(j = f(c("x","y")))], dat[, list(x_out = x_one - x_two, y_out = y_one - y_two)]) + +# vignette examples +square = function(x) x^2 +test(12.01, + substitute2(outer(inner(var1) + inner(var2)), env = list(outer = "sqrt", inner = "square", var1 = "a", var2 = "b")), + quote(sqrt(square(a) + square(b)))) +DT = as.data.table(iris) +test(12.02, + DT[, outer(inner(var1) + inner(var2)), env = list(outer = "sqrt", inner = "square", var1 = "Sepal.Length", var2 = "Sepal.Width")], + DT[, sqrt(square(Sepal.Length) + square(Sepal.Width))]) +test(12.03, # return as data.table, substitute call argument name + DT[, .(Species, var1, var2, out = outer(inner(var1) + inner(var2))), env = list(outer = "sqrt", inner = "square", var1 = "Sepal.Length", var2 = "Sepal.Width", out = "Sepal.Hypotenuse")], + DT[, .(Species, Sepal.Length, Sepal.Width, Sepal.Hypotenuse = sqrt(square(Sepal.Length) + square(Sepal.Width)))]) +test(12.04, # i, j, by + DT[filter_col %in% filter_val, .(var1, var2, out = outer(inner(var1) + inner(var2))), by = by_col, env = list(outer = "sqrt", inner = "square", var1 = "Sepal.Length", var2 = "Sepal.Width", out = "Sepal.Hypotenuse", filter_col = "Species", filter_val = I(c("versicolor", 
"virginica")), by_col = "Species")], + DT[Species %in% c("versicolor","virginica"), .(Sepal.Length, Sepal.Width, Sepal.Hypotenuse = sqrt(square(Sepal.Length) + square(Sepal.Width))), by = Species]) +test(12.05, # like base R, env AsIs class + substitute2(rank(input, ties.method = ties), env = I(list(input = as.name("Sepal.Width"), ties = "first"))), + quote(rank(Sepal.Width, ties.method = "first"))) +test(12.06, # only particular elements of env are AsIs class + substitute2(rank(input, ties.method = ties), env = list(input = "Sepal.Width", ties = I("first"))), + quote(rank(Sepal.Width, ties.method = "first"))) +test(12.07, # all are symbols + substitute2(f(v1, v2), list(v1 = "a", v2 = list("b", list("c", "d")))), + quote(f(a, list(b, list(c, d))))) +test(12.08, # 'a' and 'd' should stay as character + substitute2(f(v1, v2), list(v1 = I("a"), v2 = list("b", list("c", I("d"))))), + quote(f("a", list(b, list(c, "d"))))) +cols = c("Sepal.Length", "Sepal.Width") +test(12.09, # data.table automatically enlist nested lists into list calls + DT[, j, env = list(j = as.list(cols))], + DT[, list(Sepal.Length, Sepal.Width)]) +test(12.10, # turning above 'j' list into a list call + DT[, j, env = list(j = quote(list(Sepal.Length, Sepal.Width)))], + DT[, list(Sepal.Length, Sepal.Width)]) +test(12.11, # the same as above but accepts character vector + DT[, j, env = list(j = as.call(c(quote(list), lapply(cols, as.name))))], + DT[, list(Sepal.Length, Sepal.Width)]) +test(12.12, # list of symbols + DT[, j, env = I(list(j = lapply(cols, as.name))), verbose = TRUE], + error = "j-argument should be", + output = "list(Sepal.Length, Sepal.Width)") +test(12.13, substitute2(j, env = I(list(j = lapply(cols, as.name)))), lapply(cols, as.name)) +test(12.14, substitute2(j, env = list(j = as.list(cols))), as.call(c(quote(list), lapply(cols, as.name)))) +outer = "sqrt"; inner = "square"; vars = c("Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width") +syms = lapply(vars, as.name) 
+to_inner_call = function(var, fun) call(fun, var) +inner_calls = lapply(syms, to_inner_call, inner) +test(12.15, inner_calls, list(quote(square(Sepal.Length)), quote(square(Sepal.Width)), quote(square(Petal.Length)), quote(square(Petal.Width)))) +to_add_call = function(x, y) call("+", x, y) +add_calls = Reduce(to_add_call, inner_calls) +test(12.16, add_calls, quote(square(Sepal.Length) + square(Sepal.Width) + square(Petal.Length) + square(Petal.Width))) +rms = substitute2(expr = outer((add_calls) / len), env = list(outer = outer, add_calls = add_calls, len = length(vars))) +test(12.17, rms, quote(sqrt((square(Sepal.Length) + square(Sepal.Width) + square(Petal.Length) + square(Petal.Width))/4L))) +test(12.18, + DT[, j, env = list(j = rms)], + DT[, sqrt((square(Sepal.Length) + square(Sepal.Width) + square(Petal.Length) + square(Petal.Width))/4L)]) +test(12.19, # same but skipping last substitute2 call and using add_calls directly + DT[, outer((add_calls) / len), env = list(outer = outer, add_calls = add_calls, len = length(vars))], + DT[, sqrt((square(Sepal.Length) + square(Sepal.Width) + square(Petal.Length) + square(Petal.Width))/4L)]) +j = substitute2(j, list(j = as.list(setNames(nm = c(vars, "Species", "rms"))))) # return as data.table +j[["rms"]] = rms +test(12.20, + DT[, j, env = list(j = j)], + DT[, .(Sepal.Length=Sepal.Length, Sepal.Width=Sepal.Width, Petal.Length=Petal.Length, Petal.Width=Petal.Width, Species, rms = sqrt((square(Sepal.Length) + square(Sepal.Width) + square(Petal.Length) + square(Petal.Width))/4L))]) +j = as.call(c( # alternatively + quote(list), + lapply(setNames(nm = vars), as.name), + list(Species = as.name("Species")), + list(rms = rms) +)) +test(12.21, + DT[, j, env = list(j = j)], + DT[, .(Sepal.Length=Sepal.Length, Sepal.Width=Sepal.Width, Petal.Length=Petal.Length, Petal.Width=Petal.Width, Species, rms = sqrt((square(Sepal.Length) + square(Sepal.Width) + square(Petal.Length) + square(Petal.Width))/4L))]) +v1 = "Petal.Width" # get +v2 
= "Sepal.Width" +test(12.22, + DT[, .(total = sum(v1, v2)), env = list(v1 = v1, v2 = v2)], + DT[, .(total = sum(get(v1), get(v2)))]) +v = c("Petal.Width", "Sepal.Width") # mget +test(12.23, + DT[, lapply(v, mean), env = list(v = as.list(v))], + DT[, lapply(list(Petal.Width, Sepal.Width), mean)]) +test(12.24, + DT[, lapply(v, mean), env = list(v = as.list(setNames(nm = v)))], + DT[, lapply(mget(v), mean)]) +cl = quote(.(Petal.Width = mean(Petal.Width), Sepal.Width = mean(Sepal.Width))) +test(12.25, DT[, cl, env = list(cl = cl)], DT[, eval(cl)]) + +####################### +# contributed use cases +####################### + +# renkun-ken +dt = as.data.table(list( ## RNGversion("3.5.0"); set.seed(108); round(numeric(), 4) + symbol = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L), + date = c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L), + grp1 = c(1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, 1L, 2L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 1L), + grp2 = c(3L, 3L, 3L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 3L, 2L, 1L, 3L, 3L, 1L, 1L, 3L, 3L, 1L, 3L, 3L, 2L, 1L, 2L, 2L, 3L, 2L), + x0 = c(1.1396, -0.2706, -2.2801, -0.1572, -1.0671, -0.9666, -0.8071, -0.23, -0.1626, 1.4347, -0.2234, 0.5613, -0.7084, 0.2598, -0.2023, 1.8624, 0.5209, -1.561, -1.2297, -1.0064, -0.9782, -0.1291, -2.275, 0.5268, -0.5316, 2.3234, 0.0556, -0.3623, -0.5695, -0.0142), + x1 = c(1.3553, 1.2909, -0.8958, -0.3677, 1.0041, 1.1247, -0.0595, 0.7503, 0.3503, -1.559, -1.6823, -0.0906, 0.7874, 0.2785, -0.1712, -1.5325, 0.408, 0.5981, -1.1464, -0.2233, -0.0635, 0.4461, -1.9813, -0.7281, 1.1216, -0.0516, 1.373, 0.2388, 0.6257, -0.0551), + x2 = c(-0.2457, -0.9797, 0.3957, -1.094, -1.1973, 0.3137, 0.2004, -1.9404, 1.6927, -0.4063, 0.0731, -0.3338, -2.2683, -1.1105, 0.2115, -0.0163, 0.2139, 0.5016, 0.2296, 0.4189, 0.3295, 0.0408, 1.4633, 
-0.7118, 0.4811, 0.4499, -0.4214, 0.1503, -0.2222, 0.4573), + x3 = c(1.3439, 0.3841, -0.4787, -0.6312, -0.5481, -0.8703, -1.2684, -1.4851, 0.6789, 0.1575, 2.7873, -1.1201, 0.1337, -0.6053, -0.6538, 0.4597, -0.8955, 0.1625, 1.3767, 0.6024, -1.2141, -1.3534, -0.6583, -0.095, 1.1923, 0.3062, -0.6818, 0.2407, -0.8534, -1.4521), + y1 = c(-0.2159, 0.8934, 0.0216, -1.0682, 1.2549, -0.1517, 1.4404, 1.3436, -2.1388, -0.2453, -1.4628, -1.7654, 0.6437, -0.9685, -0.9393, 0.0962, -0.2041, 1.1007, -1.8705, 0.2053, -0.9238, -0.6301, 1.9876, 1.2862, 0.3363, -0.334, -1.5149, -1.3254, 0.5716, -0.7165), + y2 = c(-0.5962, 0.3394, -0.2971, -0.6241, -0.5279, 1.1945, -0.152, 0.8207, 0.8731, 0.2281, 0.3466, -1.4862, -0.4694, 0.0435, 0.9888, -0.0797, 0.7109, -0.6636, -0.4402, 1.0093, -0.0655, 0.5099, 1.5415, 1.8833, -1.2365, 0.5085, 0.7073, -0.2191, 0.2442, 0.1501), + y3 = c(0.6222, -0.7174, -1.9616, -0.0117, -0.114, 0.1313, -1.3854, 1.5021, -0.7115, 0.4822, 1.8474, 1.1742, 0.8192, 0.2819, -1.3365, -0.6179, -0.9706, 0.2179, -1.2654, 1.0065, -2.2514, -0.7161, 0.9578, -0.0335, 0.3166, 0.0471, -0.9983, -0.6455, 1.4064, 0.2954))) +xs = c("x", "y") ## apply same formula to different set of columns +out = vector("list", length(xs)) +names(out) = xs +for (x in xs) { + out[[x]] = capture.output(invisible(dt[, RATIO := (R3 - R2) * (R2 - R1) * (R3 - R1) / sqrt(R1^2 + R2^2 + R3^2), + env = list(RATIO = paste0(x, "_ratio"), R1 = paste0(x, 1), R2 = paste0(x, 2), R3 = paste0(x, 3)), + verbose = TRUE])) # assign to nul, other +} +x_rat = c(0.0150761734954921, 1.68603966340262, -0.432117480975587, 0.0673302370985585, +1.3396117186265, -1.31542975195976, 0.358990921654875, 1.07137398842599, -0.240804570258909, 0.689134697166349, 6.53944855876942, -0.167936293758913, 1.99518595021054, 0.478886131900058, 0.225672526235629, 0.898595029001403, -0.278725254056844, -0.0178774591562397, 2.20493313305713, 0.126869315798536, 0.554130827073314, -0.713268530169861, -3.79227895596263, 0.00622410754980975, 
-0.0188758915276097, -0.0471688415642347, -0.60391972591766, -4.09856489441073e-05, -0.732101471917737, 0.897197218930381) +y_rat = c(-0.437137931952723, -0.789182136098114, -0.530238437504097, 0.232242653273211, 0.739369921650875, -0.334413400872578, -2.76908561851941, -0.0259528361203494, -2.81810697204509, 0.149050554297973, 3.77409495341661, 0.84329199487865, -0.220290266022232, 0.298795199314652, 0.932599183107379, -0.107238527606129, 0.966425089066359, 1.05320054480325, -0.310406226974414, -0.00125245906648534, 1.02314586034282, 0.111130598215941, -0.0996278782862306, 0.66222170820334, 0.0364570881136429, -0.242779893874194, -1.00552326863148, -0.215191768368067, -0.206580227824426, 0.16140646232964) +test(101.01, dt$x_ratio, x_rat) +test(101.02, dt$y_ratio, y_rat) +test(101.03, length(grep("Argument.*substitute", out[["x"]], value=TRUE)), 1L) +test(101.04, length(grep("Argument.*substitute", out[["y"]], value=TRUE)), 1L) +test(101.05, "Argument 'j' after substitute: `:=`(x_ratio, (x3 - x2) * (x2 - x1) * (x3 - x1)/sqrt(x1^2 + x2^2 + x3^2))" %in% out[["x"]], TRUE) +test(101.06, "Argument 'j' after substitute: `:=`(y_ratio, (y3 - y2) * (y2 - y1) * (y3 - y1)/sqrt(y1^2 + y2^2 + y3^2))" %in% out[["y"]], TRUE) +daily_cor = function(data, x, y) { ## daily correlation of user input features + data[, .(cor = cor(x, y)), + keyby = date, + env = list(x = x, y = y), + verbose = TRUE] +} +out = capture.output(ans <- daily_cor(dt, "x0", "y2")) +test(101.07, length(grep("Argument.*substitute", out, value=TRUE)), 2L) ## 'by' (or 'keyby') is not substituted here but it still goes via substitute2 because it is non-missing +test(101.08, "Argument 'by' after substitute: date" %in% out, TRUE) +test(101.09, "Argument 'j' after substitute: .(cor = cor(x0, y2))" %in% out, TRUE) +group_cor = function(data, x, y, g) { ## group cor comparison of user input features + cor_dt = data[, lapply(.SD, function(x) cor(x, Y)), + keyby = .(group = GROUP), + .SDcols = x, + env = list(Y = y, GROUP 
= g), + verbose = TRUE] + melt.data.table(cor_dt, id.vars = "group", measure.vars = x, variable.name = "x", value.name = "cor", variable.factor = FALSE) ## not relevant but lets keep it for completeness +} +out = capture.output(dt1 <- group_cor(dt, c("x0", "x1", "x2"), "y1", "grp1")) +test(101.10, length(grep("Argument.*substitute", out, value=TRUE)), 2L) +test(101.11, "Argument 'by' after substitute: .(group = grp1)" %in% out, TRUE) +test(101.12, "Argument 'j' after substitute: lapply(.SD, function(x) cor(x, y1))" %in% out, TRUE) +out = capture.output(dt2 <- group_cor(dt, c("x0", "x1", "x2"), "y1", "grp2")) +test(101.13, length(grep("Argument.*substitute", out, value=TRUE)), 2L) +test(101.14, "Argument 'by' after substitute: .(group = grp2)" %in% out, TRUE) +test(101.15, "Argument 'j' after substitute: lapply(.SD, function(x) cor(x, y1))" %in% out, TRUE) +stats_dt1 = as.data.table(list( + x = c("x0", "x1", "x2"), + min = c(-0.325967794724422, -0.126026585686073, -0.398950077203113), + mean = c(-0.277318407860876, -0.0164428001010045, -0.220868266148565), + max = c(-0.22866902099733, 0.0931409854840638, -0.0427864550940165) +), key="x") +test(101.16, dt1[, .(min = min(cor), mean = mean(cor), max = max(cor)), keyby = x], stats_dt1) ## post aggregation with known colnames, not relevant but lets keep it for completeness +stats_dt2 = as.data.table(list( + x = c("x0", "x1", "x2"), + min = c(-0.392714958827804, -0.339274985404091, -0.45937864657761), + mean = c(-0.279968323960171, 0.150866984990403, 0.0838779176840593), + max = c(-0.180337725136444, 0.697473394580653, 0.714679537878464) +), key="x") +test(101.17, dt2[, .(min = min(cor), mean = mean(cor), max = max(cor)), keyby = x], stats_dt2) +set.seed(108) ## to many values to hardcode +yn = c(1, 5, 10, 20) +ycols = paste0("y", yn) +ydt = data.table(symbol = rep(1:3, each = 100)) +ydt[, date := seq_len(.N), by = symbol] +ydt[, ret := rnorm(.N)] +ydt[, (ycols) := shift(ret, yn, type = "lead"), by = symbol] +xdt = 
data.table(symbol = rep(1:2, each = 20)) +xdt[, date := seq_len(.N), by = symbol] +xdt[, `:=`(x1 = rnorm(.N), x2 = rnorm(.N))] +cor_xy = function(xdt, ydt, x, y) { ## cor between each x and a single y + xdt[ydt, y := Y, on = .(symbol, date), + env = list(Y = y), + verbose = TRUE] + on.exit(xdt[, y := NULL]) + xdt[, lapply(.SD, cor, y = y), keyby = symbol, .SDcols = x] +} +out = capture.output(ans <- cor_xy(xdt, ydt, c("x1", "x2"), "y10")) +exp = as.data.table(list(symbol = 1:2, x1 = c(0.529292252112253, 0.0301956035638738), x2 = c(0.287076866252898, -0.335969587268599)), key="symbol") +test(102.01, ans, exp) +test(102.02, length(grep("Argument.*substitute", out, value=TRUE)), 2L) +test(102.03, "Argument 'j' after substitute: `:=`(y, y10)" %in% out, TRUE) +test(102.04, "Argument 'i' after substitute: ydt" %in% out, TRUE) +cor_xy2 = function(xdt, ydt, x, y) { ## cor between each pair of x and y + rbindlist(lapply(y, function(yi) { + xdt[ydt, y := Y, on = .(symbol, date), + env = list(Y = yi)] + on.exit(xdt[, y := NULL]) + rbindlist(lapply(x, function(xi) { + xdt[, .(x = xi, y = yi, cor = cor(X, y)), keyby = symbol, + env = list(X = xi)] + })) + })) +} +cor_dt = cor_xy2(xdt, ydt, c("x1", "x2"), ycols) +exp = as.data.table(list( + symbol = c(1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L), + x = c("x1", "x1", "x2", "x2", "x1", "x1", "x2", "x2", "x1", "x1", "x2", "x2", "x1", "x1", "x2", "x2"), + y = c("y1", "y1", "y1", "y1", "y5", "y5", "y5", "y5", "y10", "y10", "y10", "y10", "y20", "y20", "y20", "y20"), + cor = c(0.0963296961360529, -0.155702586981777, 0.45855688298414, -0.0867798048307359, -0.272158447799069, 0.0969909109333228, -0.172091337596075, -0.231918279862371, 0.529292252112253, 0.0301956035638738, 0.287076866252898, -0.335969587268599, 0.489259093604126, 0.190094143537513, 0.382176633086643, -0.0481151265706696) +)) +test(102.05, cor_dt, exp) +cor_xy3 = function(xdt, ydt, x, y) { ## cor matrix of existing columns and dynamically in-place 
merged columns + cl = as.call(lapply(setNames(c(":=", y), c("", y)), as.name)) + xdt[ydt, j, on = .(symbol, date), + env = list(j=cl)] + on.exit(xdt[, (y) := NULL]) + xdt[, cor(.SD), .SDcols = c(x, y)] +} +cor_mx = cor_xy3(xdt, ydt, c("x1", "x2"), ycols) +exp = structure(c( + 1, 0.242249239102964, -0.0286729531730845, -0.0936087330415663, 0.245575245812681, 0.323778522797129, 0.242249239102964, 1, 0.199165327684089, -0.160954354243643, 0.0034174556771777, 0.185518712777259, -0.0286729531730845, 0.199165327684089, 1, -0.164047186655086, -0.0689536633998918, -0.0326400434160486, -0.0936087330415663, -0.160954354243643, -0.164047186655086, 1, -0.0810998892055976, -0.106457956110047, 0.245575245812681, 0.0034174556771777, -0.0689536633998918, -0.0810998892055976, 1, 0.324977066952494, 0.323778522797129, 0.185518712777259, -0.0326400434160486, -0.106457956110047, 0.324977066952494, 1 + ), .Dim = c(6L, 6L), .Dimnames = list( + c("x1", "x2", "y1", "y5", "y10", "y20"), + c("x1", "x2", "y1", "y5", "y10", "y20") +)) +test(102.06, cor_mx, exp) +nadt = data.table(x1 = c(1, 2, NA, Inf), x2 = c(2, NA, 3, Inf), x3 = c(NA, 1, 2, 0)) ## fill abnormal values of multiple columns +dt_fill = function(data, columns, selector, fill) { + selector = match.fun(selector) + for (col in columns) { + data[selector(X), X := fill, env = list(X = col)] + } +} +dt_fill(nadt, c("x1", "x2", "x3"), is.na, 0) +test(103.01, nadt, data.table(x1 = c(1, 2, 0, Inf), x2 = c(2, 0, 3, Inf), x3 = c(0, 1, 2, 0))) +dt_fill(nadt, c("x1", "x2", "x3"), is.infinite, 0) +test(103.02, nadt, data.table(x1 = c(1, 2, 0, 0), x2 = c(2, 0, 3, 0), x3 = c(0, 1, 2, 0))) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index f3adef959d..9514dd5820 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -30,6 +30,7 @@ if (exists("test.data.table", .GlobalEnv, inherits=FALSE)) { compactprint = data.table:::compactprint cube.data.table = data.table:::cube.data.table dcast.data.table = 
data.table:::dcast.data.table + if (!exists('endsWith', 'package:base', inherits=FALSE)) endsWith = data.table:::endsWith forder = data.table:::forder forderv = data.table:::forderv format.data.table = data.table:::format.data.table @@ -53,6 +54,7 @@ if (exists("test.data.table", .GlobalEnv, inherits=FALSE)) { shallow = data.table:::shallow # until exported .shallow = data.table:::.shallow split.data.table = data.table:::split.data.table + if (!exists('startsWith', 'package:base', inherits=FALSE)) startsWith = data.table:::startsWith test = data.table:::test uniqlengths = data.table:::uniqlengths uniqlist = data.table:::uniqlist @@ -101,6 +103,42 @@ if (!test_longdouble) { # e.g. under valgrind, longdouble.digits==53; causing these to fail: 1262, 1729.04, 1729.08, 1729.09, 1729.11, 1729.13, 1830.7; #4639 } +# generate simple error messages from base that are checked against in our tests. this helps +# protect us against these messages evolving in base in the future, and against these messages +# potentially not being produced in English. 
+# Three use cases: +# (1) match message exactly [missing delim] +# (2) match message pattern after dropping anything between delimeters [delim, fmt=FALSE] +# (3) function factory for matching messages exactly by substituting anything between delimeters [delim, fmt=TRUE] +get_msg = function(e, delim, fmt=FALSE) { + msg = tryCatch(e, error=identity, warning=identity)$message + if (missing(delim)) return(msg) + if (length(delim) == 1L) delim[2L] = delim[1L] + msg = gsub( + sprintf("%1$s[^%2$s]+%2$s", delim[1L], delim[2L]), + sprintf("%s%s%s", delim[1L], if (fmt) "%s" else ".+", delim[2L]), + msg + ) + if (fmt) return(function(x) sprintf(msg, x)) + return(msg) +} +base_messages = list( + missing_object = get_msg(`__dt_test_missing_` + 1, "'", fmt=TRUE), + missing_function = get_msg(`__dt_test_missing_`(), '"', fmt=TRUE), + invalid_arg_unary_operator = get_msg(-'a'), + invalid_arg_binary_operator = get_msg(1 + 'a'), + invalid_arg_sum = get_msg(sum('a'), c("\\(", "\\)"), fmt=TRUE), + arg_length_mismatch = get_msg(base::order(1, 1:2)), + empty_max = get_msg(max(numeric())), + empty_min = get_msg(min(numeric())), + coerce_na = get_msg(as.integer('a')), + locked_binding = get_msg({e = new.env(); e$x = 1; lockBinding('x', e); e$x = 2}, "'", fmt=TRUE), + missing_file = get_msg({tmp <- tempfile(tmpdir=tempfile("xxx")); file(tmp, "w")}, "'"), + # gives both error & warning but tryCatch returns the warning first, so suppress + cant_open_file = get_msg(suppressWarnings({con<-file(tempfile()); open(con, 'r')})), + mixed_subscripts = get_msg(letters[-1:1]) +) + ########################## test(1.1, tables(env=new.env()), null.data.table(), output = "No objects of class") @@ -977,7 +1015,7 @@ DT = data.table(a=1:5, b=6:10, c=11:15) test(327, within(DT,rm(a,b)), data.table(c=11:15)) test(328, within(DT,rm(b,c)), data.table(a=1:5)) test(329, within(DT,rm(b,a)), data.table(c=11:15)) -test(330, within(DT,rm(b,c,d)), data.table(a=1:5), warning="object 'd' not found") +test(330, 
within(DT,rm(b,c,d)), data.table(a=1:5), warning=base_messages$missing_object("d")) DT[,c("b","a")]=NULL test(332, DT, data.table(c=11:15)) test(333, within(DT,rm(c)), data.table(NULL)) @@ -1119,8 +1157,8 @@ test(378, cbind(), NULL) test(379, rbind(), NULL) DT = data.table(a=rep(1:3,1:3),b=1:6) -test(380, DT[,{.SD$b[1]=10L;.SD}, by=a], error="locked binding") # .SD locked for 1st group -test(381, DT[,{if (a==2) {.SD$b[1]=10L;.SD} else .SD}, by=a], error="locked binding") # .SD locked in 2nd group onwards too +test(380, DT[,{.SD$b[1]=10L;.SD}, by=a], error=base_messages$locked_binding(".SD")) # .SD locked for 1st group +test(381, DT[,{if (a==2) {.SD$b[1]=10L;.SD} else .SD}, by=a], error=base_messages$locked_binding(".SD")) # .SD locked in 2nd group onwards too # test that direct := is trapped, but := within a copy of .SD is allowed (FAQ 4.5). See also tests 556-557. test(382, DT[,b:=.N*2L,by=a], data.table(a=rep(1:3,1:3),b=rep(2L*(1:3),1:3))) @@ -1588,7 +1626,7 @@ test(534, names(transform(data.table('a b'=1), `c d`=`a b`)), c("a b","c d")) # Test keyby, new in v1.8.0 DT = data.table(a=INT(1,3,1,2,3,2),b=1:2,c=1:3,v=1:6) -test(535, DT[,sum(v),by=a, keyby=a], error="not both") +test(535, DT[,sum(v),by=a, keyby=a], error="When.*both.*keyby must be TRUE or FALSE") # updated after #4307 test(536, DT[,sum(v),by=a], data.table(a=c(1L,3L,2L),V1=c(4L,7L,10L))) # retains appearance order ans = data.table(a=1:3,V1=c(4L,10L,7L),key="a") test(537, DT[,sum(v),keyby=a], ans) @@ -1672,7 +1710,7 @@ test(570.1, DT[,list(.I=.I),list(a,b)][,.I,a], error="The column '.I' can't be g DT = data.table("a "=1:2, "b"=3:4," b"=5:6, v=1:6) test(571, DT[,sum(v),by="b, b"], data.table("b"=3:4, " b"=5:6, V1=c(9L,12L))) test(572, DT[,sum(v),by="a , b"], data.table("a "=1:2, " b"=5:6, V1=c(9L,12L))) -test(573, DT[,sum(v),by="b, a"], error="object ' a' not found") +test(573, DT[,sum(v),by="b, a"], error=base_messages$missing_object(" a")) # Test base::unname, used by melt, and only supported by 
data.table for DF compatibility for non-dtaware packages DT = data.table(a=1:3, b=4:6) @@ -2036,7 +2074,7 @@ if (ncol(DT)==2L) setnames(DT,c("A","B")) # else don't stop under torture with s test(714, DT[,z:=6:10], data.table(A=1:5,B=5,z=6:10)) # Test J alias is now removed outside DT[...] from v1.8.7 (to resolve rJava::J conflict) -test(715, J(a=1:3,b=4), error="could not find function.*J") +test(715, J(a=1:3,b=4), error=base_messages$missing_function("J")) # Test get in j DT = data.table(a=1:3,b=4:6) @@ -2254,7 +2292,7 @@ test(811, DT[c("b","foo","c"),which=NA,nomatch=0], error="which=NA with nomatch= DT = data.table(a=1:3,b=4:6,c=7:9) # old tests using with=FALSE retained. Eventually will deprecate with=FALSE. test(812.1, DT[,!"b",with=FALSE], DT[,-match("b",names(DT)),with=FALSE]) -test(812.2, DT[,"foo",with=FALSE], error="column(s) not found: foo") +test(812.2, DT[,"foo",with=FALSE], error="column(s) not found: [foo]") test(812.3, DT[,!"foo",with=FALSE], DT, warning="column(s) not removed because not found: [foo]") test(812.4, DT[,!c("b","foo"),with=FALSE], DT[,list(a,c)], warning="column(s) not removed because not found: [foo]") test(812.5, DT[,!2:3,with=FALSE], DT[,-(2:3),with=FALSE]) # for consistency, but ! is really for character column names @@ -2274,7 +2312,7 @@ test(813.4, rownames(DT[2,"a"]), "1") # also repeat 812.* but without with=FALSE since that will be deprecated in future, and cover - as well as ! 
test(814.01, DT[,!"b"], DT[,c("a","c")]) test(814.02, DT[,-"b"], DT[,c("a","c")]) -test(814.03, DT[,"foo"], error="column(s) not found: foo") +test(814.03, DT[,"foo"], error="column(s) not found: [foo]") test(814.04, DT[,!"foo"], DT, warning="column(s) not removed because not found: [foo]") test(814.05, DT[,-"foo"], DT, warning="column(s) not removed because not found: [foo]") test(814.06, DT[,!c("b","foo")], DT[,list(a,c)], warning="column(s) not removed because not found: [foo]") @@ -2325,8 +2363,8 @@ test(827.1, names(a[b]), c("User ID","Blah Blah","Yadda Yadda")) # setcolorder and merge check for dup column names, #2193(ii) setnames(DT2,"b","a") test(828, setcolorder(DT2,c("a","b")), error="x has some duplicated column name(s): a. Please remove or rename") -test(829, merge(DT1,DT2), error="y has some duplicated column name(s): a. Please remove or rename") -test(830, merge(DT2,DT1), error="x has some duplicated column name(s): a. Please remove or rename") +test(829, merge(DT1,DT2), error="y has some duplicated column name(s): [a]. Please remove or rename") +test(830, merge(DT2,DT1), error="x has some duplicated column name(s): [a]. 
Please remove or rename") # attribs such as "comments" should be retained, #2270 DT1 <- data.table(id = seq.int(1, 10), A = LETTERS[1:10], key = "id") @@ -3014,6 +3052,14 @@ test(1034, as.data.table(x<-as.character(sample(letters, 5))), data.table(V1=x)) error="Unknown 'id.vars' type raw") test(1035.012, melt(DT, id.vars=1:3, measure.vars=as.raw(0)), error="Unknown 'measure.vars' type raw") + test(1035.013, melt(data.table(a=1, b=1), id.vars=c(1,1)), data.table(a=1, a.1=1, variable=factor("b"), value=1), + output="Duplicate column names found") + test(1035.014, melt(data.table(a1=1, b1=1, b2=2), na.rm=TRUE, measure.vars=list(a="a1", b=c("b1","b2"))), data.table(variable=factor(1,c("1","2")), a=1, b=1)) + test(1035.015, melt(data.table(a=1+2i, b=1), id.vars="a"), error="Unknown column type 'complex' for column 'a' in 'data'") + + # na.rm=TRUE with list column value, PR#4737 + test(1035.016, melt(data.table(a1=1, b1=list(1:2), b2=list(c('foo','bar'))), na.rm=TRUE, measure.vars=list(a="a1", b=c("b1","b2"))), data.table(variable=factor(1), a=1, b=list(1:2))) + test(1035.017, melt(data.table(a1=1, b1=1, b2=2), na.rm=TRUE, measure.vars=list(a="a1", b=c("b1","b2"))), data.table(variable=factor(1), a=1, b=1))#this worked even before the PR. ans1 = cbind(DT[, c(1,2,8), with=FALSE], variable=factor("l_1")) ans1[, value := DT$l_1] @@ -3037,7 +3083,7 @@ test(1034, as.data.table(x<-as.character(sample(letters, 5))), data.table(V1=x)) test(1035.051, ans1, melt(DT, id.vars="id", measure.vars=list(c(5, 6), c(7, 8)))) test(1035.052, melt(DT, id.vars="id", measure.vars=list(as.raw(0))), error="Unknown 'measure.vars' type raw") - test(1035.06, ans1, melt(DT, id.vars="id", measure.vars=list(5:6, 7:8), na.rm=TRUE)) # should've no effect + test(1035.06, na.omit(ans1), melt(DT, id.vars="id", measure.vars=list(5:6, 7:8), na.rm=TRUE)) test(1035.07, ans1, melt(DT, id.vars="id", measure.vars=patterns("d_", "l_"))) # melt retains ordered factors! 
test(1035.08, melt(DT, id.vars="id", measure.vars=c("f_1", "f_2"), value.factor=TRUE)$value, factor(c(as.character(DT$f_1), as.character(DT$f_2)), ordered=TRUE)) @@ -3175,9 +3221,9 @@ Sep,33.5,19.4,15.7,11.9,0,100.8,100.8,0,12.7,12.7,0,174.1") x[, c("y1","z1"):=NA] test(1037.405, dim(melt(x, measure.vars=patterns("^y", "^z"))), INT(4,5)) test(1037.406, dim(ans<-melt(x, measure.vars=patterns("^y", "^z"), na.rm=TRUE)), INT(2,5)) - test(1037.407, ans$variable, factor(c("1","1"))) + test(1037.407, ans$variable, factor(c("2","2"), c("1", "2"))) test(1037.408, dim(ans<-melt(x, measure.vars=patterns("^y", "^z"), na.rm=TRUE, variable.factor=FALSE)), INT(2,5)) - test(1037.409, ans$variable, c("1","1")) + test(1037.409, ans$variable, c("2","2")) test(1037.410, melt(data.table(NULL), verbose=TRUE), data.table(NULL), output="ncol(data) is 0. Nothing to melt") @@ -3426,7 +3472,7 @@ test(1100, dt1[dt2,roll=-Inf,rollends=c(FALSE,TRUE)]$ind, INT(NA,NA,1,2,2,2,2,2, set.seed(3) DT = data.table(a=5:1, b=runif(5)) ans = dcast(DT, a ~ b, value.var="b")[c(4,.N), c(2,6)] - setnames(ans, substring(names(ans),1,6)) + setnames(ans, substr(names(ans), 1L, 6L)) test(1102.06, ans, data.table("0.1680"=c(NA,DT[1,b]), "0.8075"=c(DT[2,b],NA))) # Fix for case 2 in bug report #71 - dcast didn't aggregate properly when formula RHS has "." 
@@ -3784,7 +3830,7 @@ test(1137.03, DT[, .SD, .SDcols=-"y"], DT[, c(1,3), with=FALSE]) test(1137.04, DT[, .SD, .SDcols=-c("y", "x")], DT[, 3, with=FALSE]) test(1137.05, DT[, .SD, .SDcols=-which(names(DT) %in% c("x", "y", "z"))], null.data.table()) test(1137.06, DT[, .SD, .SDcols=c(1, -2)], error=".SDcols is numeric but has both") -test(1137.07, DT[, .SD, .SDcols=c("x", -"y")], error="invalid argument to unary") +test(1137.07, DT[, .SD, .SDcols=c("x", -"y")], error=base_messages$invalid_arg_unary_operator) test(1137.08, DT[, .SD, .SDcols=c(-1, "x")], error="Some items of .SDcols are") DT <- data.table(x=1:5, y=6:10, z=11:15, zz=letters[1:5]) @@ -4527,8 +4573,7 @@ ix = with(DT, order(1-DT$x, decreasing=TRUE)) test(1251.07, DT[order(1-DT$x, decreasing=TRUE)], DT[ix]) test(1251.08, DT[order(x, list(-y), decreasing=TRUE)], error = "Column 2 is length 1 which differs from length of column 1.*10") -test(1251.09, DT[base::order(x, list(-y), decreasing=TRUE)], - error = "argument lengths differ") # data.table's error is more helpful than base's +test(1251.09, DT[base::order(x, list(-y), decreasing=TRUE)], error=base_messages$arg_length_mismatch) # data.table's error is more helpful than base's # more "edge cases" to ensure we're consistent with base test(1251.10, DT[order("a")], DT[1L]) test(1251.11, DT[order("b", "a")], DT[1L]) @@ -4907,7 +4952,7 @@ test(1290.34, DT[, names(DT) == "x", with=FALSE], as.data.table(ll[c(1,3,4)])) dt1 = data.table(a=character(0),b=numeric(0)) ans1 = data.table(a=character(0), b=numeric(0), c=numeric(0)) ans2 = data.table(a=character(0), b=numeric(0), c=numeric(0), d=integer(0)) -test(1291.1, dt1[, c:=max(b), by='a'], ans1, warning="no non-missing arguments to max") +test(1291.1, dt1[, c:=max(b), by='a'], ans1, warning=base_messages$empty_max) test(1291.2, dt1[, d := integer(0), by=a], ans2) # Bug #21 @@ -4947,7 +4992,7 @@ test(1294.02, dt[, a := 1.5]$a, rep(1L, 3L), test(1294.03, dt[, a := NA]$a, rep(NA_integer_, 3L)) test(1294.04, dt[, a := 
"a"]$a, rep(NA_integer_, 3L), warning=c("Coercing 'character' RHS to 'integer'.*column 1 named 'a'", - "NAs introduced by coercion")) + base_messages$coerce_na)) test(1294.05, dt[, a := list(list(1))]$a, rep(1L, 3L), warning="Coercing 'list' RHS to 'integer' to match.*column 1 named 'a'") test(1294.06, dt[, a := list(1L)]$a, rep(1L, 3L)) @@ -4957,7 +5002,7 @@ test(1294.09, dt[, b := 1L]$b, rep(1,3)) test(1294.10, dt[, b := NA]$b, rep(NA_real_,3)) test(1294.11, dt[, b := "bla"]$b, rep(NA_real_, 3), warning=c("Coercing 'character' RHS to 'double' to match.*column 2 named 'b'", - "NAs introduced by coercion")) + base_messages$coerce_na)) test(1294.12, dt[, b := list(list(1))]$b, rep(1,3), warning="Coercing 'list' RHS to 'double' to match.*column 2 named 'b'") test(1294.13, dt[, b := TRUE]$b, rep(1,3)) @@ -5121,7 +5166,8 @@ test(1313.22, DT[, list(y=max(y, na.rm=TRUE)), by=x], DT[c(5,10)]) # for character set.seed(1L) -DT <- data.table(x=rep(1:6, each=3), y=sample(c("", letters[1:3], NA), 18, TRUE)) +DT <- data.table(x=rep(1:7, each=3), y=sample(c("", letters[1:3], NA), 21, TRUE)) +DT[x==7, y := c("","b","c")] test(1313.23, DT[, min(y), by=x], DT[, base::min(y), by=x]) test(1313.24, DT[, max(y), by=x], DT[, base::max(y), by=x]) test(1313.25, DT[, min(y, na.rm=TRUE), by=x], DT[, base::min(y, na.rm=TRUE), by=x]) @@ -5129,8 +5175,8 @@ test(1313.26, DT[, max(y, na.rm=TRUE), by=x], DT[, base::max(y, na.rm=TRUE), by= DT[x==6, y := NA_character_] test(1313.27, DT[, min(y), by=x], DT[, base::min(y), by=x]) test(1313.28, DT[, max(y), by=x], DT[, base::max(y), by=x]) -test(1313.29, DT[, min(y, na.rm=TRUE), by=x], data.table(x=1:6, V1=c("a","a","c","","a",NA)), warning="No non-missing") -test(1313.30, DT[, max(y, na.rm=TRUE), by=x], data.table(x=1:6, V1=c("b","a","c","a","c",NA)), warning="No non-missing") +test(1313.29, DT[, min(y, na.rm=TRUE), by=x], data.table(x=1:7, V1=c("a","a","c","","a",NA,"")), warning="No non-missing") +test(1313.30, DT[, max(y, na.rm=TRUE), by=x], 
data.table(x=1:7, V1=c("b","a","c","a","c",NA,"c")), warning="No non-missing") # bug 700 - bmerge, roll=TRUE and nomatch=0L when i's key group occurs more than once dt1 <- data.table(structure(list(x = c(7L, 33L), y = structure(c(15912, 15912), class = "Date"), z = c(626550.35284, 7766.385)), .Names = @@ -5839,7 +5885,7 @@ test(1380, DT[a==TRUE], DT[3:4]) # Fix #847, as.data.table.list and character(0) issue x <- data.table(a=character(0), b=character(0), c=numeric(0)) setkey(x, a, b) -test(1381, x[J("foo", character(0)), nomatch=0L], x, warning="Item 2 has 0 rows but longest item has 1; filled with NA") +test(1381, x[J("foo", character(0)), nomatch=0L], x) # Fix for #813 and #758 DT = data.table(x = 1:2) @@ -5885,7 +5931,11 @@ test(1388, as.character(x), c("00:00:01", "-00:00:01", "-01:01:40")) # Fix for #880. Another eval(parse(.)) issue. DT <- as.data.table(iris) DT[, foo := "Species"] -test(1389, copy(DT)[,bar := eval(parse(text=foo[1]), envir=.SD)], copy(DT)[, bar := Species]) +test(1389.1, copy(DT)[,bar := eval(parse(text=foo[1]), envir=.SD)], copy(DT)[, bar := Species]) +# another test from #1181 for completeness +DT1 = data.table(a = 1, key = 'a') +DT2 = data.table(c = 1, fn = list(quote(5*a)), key = 'c') +test(1389.2, DT1[, n:=eval(DT2[a]$fn[[1]], .SD)], data.table(a=1, n=5, key="a")) # Fix for foverlaps() floating point interval (double) types. 
Should increment them by machine tolerance, not by 1L DT1 = data.table(start=c(0.88), end=c(0.88)) @@ -6656,6 +6706,7 @@ if (test_xts) { setcolorder(dt, c(2, 3, 1)) dt[ , char_col := 'a'] test(1465.17, as.xts(dt), xt, warning = 'columns are not numeric') + if (base::getRversion() < "3.6.0") rm(as.xts) # 890 -- key argument for as.data.table.xts x = xts(1:10, as.Date(1:10, origin = "1970-01-01")) @@ -6666,6 +6717,10 @@ if (test_xts) { " 6: 1970-01-07 6", " 7: 1970-01-08 7", " 8: 1970-01-09 8", " 9: 1970-01-10 9", "10: 1970-01-11 10")) options(old) + + # as.data.table.xts(foo) had incorrect integer index with a column name called 'x', #4897 + M = xts::as.xts(matrix(1, dimnames=list("2021-05-23", "x"))) # xts:: just to be extra robust; shouldn't be needed with rm(as.xts) above + test(1465.19, inherits(as.data.table(M)$index,"POSIXct")) Sys.setenv("_R_CHECK_LENGTH_1_LOGIC2_" = TRUE) } @@ -6894,13 +6949,12 @@ test(1486.1, as.data.frame(ans1.1), as.data.frame(ans1.2)) test(1486.2, as.data.frame(ans2.1), as.data.frame(ans2.1)) # Fix for #832 -x <- matrix(1:9, ncol=3) -setattr(x, "names", paste("V", seq_len(length(x)), sep = "")) +x <- matrix(1:9, ncol=3L) +setattr(x, "names", paste0("V", seq_along(x))) test(1487.1, setattr(x, "class", c("data.table", "data.frame")), error="Internal structure doesn't seem to be a list") -x <- matrix(1:9, ncol=3) +x <- matrix(1:9, ncol=3L) class(x) = c("data.table", "data.frame") -# not sure how to test this one, so using `tryCatch` -test(1487.2, tryCatch(print(x), error=function(k) "bla"), "bla") +test(1487.2, print(x), error="dim.data.table expects a data.table as input") # Fix for #1043 DT = data.table(grp=LETTERS[1:2], categ=rep(c("X","Y"), each=2L), condition=rep(c("P","Q"), each=4L), value=sample(8)) @@ -7294,18 +7348,22 @@ test(1530.4, which.last(x), tail(which(x), 1L)) set.seed(2L) x = apply(matrix(sample(letters, 12), nrow=2), 1, paste, collapse="") y = factor(sample(c(letters[1:5], x), 20, TRUE)) -xsub = substring(x, 1L, 1L) 
-test(1532.1, y %like% xsub[1L], grepl(xsub[1L], y)) -test(1532.2, y %like% xsub[2L], grepl(xsub[2L], y)) -test(1532.3, like(y, xsub[1L]), grepl(xsub[1L], y)) -test(1532.4, like(y, xsub[2L]), grepl(xsub[2L], y)) +xsub = substr(x, 1L, 1L) +test(1532.01, y %like% xsub[1L], grepl(xsub[1L], y)) +test(1532.02, y %like% xsub[2L], grepl(xsub[2L], y)) +test(1532.03, like(y, xsub[1L]), grepl(xsub[1L], y)) +test(1532.04, like(y, xsub[2L]), grepl(xsub[2L], y)) ## %ilike% and %flike% for #3333 x = c('HEY', 'hey', '()') -test(1532.5, like(x, 'hey', ignore.case = TRUE), c(TRUE, TRUE, FALSE)) -test(1532.6, like(x, '()'), c(TRUE, TRUE, TRUE)) -test(1532.7, like(x, '()', fixed = TRUE), c(FALSE, FALSE, TRUE)) -test(1532.8, x %ilike% 'hey', c(TRUE, TRUE, FALSE)) -test(1532.9, x %flike% '()', c(FALSE, FALSE, TRUE)) +test(1532.05, like(x, 'hey', ignore.case = TRUE), c(TRUE, TRUE, FALSE)) +test(1532.06, like(x, '()'), c(TRUE, TRUE, TRUE)) +test(1532.07, like(x, '()', fixed = TRUE), c(FALSE, FALSE, TRUE)) +test(1532.08, x %ilike% 'hey', c(TRUE, TRUE, FALSE)) +test(1532.09, x %flike% '()', c(FALSE, FALSE, TRUE)) +## %like% test for ordered factor with NA +x = c("A", "B", "C", NA_character_) +x = ordered(x, levels = rev(x)[-1L]) +test(1532.10, x %like% "A", c(TRUE, FALSE, FALSE, FALSE)) # coverage for setkey() to 100% dt1 = data.table(x=sample(5), y=1:5, key="y") @@ -8314,10 +8372,18 @@ DT2 = data.table(id1=c("c", "w", "b"), val=50:52) test(1600.2, names(DT1[DT2, .(id1=id1, val=val, bla=sum(z1, na.rm=TRUE)), on="id1"]), c("id1", "val", "bla")) # warn when merge empty data.table #597 -test(1601.1, merge(data.table(a=1),data.table(a=1), by="a"), data.table(a=1, key="a")) -test(1601.2, tryCatch(merge(data.table(a=1),data.table(NULL), by="a"), warning = function(w) w$message), "You are trying to join data.tables where 'y' argument is 0 columns data.table.") -test(1601.3, tryCatch(merge(data.table(NULL),data.table(a=1), by="a"), warning = function(w) w$message), "You are trying to join 
data.tables where 'x' argument is 0 columns data.table.") -test(1601.4, tryCatch(merge(data.table(NULL),data.table(NULL), by="a"), warning = function(w) w$message), "You are trying to join data.tables where 'x' and 'y' arguments are 0 columns data.table.") +DT0 = data.table(NULL) +DT1 = data.table(a=1) +test(1601.1, merge(DT1, DT1, by="a"), data.table(a=1, key="a")) +test(1601.2, merge(DT1, DT0, by="a"), + warning="You are trying to join data.tables where 'y' has 0 columns.", + error="Elements listed in `by`") +test(1601.3, merge(DT0, DT1, by="a"), + warning="You are trying to join data.tables where 'x' has 0 columns.", + error="Elements listed in `by`") +test(1601.4, merge(DT0, DT0, by="a"), + warning="You are trying to join data.tables where 'x' and 'y' have 0 columns.", + error="Elements listed in `by`") # fix for #1549 d1 <- data.table(v1=1:2,x=x) @@ -8460,17 +8526,17 @@ test(1613.21, all.equal(DT2, DT1, ignore.row.order = TRUE), "Dataset 'current' h # test attributes: key DT1 <- data.table(a = 1:4, b = letters[1:4], key = "a") DT2 <- data.table(a = 1:4, b = letters[1:4]) -test(1613.22, all.equal(DT1, DT2), "Datasets has different keys. 'target': a. 'current' has no key.") +test(1613.22, all.equal(DT1, DT2), "Datasets have different keys. 'target': [a]. 'current': has no key.") test(1613.23, all.equal(DT1, DT2, check.attributes = FALSE), TRUE) test(1613.24, all.equal(DT1, setkeyv(DT2, "a"), check.attributes = TRUE), TRUE) # test attributes: index DT1 <- data.table(a = 1:4, b = letters[1:4]) DT2 <- data.table(a = 1:4, b = letters[1:4]) setindexv(DT1, "b") -test(1613.25, all.equal(DT1, DT2), "Datasets has different indexes. 'target': b. 'current' has no index.") +test(1613.25, all.equal(DT1, DT2), "Datasets have different indices. 'target': [b]. 'current': has no index.") test(1613.26, all.equal(DT1, DT2, check.attributes = FALSE), TRUE) -test(1613.27, all.equal(DT1, setindexv(DT2, "a")), "Datasets has different indexes. 'target': b. 
'current': a.") -test(1613.28, all.equal(DT1, setindexv(DT2, "b")), "Datasets has different indexes. 'target': b. 'current': a, b.") +test(1613.27, all.equal(DT1, setindexv(DT2, "a")), "Datasets have different indices. 'target': [b]. 'current': [a].") +test(1613.28, all.equal(DT1, setindexv(DT2, "b")), "Datasets have different indices. 'target': [b]. 'current': [a, b].") test(1613.29, all.equal(DT1, setindexv(setindexv(DT2, NULL), "b")), TRUE) # test custom attribute DT1 <- data.table(a = 1:4, b = letters[1:4]) @@ -8479,7 +8545,7 @@ setattr(DT1, "custom", 1L) test(1613.30, all.equal(DT1, DT2), "Datasets has different number of (non-excluded) attributes: target 3, current 2") test(1613.31, all.equal(DT1, DT2, check.attributes = FALSE), TRUE) setattr(DT2, "custom2", 2L) -test(1613.32, all.equal(DT1, DT2), "Datasets has attributes with different names: custom, custom2") +test(1613.32, all.equal(DT1, DT2), "Datasets has attributes with different names: [custom, custom2]") setattr(DT1, "custom2", 2L) setattr(DT2, "custom", 0L) test(1613.33, all.equal(DT1, DT2), paste0("Attributes: < Component ", dQuote("custom"), ": Mean relative difference: 1 >")) @@ -9500,7 +9566,7 @@ nqjoin_test <- function(x, y, k=1L, test_no, mult="all") { runcmb = as.data.table(runcmb[, 1:min(100L, ncol(runcmb)), drop=FALSE]) # max 100 combinations to test runops = lapply(runcmb, function(cols) { thisops = sample(ops, k, TRUE) - thisops[substring(cols,1,1)=="c"] = "==" + thisops[startsWith(cols, "c")] = "==" thisops }) is_only_na <- function(x) is.na(x) & !is.nan(x) @@ -9948,7 +10014,8 @@ test(1670.2, class(as.data.table(x)), class(x)[2:3]) # #1676, `:=` with by shouldn't add cols on supported types dt = data.table(x=1, y=2) -test(1671, dt[, z := sd, by=x], error="invalid type/length (closure/1)") +test(1671, dt[, z := sd, by=x], + error=gettextf("invalid type/length (%s/%d) in vector allocation", "closure", 1L, domain="R")) # 1683 DT <- data.table(V1 = rep(1:2, 3), V2 = 1:6) @@ -10286,11 
+10353,11 @@ if (.Platform$OS.type=="unix") { cat("a,b\n4,2", file=f<-tempfile()) cmd <- sprintf("cat %s", f) options(datatable.fread.input.cmd.message = TRUE) - test(1703.01, fread(cmd), ans<-data.table(a=4L, b=2L), message="Please use fread.cmd=.*security concern.*Please read item 5 in the NEWS file for v1.11.6") + test(1703.01, fread(cmd), ans<-data.table(a=4L, b=2L), message="security concern.*Please read item 5 in the NEWS file for v1.11.6") options(datatable.fread.input.cmd.message = NULL) # when option is missing as it is by default, then TRUE test(1703.02, fread(cmd), ans, message="security concern") options(datatable.fread.input.cmd.message = FALSE) - test(1703.03, tryCatch(fread(cmd), message=stop), ans) + test(1703.03, fread(cmd), ans) options(datatable.fread.input.cmd.message = NULL) test(1703.04, fread(cmd=cmd), ans) test(1703.05, fread(file=cmd), error=sprintf("File '%s' does not exist", cmd)) @@ -10315,7 +10382,8 @@ if (.Platform$OS.type=="unix") { test(1703.15, fread("."), error="File '.' is a directory. 
Not yet implemented.") # tmpdir argument d = tempfile("dir") -test(1703.16, fread(text=c('a,b','1,2'), tmpdir=d), error="cannot open the connection", warning="No such file or directory") +test(1703.16, fread(text=c('a,b','1,2'), tmpdir=d), + error=base_messages$cant_open_file, warning=base_messages$missing_file) dir.create(d) test(1703.17, fread(text=c('a,b','1,2'), tmpdir=d), data.table(a=1L,b=2L)) test(1703.18, fread(text=c('a,b','1,2')), data.table(a=1L, b=2L)) @@ -10382,8 +10450,8 @@ test(1722.2, DT[,(!is.na(as.numeric(FieldName)))], c(TRUE,TRUE,FALSE,TRUE,FALSE, test(1723.1, DT[removalIndex>0,rowId-(2*removalIndex-1)], c(-2,-11,-5,-14)) test(1723.2, DT[removalIndex>0,(rowId-(2*removalIndex-1))], c(-2,-11,-5,-14)) DT = data.table(FieldName = c("1", "2", "3", "four", "five", "6")) -test(1724.1, DT[, is.na(as.numeric(FieldName))], c(FALSE,FALSE,FALSE,TRUE,TRUE,FALSE), warning="NAs introduced by coercion") -test(1724.2, DT[, !is.na(as.numeric(FieldName))], c(TRUE,TRUE,TRUE,FALSE,FALSE,TRUE), warning="NAs introduced by coercion") +test(1724.1, DT[, is.na(as.numeric(FieldName))], c(FALSE,FALSE,FALSE,TRUE,TRUE,FALSE), warning=base_messages$coerce_na) +test(1724.2, DT[, !is.na(as.numeric(FieldName))], c(TRUE,TRUE,TRUE,FALSE,FALSE,TRUE), warning=base_messages$coerce_na) # Ensure NA's are added properly when a new column is added, not all the target rows are joined to, and the number of i # rows is equal or greater than the number of rows in the target table. 
@@ -10834,7 +10902,8 @@ test(1743.217, sapply(fread("a,b,c,d,e,f\na,b,c,d,e,f", colClasses = list(factor test(1743.218, sapply(fread("a,b,c,d,e,f\na,b,c,d,e,f", colClasses = list(factor = c(1, 2, 4), factor = 3), select = c(5, 4, 2, 3)), class), y = c(e = "character", d = "factor", b = "factor", c = "factor")) test(1743.22, fread("a,b,c\n1999/01/01,2,f", colClasses=list(Date=1L), drop="a"), data.table(b=2L, c="f")) -test(1743.231, fread("a,b,c\n2,1,4i", colClasses=list(complex="c", integer=2L), drop="a"), data.table(b=1L, c="4i"), warning="NAs introduced by coercion.*left as type 'character'") +test(1743.231, fread("a,b,c\n2,1,4i", colClasses=list(complex="c", integer=2L), drop="a"), data.table(b=1L, c="4i"), + warning=paste0(base_messages$coerce_na, ".*left as type 'character'")) test(1743.232, fread("a,b,c\n2,1,3+4i", colClasses=list(complex="c", integer=2L), drop="a"), data.table(b=1L, c=3+4i)) test(1743.241, fread("a,b,c\n2,2,f", colClasses = list(character="c", integer="b"), drop="a"), data.table(b=2L, c="f")) test(1743.242, fread("a,b,c\n2,2,f", colClasses = c("integer", "integer", "factor"), drop="a"), data.table(b=2L, c=factor("f"))) @@ -10874,7 +10943,9 @@ test(1743.308, fread(data1743, colClasses=list(NULL=c("C","D")), drop=1:2), data test(1743.311, fread(data1743, colClasses="NULL"), ans<-data.table(A=1:2, B=3:4, C=5:6, D=7:8), warning="colClasses.*quoted.*interpreted as colClasses.*NULL") test(1743.312, fread(data1743, colClasses=character()), ans) test(1743.32, fread("A,B\na,0+1i", colClasses="complex"), data.table(A="a", B=1i), - warning="Column 'A' was requested to be 'complex'.*NAs introduced by coercion.*column has been left as.*character") + warning=paste0("Column 'A' was requested to be 'complex'.*", + base_messages$coerce_na, + ".*column has been left as.*character")) test(1743.33, fread(data1743, colClasses=list("character"=4, "numeric"=c(2,NA,1))), data.table(A=c(1,2), B=c(3,4), C=5:6, D=c("7","8")), warning="colClasses[[2]][2] is NA") 
test(1743.34, fread(data1743, select=list("character"=4, "numeric"=c(2,NA,1))), data.table(D=c("7","8"), B=c(3,4), A=c(1,2)), warning="colClasses[[2]][2] is NA") old = options(warn=2) @@ -11009,7 +11080,7 @@ test(1750.10, # groupingsets on aggregate using grouping col char type and sum - error test(1750.11, groupingsets(dt, j = lapply(.SD, sum), by = c("status","year"), sets=list(character()), .SDcols="color"), - error = "invalid 'type' (character) of argument" + error=base_messages$invalid_arg_sum("character") ) # groupingsets on aggregate using grouping col factor type and sum - error test(1750.12, @@ -11059,9 +11130,9 @@ test(1750.19, uniqueN({ ), 1L, warning = "'sets' contains a duplicate") # entries in `by` / `sets` not exists in data.table test(1750.20, exists("notexist"), FALSE) # https://github.com/Rdatatable/data.table/issues/3055#issuecomment-423364960 -test(1750.21, groupingsets(dt, j = c(list(cnt=.N), lapply(.SD, sum)), by = c("color","year","notexist"), sets=list(c("color"), character()), id=TRUE), error = "object 'notexist' not found") +test(1750.21, groupingsets(dt, j = c(list(cnt=.N), lapply(.SD, sum)), by = c("color","year","notexist"), sets=list(c("color"), character()), id=TRUE), error=base_messages$missing_object("notexist")) test(1750.22, groupingsets(dt, j = c(list(cnt=.N), lapply(.SD, sum)), by = c("color","year","status"), sets=list(c("color"), "stat"), id=TRUE), error = "Columns used in 'sets' but not present in 'by': [stat]") -test(1750.23, groupingsets(dt, j = .(a=sum(notexist)), by = c("color","year","status"), sets=list(c("color"), character()), id=TRUE), error = "object 'notexist' not found") +test(1750.23, groupingsets(dt, j = .(a=sum(notexist)), by = c("color","year","status"), sets=list(c("color"), character()), id=TRUE), error=base_messages$missing_object("notexist")) # update by ref `:=` forbidden test(1750.24, groupingsets(dt, j = sum_value := sum(value), by = c("color","year","status"), sets=list(c("color"), character())), @@ 
-11388,16 +11459,18 @@ if (exists("B")) rm(B) if (exists("NOTEXIST")) rm(NOTEXIST) if (exists("MyCol")) rm(MyCol) DT <- data.table(A = c(FALSE, TRUE), B = 2:1, C=c(2,3), MyCol=c(2,2)) -test(1773.01, DT[A], error = "A is not found in calling scope but it is a column of type logical.*==TRUE.*When the first argument") -test(1773.02, DT[B], error = "B is not found in calling scope but it is a column of type integer.*DT\\[\\(col\\)\\].*When the first argument") # 697 -test(1773.03, DT[C], error = "i has evaluated to type closure. Expecting logical, integer or double") # C picks up stats::C in calling scope -test(1773.04, DT[MyCol], error="MyCol is not found in calling scope but it is a column of type double.*DT\\[\\(col\\)\\].*When the first argument") -test(1773.05, DT[NOTEXIST], error = "NOTEXIST is not found in calling scope and it is not a column name either. When the first argument") +test(1773.01, DT[A], error = "'A' is not found in calling scope, but it is a column of type logical.*==TRUE.*When the first argument") +test(1773.02, DT[B], error = "'B' is not found in calling scope, but it is a column of type integer.*DT\\[\\(col\\)\\].*When the first argument") # 697 +test(1773.03, DT[C], error = "'C' is not found in calling scope, but it is a column of type double") # C picks up stats::C in calling scope +test(1773.04, DT[MyCol], error="'MyCol' is not found in calling scope, but it is a column of type double.*DT\\[\\(col\\)\\].*When the first argument") +test(1773.05, DT[NOTEXIST], error = "'NOTEXIST' is not found in calling scope and it is not a column name either. 
When the first argument") test(1773.06, DT[(A)], DT[2]) test(1773.07, DT[A==TRUE], DT[2]) test(1773.08, DT[(B)], data.table(A=c(TRUE,FALSE), B=1:2, C=c(3,2), MyCol=2)) test(1773.09, DT[(MyCol)], data.table(A=c(TRUE,TRUE), B=INT(1,1), C=c(3,3), MyCol=2)) test(1773.10, DT[(C)], data.table(A=c(TRUE,NA), B=c(1L,NA), C=c(3,NA), MyCol=c(2,NA))) +test(1773.11, data.table(subset=c(TRUE,FALSE))[subset], # i being a function name that's also a column name, #5014 + error="'subset' is not found in calling scope, but") # New as.data.table.array method in v1.10.5 set.seed(1L) @@ -13026,11 +13099,11 @@ test(1923.2, indices(DT, vectors=TRUE), list(c("V1"))) DT = data.table(varname = 1) test(1924.1, DT[var_name==1], error='not found\\. Perhaps you intended.*varname') test(1924.2, DT[variable==1], error='Object.*not found among') -test(1924.3, DT[varname+'a'], error='non-numeric argument') +test(1924.3, DT[varname+'a'], error=base_messages$invalid_arg_binary_operator) DT[, VAR_NAME:=2] -test(1924.4, DT[var_name==1], error="Object 'var_name' not found. Perhaps you intended varname, VAR_NAME") +test(1924.4, DT[var_name==1], error="Object 'var_name' not found. Perhaps you intended [varname, VAR_NAME]") DT = setDT(lapply(integer(50), function(...) numeric(1L))) -test(1924.5, DT[V==0], error='Perhaps you intended.*V1.*V5 or 45 more') +test(1924.5, DT[V==0], error='Perhaps you intended.*V1.*V10, [.]{3}') # test suite of as.ITime methods (subsumes #2870) s = c('1970-01-01 00:00:00.1234', '2005-10-12 09:45:32.84') @@ -13184,15 +13257,15 @@ test(1948.09, DT[i, on = eval(eval("id<=idi"))], DT[i, on = "id<=idi"]) test(1948.10, DT[i, on = ""], error = "'on' contains no column name: . Each 'on' clause must contain one or two column names.") test(1948.11, DT[i, on = "id>=idi>=1"], error = "Found more than one operator in one 'on' statement: id>=idi>=1. Please specify a single operator.") test(1948.12, DT[i, on = "`id``idi`<=id"], error = "'on' contains more than 2 column names: `id``idi`<=id. 
Each 'on' clause must contain one or two column names.") -test(1948.13, DT[i, on = "id != idi"], error = "Invalid operators !=. Only allowed operators are ==<=<>=>.") +test(1948.13, DT[i, on = "id != idi"], error = "Invalid join operators [!=]. Only allowed operators are [==, <=, <, >=, >].") test(1948.14, DT[i, on = 1L], error = "'on' argument should be a named atomic vector of column names indicating which columns in 'i' should be joined with which columns in 'x'.") # helpful error when on= is provided but not i, rather than silently ignoring on= DT = data.table(A=1:3) -test(1949.1, DT[,,on=A], error="object 'A' not found") # tests .1 to .4 amended after #3621 -test(1949.2, DT[,1,on=A], error="object 'A' not found") -test(1949.3, DT[on=A], error="object 'A' not found") -test(1949.4, DT[,on=A], error="object 'A' not found") +test(1949.1, DT[,,on=A], error=base_messages$missing_object("A")) # tests .1 to .4 amended after #3621 +test(1949.2, DT[,1,on=A], error=base_messages$missing_object("A")) +test(1949.3, DT[on=A], error=base_messages$missing_object("A")) +test(1949.4, DT[,on=A], error=base_messages$missing_object("A")) test(1949.5, DT[1,,with=FALSE], error="j must be provided when with=FALSE") test(1949.6, DT[], output="A.*1.*2.*3") # no error test(1949.7, DT[,], output="A.*1.*2.*3") # no error, #3163 @@ -13259,14 +13332,17 @@ test(1957.3, fread("A,B\na,b\nc,d\n", stringsAsFactors=TRUE, verbose=TRUE), data output="stringsAsFactors=TRUE converted 2 column(s): [A, B]") # misc. 
coverage tests in fread -test(1958.1, fread('\U0001f64d', encoding = 'UTF-16'), error = "Argument 'encoding' must be") -test(1958.2, fread('a,b\n1,2', nrows = NA_real_), data.table(a = 1L, b = 2L)) -test(1958.3, fread('a,b\n1,2', nrows = -1), data.table(a = 1L, b = 2L)) -test(1958.4, fread('a,b\n1,2', key = 1), error = 'must be a character vector naming columns') -test(1958.5, fread("A,B,C\n1,2,3\n3,4,5\n0,0,0\n", nrows=0), data.table(A=logical(), B=logical(), C=logical())) #2747 -test(1958.6, fread("A,B,C\n1,2,3\n3,4,5\n0,0,100\n", nrows=0, sep=','), data.table(A=logical(), B=logical(), C=logical())) -test(1958.7, fread('A,B,C,D\n"a,b",4,5,6\n"c,d",6,7\n', fill=TRUE), data.table(A=c("a,b","c,d"), B=INT(4,6), C=INT(5,7), D=INT(6,NA))) # 2547 -test(1958.8, fread('A,B,C,D\n"a,b",4,5\n"c,d",6,7,8\n', fill=TRUE), data.table(A=c("a,b","c,d"), B=INT(4,6), C=INT(5,7), D=INT(NA,8))) +test(1958.01, fread('\U0001f64d', encoding = 'UTF-16'), error = "Argument 'encoding' must be") +test(1958.02, fread('a,b\n1,2', nrows = NA_real_), data.table(a = 1L, b = 2L)) +test(1958.03, fread('a,b\n1,2', nrows = -1), data.table(a = 1L, b = 2L)) +test(1958.04, fread('a,b\n1,2', key = 1), error = 'must be a character vector naming columns') +test(1958.05, fread("A,B,C\n1,2,3\n3,4,5\n0,0,0\n", nrows=0), data.table(A=logical(), B=logical(), C=logical())) #2747 +test(1958.06, fread("A,B,C\n1,2,3\n3,4,5\n0,0,100\n", nrows=0, sep=','), data.table(A=logical(), B=logical(), C=logical())) +test(1958.07, fread('A,B,C,D\n"a,b",4,5,6\n"c,d",6,7\n', fill=TRUE), data.table(A=c("a,b","c,d"), B=INT(4,6), C=INT(5,7), D=INT(6,NA))) # 2547 +test(1958.08, fread('A,B,C,D\n"a,b",4,5\n"c,d",6,7,8\n', fill=TRUE), data.table(A=c("a,b","c,d"), B=INT(4,6), C=INT(5,7), D=INT(NA,8))) +# 4686 +test(1958.09, fread("A,B,C\n1,2,3\n3,4,5\n0,0,0\n", nrows=0L), data.table(A=logical(), B=logical(), C=logical())) +test(1958.10, fread("A,B,C\n1,2,3\n3,4,5\n0,0,100\n", nrows=0L, sep=','), data.table(A=logical(), B=logical(), 
C=logical())) # Skip should work with all types of newlines #3006 eols = c("\n", "\r\n", "\r", "\n\r") @@ -13332,8 +13408,7 @@ test(1962.004, duplicated(DT, by = -1L), error = 'specify non existing column*.*-1') test(1962.005, duplicated(DT, by = 'y'), error = 'specify non existing column*.*y') -test(1962.0061, duplicated(data.table(NULL)), logical(0L)) -test(1962.0062, duplicated(data.table(a = 1L), by = character()), logical()) +test(1962.006, duplicated(data.table(NULL)), logical(0L)) test(1962.007, unique(DT, incomparables = TRUE), error = 'not used (yet)') @@ -13751,7 +13826,7 @@ test(1967.34, data.table(1:5, NULL), data.table(V1=1:5)) ### if (novname[i]) vnames[[i]] = namesi ### but, on pause for now pending #3193 ### test(1967.35, data.table(1:5, matrix(6:15, nrow = 5L)) -test(1967.35, data.table(1:5, integer(0L)), data.table(1:5, NA_integer_), warning="Item 2 has 0 rows but longest item has 5; filled with NA") +test(1967.35, data.table(1:5, integer(0L)), data.table(integer(0L), integer(0L))) # no longer NA-fill zero-length, PR#4262 test(1967.36, data.table(1:5, key = 5L), error = 'must be character') x = data.table(a = 1:5) @@ -13773,12 +13848,12 @@ test(1967.49, x[ , list(5) := 6], error = 'LHS of := must be a symbol') test(1967.50, x[ , 1 + 3i := 6], error = "LHS of := isn't column names") test(1967.511, x[ , .(5L), by = .EACHI, mult = 'all'], error='logical error. i is not data.table') test(1967.512, x[1+3i], error='i has evaluated to type complex. 
Expecting logical, integer or double') -test(1967.521, x[1:2, by=a], x[1:2,], warning="Ignoring by= because j= is not supplied") -test(1967.522, x[, by=a], x, warning=c("Ignoring by= because j= is not supplied","i and j are both missing.*upgraded to error in future")) -test(1967.523, x[by=a], x, warning=c("Ignoring by= because j= is not supplied","i and j are both missing.*upgraded to error in future")) -test(1967.524, x[1:2, keyby=a], x[1:2,], warning="Ignoring keyby= because j= is not supplied") -test(1967.525, x[, keyby=a], x, warning=c("Ignoring keyby= because j= is not supplied","i and j are both missing.*upgraded to error in future")) -test(1967.526, x[keyby=a], x, warning=c("Ignoring keyby= because j= is not supplied","i and j are both missing.*upgraded to error in future")) +test(1967.521, x[1:2, by=a], x[1:2,], warning="Ignoring by/keyby because 'j' is not supplied") +test(1967.522, x[, by=a], x, warning=c("Ignoring by/keyby because 'j' is not supplied","i and j are both missing.*upgraded to error in future")) +test(1967.523, x[by=a], x, warning=c("Ignoring by/keyby because 'j' is not supplied","i and j are both missing.*upgraded to error in future")) +test(1967.524, x[1:2, keyby=a], x[1:2,], warning="Ignoring by/keyby because 'j' is not supplied") +test(1967.525, x[, keyby=a], x, warning=c("Ignoring by/keyby because 'j' is not supplied","i and j are both missing.*upgraded to error in future")) +test(1967.526, x[keyby=a], x, warning=c("Ignoring by/keyby because 'j' is not supplied","i and j are both missing.*upgraded to error in future")) test(1967.53, as.matrix(x, rownames = 2:3), error='length(rownames)==2 but') test(1967.54, as.matrix(x[0L]), structure(integer(0), .Dim = c(0L, 2L), .Dimnames = list(NULL, c("a", "b")))) @@ -13793,7 +13868,7 @@ test(1967.57, setnames(x), error = 'x has 2 columns but its names are length 0') names(x) = c('a', 'b') test(1967.58, names(setnames(x, new = c('b', 'c'))), c('b', 'c')) test(1967.59, setnames(x, 1:2, c(8L, 9L)), 
error = "'new' is not a character") -test(1967.60, setnames(x, -1:1, c('hey', 'you')), error = "mixed.*negative") +test(1967.60, setnames(x, -1:1, c('hey', 'you')), error = base_messages$mixed_subscripts) test(1967.61, setnames(x, 1+3i, 'cplx'), error = "'old' is type complex") test(1967.62, setnames(x, 1, c('d', 'e')), error = "'old' is length 1 but 'new'") test(1967.621, setnames(x, 1:2, c("a","a")), data.table(a=1:5, a=6:10)) @@ -13839,7 +13914,7 @@ test(1967.75, x[!y, sum(i4), on = 'i1', by = .EACHI, verbose = TRUE], data.table(i1 = c(169L, 369L), V1 = c(270L, 179L)), output = "not-join called with 'by=.EACHI'.*done") test(1967.76, x[!y, sum(i4), on = 'i1', verbose = TRUE], 510L, - output = 'Inverting irows for notjoin.*sec') + output = 'Inverting irows for notjoin.*[0-9]s') x[ , v := 0] ### hitting by = A:B branch test(1967.77, x[ , .(v = sum(v)), by = i1:i4], x[-10L]) @@ -14012,7 +14087,9 @@ test(1984.05, DT[ , sum(b), keyby = c, verbose = TRUE], ### hitting byval = eval(bysub, setattr(as.list(seq_along(xss)), ...) 
test(1984.06, DT[1:3, sum(a), by=b:c], data.table(b=10:8, c=1:3, V1=1:3)) test(1984.07, DT[, sum(a), by=call('sin',pi)], error='must evaluate to a vector or a list of vectors') -test(1984.08, DT[, sum(a), by=as.raw(0)], error='column or expression.*type raw') +test(1984.081, DT[, sum(a), by=as.raw(0)], error="Column or expression.*1.*type 'raw'.*not.*supported") +test(1984.082, data.table(A=1:4, L=list(1, 1:2, 1, 1:3), V=1:4)[, sum(V), by=.(A,L)], # better error message, 4308 + error="Column or expression.*2.*type 'list'.*not.*supported") test(1984.09, DT[, sum(a), by=.(1,1:2)], error='The items.*list are length[(]s[)] [(]1,2[)].*Each must be length 10; .*rows in x.*after subsetting') options('datatable.optimize' = Inf) test(1984.10, DT[ , 1, by = .(a %% 2), verbose = TRUE], @@ -15067,6 +15144,8 @@ test(2041.1, DT[, median(date), by=g], data.table(g=c("a","b"), V1=as.Date(c("20 test(2041.2, DT[, median(time), by=g], DT[c(2,5),.(g=g, V1=time)]) # 'invalid trim argument' with optimization level 1; #1876 +# these tests check via output= that level 1 is on, and also that level 2 is on (which includes level 1). +# They could run in level 1 with level 2 off, but output= would need to be changed and there's no need. 
test(2042.1, DT[ , as.character(mean(date)), by=g, verbose=TRUE ], data.table(g=c("a","b"), V1=c("2018-01-04","2018-01-21")), output=msg<-"GForce is on, left j unchanged.*Old mean optimization is on, left j unchanged") @@ -15075,7 +15154,19 @@ test(2042.1, DT[ , as.character(mean(date)), by=g, verbose=TRUE ], Jan.2018 = format(strptime("2018-01-01", "%Y-%m-%d"), "%b-%Y") test(2042.2, DT[ , format(mean(date),"%b-%Y")], Jan.2018) test(2042.3, DT[ , format(mean(date),"%b-%Y"), by=g, verbose=TRUE ], # just this case generated the error - data.table(g=c("a","b"), V1=c(Jan.2018, Jan.2018)), output=msg) + data.table(g=c("a","b"), V1=c(Jan.2018, Jan.2018)), output=msg) +# also incidentally fixed #2491 +DT = data.table( + Group = c("A", "A", "B", "B", "C", "C"), + Date1 = `class<-`(c(17446.0291040738, 17470.0221205444, 17445.0765226481, # `class<-`() == .Date() to pass on R 3.1.0 + 17456.0360002079, 17440.0230725919, 17451.0572453837), "Date"), + Date2 = `class<-`(c(17459.1561177987, 17451.1086757995, 17449.0820898537, + 17443.1175238448, 17461.0463715783, 17448.1033968224), "Date") +) +DT[ , DiffTime := abs(difftime(Date1, Date2, units = 'days'))] +test(2042.4, DT[ , round(mean(DiffTime)), by=Group, verbose=TRUE], + data.table(Group=c("A", "B", "C"), V1=structure(c(16, 8, 12), class="difftime", units="days")), + output="Old mean optimization is on, left j unchanged.*GForce.*FALSE") # gforce wrongly applied to external variable; #875 DT = data.table(x=INT(1,1,1,2,2), y=1:5) @@ -15310,6 +15401,19 @@ options(old) test(2049.2, outer$ab, list(data.table(a=1:3, b=4L))) test(2049.3, outer$ab[[1]][, b := 5L], data.table(a=1:3, b=5L)) test(2049.4, outer$ab, list(data.table(a=1:3, b=5L))) +test(2049.5, {DT=data.table(d=list(data.table(a=1))); DT$d[[1]][, new_col:=NA]; DT}, # verbatim from #1629 + data.table(d = list(data.table(a=1, new_col=NA)))) +# extra tests on similar theme to #1629 added in PR#4366 ... 
+add_col1 = function(dt) { + if (is.data.table(dt)) dt[, new_col:=NA] + if (is.list(dt)) lapply(dt, add_col1) + invisible() +} +DT = data.table(a=c(1,2), b=list(data.table(d=c("a", "b"), e=c(100, 200)))) +test(2049.6, add_col1(DT), NULL) +test(2049.7, names(DT), c("a","b","new_col")) +test(2049.8, names(DT$b[[1L]]), c("d","e","new_col")) +test(2049.9, names(DT$b[[2L]]), c("d","e","new_col")) # rbindlist zero row DT should retain its (unused) levels, #3508 DT = data.table(f = factor(c("a", "b", "c"))) @@ -15613,7 +15717,7 @@ DT <- data.table( f_1 = factor(c('a', 'c', 'b', NA, 'c', 'b', 'c', 'c', NA, 'c', NA, 'c', 'a', 'b', NA, NA, NA, 'a')), c_1 = c("a", "c", NA, NA, NA, "c", "b", NA, "a", "b", NA, "a", "c", "b", "c", "b", "a", "b") ) -test(2063.1, melt(DT, id=1:2, measure=3:4), melt(DT, id=c("i_1", "i_2"), measure=c("f_1", "c_1"))) +test(2063.1, melt(DT, id=1:2, measure.vars=3:4), melt(DT, id=c("i_1", "i_2"), measure.vars=c("f_1", "c_1"))) ## fun --> fun.aggregate DT = melt(as.data.table(ChickWeight), id.vars=2:4) setnames(DT, tolower(names(DT))) @@ -15797,7 +15901,7 @@ test(2072.009, fifelse(test_vec, rep(1L,11L), rep(0L,10L)), error="Length o test(2072.010, fifelse(test_vec, rep(1,10L), rep(0,11L)), error="Length of 'yes' is 10 but must be 1 or length of 'test' (11).") test(2072.011, fifelse(test_vec, rep(TRUE,10L), rep(FALSE,10L)), error="Length of 'yes' is 10 but must be 1 or length of 'test' (11).") test(2072.012, fifelse(0:1, rep(TRUE,2L), rep(FALSE,2L)), error="Argument 'test' must be logical.") -test(2072.013, fifelse(test_vec, TRUE, "FALSE"), error="'yes' is of type logical but 'no' is of type character. Please") +test(2072.013, fifelse(test_vec, TRUE, "FALSE"), error="'no' is of type character but 'yes' is logical. 
Please") test(2072.014, fifelse(test_vec, list(1),list(2,4)), error="Length of 'no' is 2 but must be 1 or length of 'test' (11).") test(2072.015, fifelse(test_vec, list(1,3),list(2,4)), error="Length of 'yes' is 2 but must be 1 or length of 'test' (11).") test(2072.016, fifelse(test_vec, list(1), list(0)), as.list(as.numeric(out_vec))) @@ -15823,7 +15927,7 @@ test(2072.031, fifelse(test_vec_na, "1", rep("0",12L)), as.character(out_vec_na) test(2072.032, fifelse(test_vec_na, rep("1",12L), "0"), as.character(out_vec_na)) test(2072.033, fifelse(test_vec_na, rep("1",12L), rep("0",12L)), as.character(out_vec_na)) test(2072.034, fifelse(test_vec_na, "1", "0"), as.character(out_vec_na)) -test(2072.035, fifelse(test_vec, as.Date("2011-01-01"), FALSE), error="'yes' is of type double but 'no' is of type logical. Please") +test(2072.035, fifelse(test_vec, as.Date("2011-01-01"), FALSE), error="'no' is of type logical but 'yes' is double. Please") test(2072.036, fifelse(test_vec_na, 1+0i, 0+0i), as.complex(out_vec_na)) test(2072.037, fifelse(test_vec_na, rep(1+0i,12L), 0+0i), as.complex(out_vec_na)) test(2072.038, fifelse(test_vec_na, rep(1+0i,12L), rep(0+0i,12L)), as.complex(out_vec_na)) @@ -16260,7 +16364,7 @@ test(2100.03, fifelse(test_vec_na, TRUE, FALSE, TRUE), as.logical(out_vec_na)) test(2100.04, fifelse(test_vec_na, "1", "0","2"), as.character(out_vec_na)) test(2100.05, fifelse(test_vec_na, 1+0i, 0+0i, 2+0i), as.complex(out_vec_na)) test(2100.06, fifelse(c(TRUE,FALSE,NA), list(1:5), list(5:1), list(15:11)), list(1:5,5:1,15:11)) -test(2100.07, fifelse(test_vec_na, 1, 0, 2L), error = "'yes' is of type double but 'na' is of type integer. 
Please make sure that both arguments have the same type.") +test(2100.07, fifelse(test_vec_na, 1, 0, 2L), c(1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 2)) # corece na test(2100.08, fifelse(test_vec_na, 1, 0, c(2,3)), error = "Length of 'na' is 2 but must be 1") test(2100.09, fifelse(date_vec_na, as.Date("2019-08-31"), as.Date("2019-08-30"), as.Date("2019-08-29")), as.Date(c(18139, 18138, 18138, 18138, 18138, 18137), origin = '1970-01-01')) test(2100.10, fifelse(date_vec_na, as.Date("2019-08-31"), as.Date("2019-08-30"), 18137), error = "'yes' has different class than 'na'. Please make sure that both arguments have the same class.") @@ -16609,7 +16713,7 @@ set.seed(1) vDT = data.table(i_id = unique(iDT$i_id))[, .(v = runif(5,0,10), p = sample(c(5,5,10,10,10))), by=i_id] test(2120.01, !exists("i_id")) # quick verify in case there's an i_id in .GlobalEnv when testing in dev test(2120.02, iDT[i_id, order(e_date, e_time)], # first of all, the correct error - error="i_id is not found in calling scope but it is a column of type character") + error="'i_id' is not found in calling scope, but it is a column of type character") tmp = vDT[c("B","C","A"), on=.(i_id), .N, by=.EACHI] # split long statement in 2120.05 up as per demo in #3669 test(2120.03, tmp, data.table(i_id=c("B","C","A"), N=5L)) # just make sure the helper tmp is correct test(2120.04, tmp[iDT[i_id, order(e_date, e_time)]], # i_id obtained from tmp; this is what broke in dev 1.12.3 @@ -16651,20 +16755,20 @@ t0 = as.POSIXct('2019-10-01') test(2124.1, format(as.ITime(t0)), '00:00:00') test(2124.2, format(as.IDate(t0)), '2019-10-01') if (is.na(oldtz)) Sys.unsetenv("TZ") else Sys.setenv(TZ=oldtz) -# careful to unset because TZ="" means UTC whereas unset TZ means local +# careful to unset because TZ="" means UTC whereas unset TZ means local, #4261 and #4464 # trunc.cols in print.data.table, #4074 -old_width = options("width" = 40) +old_width = options("width" = 40L) # Single row printing (to check issue with losing attributes) 
DT = data.table(a = "aaaaaaaaaaaaa", b = "bbbbbbbbbbbbb", c = "ccccccccccccc", d = "ddddddddddddd") test(2125.01, - capture.output(print(DT, trunc.cols=TRUE))[3], + capture.output(print(DT, trunc.cols=TRUE))[3L], "2 variables not shown: [c, d]") # Printing with dots -DT = data.table(a = vector("integer", 102), +DT = data.table(a = vector("integer", 102L), b = "bbbbbbbbbbbbb", c = "ccccccccccccc", d = c("ddddddddddddd", "d")) @@ -16696,8 +16800,12 @@ test(2125.03, capture.output(print(DT, trunc.cols=TRUE, row.names=FALSE)), " 0 bbbbbbbbbbbbb ccccccccccccc", " 0 bbbbbbbbbbbbb ccccccccccccc", "1 variable not shown: [d]" )) -test(2125.04, capture.output(print(DT, trunc.cols=TRUE, class=TRUE))[14], - "1 variable not shown: [d ]") +# also testing #4266 -- getting width of row #s register right +# TODO: understand why 2 variables truncated here. a,b,c combined have width +# _exactly_ 40, but still wraps. If we set options(width=41) it won't truncate. +# seems to be an issue with print.default. 
+test(2125.04, capture.output(print(DT, trunc.cols=TRUE, class=TRUE))[14L], + "2 variables not shown: [c , d ]") test(2125.05, capture.output(print(DT, trunc.cols=TRUE, class=TRUE, row.names=FALSE))[c(1,14)], c(" a b c", "1 variable not shown: [d ]" )) @@ -16705,8 +16813,8 @@ test(2125.06, capture.output(print(DT, trunc.cols=TRUE, col.names="none"))[c(1,1 c(" 1: 0 bbbbbbbbbbbbb ccccccccccccc", "1 variable not shown: [d]" )) test(2125.07, capture.output(print(DT, trunc.cols=TRUE, class=TRUE, col.names="none"))[c(1,13)], - c(" 1: 0 bbbbbbbbbbbbb ccccccccccccc", - "1 variable not shown: [d]" ), + c(" 1: 0 bbbbbbbbbbbbb", + "2 variables not shown: [c, d]" ), warning = "Column classes will be suppressed when col.names is 'none'") options("width" = 20) DT = data.table(a = vector("integer", 2), @@ -16896,7 +17004,6 @@ test(2132.2, fifelse(TRUE, 1, s2), error = "S4 class objects (except nanot test(2132.3, fcase(TRUE, s1, FALSE, s2), error = "S4 class objects (except nanotime) are not supported. Please see") test(2132.4, fcase(FALSE, 1, TRUE, s1), error = "S4 class objects (except nanotime) are not supported. Please see") rm(s1, s2, class2132) - if (test_xts) { # keep.rownames in as.data.table.xts() supports a string, #4232 xts = xts::xts(1:10, structure(1:10, class = "Date")) @@ -17124,6 +17231,10 @@ if (TZnotUTC) { test(2150.20, fread("a,b,c\n2015-01-01,2015-01-02,2015-01-03 01:02:03", colClasses=c("Date",NA,NA)), ans, output=ans_print) } +# fread single row single column datetime field, #2609 +test(2150.21, fread("c1\n2018-01-31 03:16:57"), data.table(V1=as.IDate("2018-01-31"), c1="03:16:57"), + warning="Detected 1 column names but the data has 2 columns") +test(2150.22, fread("c1\n2018-01-31 03:16:57", sep=""), data.table(c1=as.POSIXct("2018-01-31 03:16:57", tz="UTC"))) options(old) # 1 is treated as . 
in dcast formula, #4615 @@ -17191,11 +17302,11 @@ test(2158.2, DT[, by="index", list(value=list(value))], DT = data.table(x = 1) test(2159.01, typeof(as.matrix(DT)), "double") test(2159.02, typeof(as.matrix(DT[0L])), "double") -test(2159.03, min(DT[0L]), Inf, warning="missing") # R's warning message; use one word 'missing' to insulate from possible future changes to R's message +test(2159.03, min(DT[0L]), Inf, warning=base_messages$empty_min) DT = data.table(x = 1L) test(2159.04, typeof(as.matrix(DT)), "integer") test(2159.05, typeof(as.matrix(DT[0L])), "integer") -test(2159.06, min(DT[0L]), Inf, warning="missing") +test(2159.06, min(DT[0L]), Inf, warning=base_messages$empty_min) DT = data.table(x = TRUE) test(2159.07, typeof(as.matrix(DT)), "logical") test(2159.08, typeof(as.matrix(DT[0L])), "logical") @@ -17273,14 +17384,388 @@ if (test_bit64) { test(2164.3, d[, mean(b, na.rm=TRUE), by=a], data.table(a=INT(1,2), V1=c(2.5, 4))) } -# Test new feature %notin%, #4152 -test(2165.1, 11 %notin% 1:10, TRUE) -test(2165.2, "a" %notin% c(), TRUE) -test(2165.3, "a" %notin% c("a", "b", "c"), FALSE) -test(2165.4, c(1, 2) %notin% c(1,2,3), c(FALSE, FALSE)) -test(2165.5, "a" %notin% character(), TRUE) -test(2165.6, "a" %notin% integer(), TRUE) -test(2165.7, "a" %notin% NULL, TRUE) -test(2165.8, NA %notin% 1:5, TRUE) -test(2165.9, NA %notin% c(1:5, NA), FALSE) +# invalid key when by=.EACHI, haskey(i) but on= non-leading-subset of i's key, #4603 #4911 +X = data.table(id = c(6456372L, 6456372L, 6456372L, 6456372L,6456372L, 6456372L, 6456372L, 6456372L, 6456372L, 6456372L, 6456372L, 6456372L, 6456372L, 6456372L), + id_round = c(197801L, 199405L, 199501L, 197901L, 197905L, 198001L, 198005L, 198101L, 198105L, 198201L, 198205L, 198301L, 198305L, 198401L), + field = c(NA, NA, NA, "medicine", "medicine", "medicine", "medicine", "medicine", "medicine", "medicine", "medicine", "medicine", "medicine", "medicine"), + key = "id") +Y = data.table(id = c(6456372L, 6456345L, 6456356L), + 
id_round = c(197705L, 197905L, 201705L), + field = c("medicine", "teaching", "health"), + prio = c(6L, 1L, 10L), + key = c("id_round", "id", "prio", "field" )) +test(2165.1, X[Y, on = .(id, id_round > id_round, field), .(x.id_round[1], i.id_round[1]), by=.EACHI][id==6456372L], + data.table(id=6456372L, id_round=197705L, field='medicine', V1=197901L, V2=197705L)) +# Y$id_round happens to be sorted, so in 2165.2 we test Y$field which is not sorted +test(2165.2, X[Y, on="field", .(x.id_round[1]), by=.EACHI][field=="health"], + data.table(field="health", V1=NA_integer_)) +# a minimal example too ... +X = data.table(A=c(4L,2L,3L), B=1:3, key="A") +Y = data.table(A=2:1, B=2:3, key=c("B","A")) +test(2165.3, X[Y], data.table(A=2:3, B=2:3, i.A=2:1, key="A")) # keyed +test(2165.4, X[Y, on=.(A)], data.table(A=2:1, B=c(2L,NA), i.B=2:3)) # no key +test(2165.5, X[Y, on=.(A), x.B, by=.EACHI], data.table(A=2:1, x.B=c(2L,NA))) # no key + +# missing j was caught in groupingsets but not cube, leading to unexpected error message, #4282 +DT = data.table(a=1) +test(2166, cube(DT, by='a'), error="Argument 'j' is required") + +# fwrite support encoding "native" and "UTF-8", #1770 +latin1 = "fa\xE7ile" +Encoding(latin1) = "latin1" +utf8 = iconv(latin1, "latin1", "UTF-8") +text = c(latin1, utf8, "aaaaaaaa") +dt = data.table(A = text, B = as.factor(text)) +dt2 = data.table(A = text, B = text) +csvfile = tempfile(fileext = ".csv") +fwrite(dt, csvfile, encoding = "UTF-8", bom = TRUE) +test(2167.1, fread(csvfile, encoding = "UTF-8"), dt2) +if (identical(text, enc2native(text))) { # ensure native encoding can represent latin1 strings + fwrite(dt, csvfile, encoding = "native") + test(2167.2, fread(csvfile), dt2) +} +test(2167.3, fwrite(dt, csvfile, encoding="nativ"), error="Argument 'encoding' must be") +unlink(csvfile) + +# check valid trunc.cols=, #4766 +DT = data.table(x = rnorm(10)) +test(2168.01, print(DT, trunc.cols = 5L), error=c("Valid options for trunc.cols are TRUE and FALSE")) 
+test(2168.02, print(DT, trunc.cols = NA), error=c("Valid options for trunc.cols are TRUE and FALSE")) +test(2168.03, print(DT, trunc.cols = "thing"), error=c("Valid options for trunc.cols are TRUE and FALSE")) +test(2168.04, print(DT, trunc.cols = c(TRUE, FALSE)), error=c("Valid options for trunc.cols are TRUE and FALSE")) + +# shallow copy of .SD must be unlocked for frank using na.last=NA or ties.method='random', #4429 +DT = data.table(a=1:10) +test(2169.1, DT[ , frankv(.SD, ties.method='average', na.last=NA)], as.double(1:10)) +test(2169.2, DT[ , frankv(.SD, ties.method='random')], 1:10) +# coverage tests for some issues discovered on the way +DT[, c('..na_prefix..', '..stats_runif..') := 1L] +test(2169.3, DT[ , frankv(.SD, ties.method='average', na.last=NA)], error="Input column '..na_prefix..' conflicts") +test(2169.4, DT[ , frankv(.SD, ties.method='random')], error="Input column '..stats_runif..' conflicts") + +# which=NA inconsistent with ?data.table, #4411 +DT = data.table(A = c(NA, 3, 5, 0, 1, 2), B = c("foo", "foo", "foo", "bar", "bar", "bar")) +test(2170.1, DT[A > 1, which = NA], c(1L,4:5)) +test(2170.2, DT[A > -1, which = NA], 1L) +test(2170.3, DT[A > -1 | is.na(A), which = NA], integer()) +test(2170.4, DT[A > 10, which = NA], seq_len(nrow(DT))) +test(2170.5, DT[!(A > 1), which = NA], c(1:3,6L)) # matches DT[A <= 1, which = NA] + +# data.table() zero-nrow result if any non-null & atomic element is length 0, #3727 +test(2171.1, data.table(A=double(), B=1:2), data.table(A=double(), B=integer())) +DT = data.table(CODE=c('a','b'), DATE=1:2, VALUE=c(1.3, 1.5), key=c('CODE','DATE')) +test(2171.2, DT[J(character(), 1), VALUE], double()) # because "J" is a wrapper of list() +test(2171.3, data.table(A=NULL, B=1.0), data.table(B=1.0)) # NULL is omited +test(2171.4, NROW(data.table(A=list(), B=1.0)), 1L) # empty list() regarded as `list(list())` which is length 1, and recycled +DT = data.table(A=1:3, B=letters[1:3]) +test(2171.5, ans <- DT[A>3, .(ITEM='A>3', A, 
B)], # now identical as expected + DT[A>3][, .(ITEM='A>3', A, B)]) +test(2171.6, ans, data.table(ITEM=character(), A=integer(), B=character())) # not just identical to each other, but correct too + +# don't remove 'newclass' from jval's result, #4324 +A = data.table(COL = 'dt') +class(A) = c('newclass', class(A)) +DT = data.table(LIST_COL = list(A, A)) +test(2172, class(DT[1, LIST_COL[[1]]]), class(A)) + +# as.data.table.list edits list elements, so must be sure x does not use some other `[[` method, #4526 +x = data.frame(a = 1:5) +x$b = matrix(6:15, ncol=2L) +class(x) = c('foo', 'data.frame') +`[[.foo` = function(x, i) { + if (any(sapply(x, inherits, 'data.table'))) stop('failure') + as.list(x)[[i]] +} +test(2173, as.data.table(x), data.table(a=1:5, b.V1=6:10, b.V2=11:15)) + +# rbind two length-0 ordered factors, #4795 +DT = data.table(A = ordered(character())) +test(2174, rbind(DT, DT), DT) + +## set row.names when a null data.table has a column assigned for the first time, #4597 +DT = data.table() +test(2175.1, attr(DT[, x:=1:5], "row.names"), 1:5) +DT = data.table() +set(DT, j=c("v1","v2"), value=list(1:6, 2:7)) +test(2175.2, attr(DT, "row.names"), 1:6) +DT = data.table(x=integer()) +test(2175.3, DT[, y:=3L], data.table(x=integer(), y=integer())) # in keeping with recent #4262, view as recycling the length-1 3L to match the length-0 data + +# `keyby`=TRUE/FALSE together with by=, #4307 +DT = data.table(a=2:1, b=3:2, d=4:3) +test(2176.1, DT[, .SD, by="a", keyby=FALSE], data.table(a=2:1,b=3:2,d=4:3)) +test(2176.2, DT[, .SD, by="a", keyby=TRUE], data.table(a=1:2,b=2:3,d=3:4, key="a")) + +# check fwrite output using new default separator option, #4956 +DT = data.table(a=1, b=2) +options(datatable.fwrite.sep='\t') +test(2177.01, fwrite(DT), output='a\tb\n1\t2') +options(datatable.fwrite.sep=';') +test(2177.02, fwrite(DT), output='a;b\n1;2') +options(datatable.fwrite.sep=NULL) +test(2177.03, fwrite(DT), output='a,b\n1,2') + +# segfault when joining and grouping and 
some rows don't match, #4892 +x = data.table(id = 1:4, key = 'id') +y = data.table(id = 2:5, key = 'id') +z = data.table(c=c(2L, 2L, 1L, 1L), id=c(2L, 4L, 3L, NA)) +test(2178, x[y, .SD, by=.(c(2L, 1L, 2L, 1L))], z) + +# assigning all-na length>1 to a factor column was segfault, #4824 +DT = data.table(FACTOR = factor(rep("a", 3L))) +set(DT, i=1:2, j="FACTOR", value=rep(NA, 2L)) +test(2179, DT$FACTOR, factor(c(NA, NA, "a"))) + +# deleting duplicated column name removes only first +DT = data.table(a=1, b=2, a=3) +test(2180, DT[, a:=NULL], data.table(b=2, a=3)) + +# as.data.table(table(NULL)) was error, #4179 +test(2181, as.data.table(table(NULL)), data.table(NULL)) + +# some missing variables in melt, #4027 +DT.wide = data.table(a2=2, b1=1, b2=2) +expected = data.table(variable=factor(1:2), a=c(NA,2), b=c(1,2)) +test(2182.1, melt(DT.wide, measure.vars=list(a=c(NA,1), b=2:3)), expected) +test(2182.2, melt(DT.wide, measure.vars=list(a=c(NA,"a2"), b=c("b1","b2"))), expected) +DTid = data.table(DT.wide, id=1) +exid = data.table(id=1, expected) +test(2182.3, melt(DTid, measure.vars=list(a=c(NA,1), b=2:3), id.vars="id"), exid) +test(2182.4, melt(DTid, measure.vars=list(a=c(NA,"a2"), b=c("b1","b2")), id.vars="id"), exid) +test(2182.5, melt(DT.wide, measure.vars=list(a=c(NA,1), b=2:3), na.rm=TRUE)[, .(a, b)], data.table(a=2, b=2))#not testing variable because it is not computed correctly, #4455 + +### First block testing measurev +# new variable_table attribute for measure.vars, PR#4731 for multiple issues +measurev = function(cols)cols # user-defined function for computing measure.vars, same name as data.table::measure but user-defined version should be used. +test(2183.00001, melt(DT.wide, measure.vars=measurev()), data.table(variable=factor(c("a2","b1","b2")), value=c(2,1,2))) +measurev = list("foo", "bar")#measurev below should not use this since it is not a function. 
+test(2183.00002, melt(DTid, measure.vars=measurev(list(value.name=NULL, num=as.complex), pattern="([ab])([12])")), error="Type 'complex' not supported for joining/merging") +test(2183.00004, melt(DTid, measure.vars=measurev(list(value.name=NULL, istr=NULL), pattern="([ab])([12])"))[order(b)], data.table(id=1, istr=paste(c(1,2)), a=c(NA, 2), b=c(1,2))) +test(2183.00005, melt(DTid, measure.vars=measurev(list(column=NULL, istr=NULL), pattern="([ab])([12])", multiple.keyword="column"))[order(b)], data.table(id=1, istr=paste(c(1,2)), a=c(NA, 2), b=c(1,2)))#same computation but different multiple.keyword +iris.dt = data.table(datasets::iris) +test(2183.00020, melt(iris.dt, measure.vars=measurev(value.name, dim, sep=".", pattern="foo")), error="both sep and pattern arguments used; must use either sep or pattern (not both)") +test(2183.000201, melt(iris.dt, measure.vars=measurev(list(NULL, dim=NULL), sep=".")), error="in measurev, elements of fun.list must be named, problems: 1") +test(2183.000202, melt(iris.dt, measure.vars=measurev(list(NULL, NULL), sep=".")), error="in measurev, elements of fun.list must be named, problems: 1,2") +test(2183.00027, melt(iris.dt, measure.vars=measurev(list(value.name=NULL, dim="bar"), sep=".")), error="in the measurev fun.list, each non-NULL element must be a function with at least one argument, problem: dim") +test(2183.00028, melt(iris.dt, measure.vars=measurev(list(value.name=NULL, dim=NULL, baz=NULL), sep=".")), error="number of elements of fun.list =3 must be same as max number of items after splitting column names =2") +test(2183.00042, melt(DTid, measure.vars=measurev(list(value.name=NULL, istr=function()1), pattern="([ab])([12])")), error="in the measurev fun.list, each non-NULL element must be a function with at least one argument, problem: istr") +test(2183.00043, melt(DTid, measure.vars=measurev(list(value.name=NULL, istr=interactive), pattern="([ab])([12])")), error="in the measurev fun.list, each non-NULL element must be a 
function with at least one argument, problem: istr") +test(2183.00044, melt(DTid, measure.vars=measurev(list(value.name=NULL, istr=function(x)1), pattern="([ab])([12])")), error="each conversion function must return an atomic vector with same length as its first argument, problem: istr") +test(2183.00045, melt(iris.dt, measure.vars=measurev(list(value.name=NULL, dim=NULL, baz=NULL), pattern="(.*)[.](.*)")), error="number of elements of fun.list =3 must be same as number of capture groups in pattern =2") +test(2183.00048, melt(iris.dt, measure.vars=measurev(list(value.name=NULL, value.name=NULL), sep=".")), error="elements of fun.list should be uniquely named, problems: value.name") +# measure with factor conversion. +myfac = function(x)factor(x)#user-defined conversion function. +test(2183.00060, melt(DTid, measure.vars=measurev(list(letter=myfac, value.name=NULL), pattern="([ab])([12])")), data.table(id=1, letter=factor(c("a","b")), "2"=c(2,2), "1"=c(NA,1))) + +### Second block testing measure +# new variable_table attribute for measure.vars, PR#4731 for multiple issues +measure = function(cols)cols # user-defined function for computing measure.vars, same name as data.table::measure but user-defined version should be used. +test(2183.01, melt(DT.wide, measure.vars=measure()), data.table(variable=factor(c("a2","b1","b2")), value=c(2,1,2))) +measure = list("foo", "bar")#measure below should not use this since it is not a function. 
+test(2183.02, melt(DTid, measure.vars=measure(value.name, num=as.complex, pattern="([ab])([12])")), error="Type 'complex' not supported for joining/merging") +test(2183.03, melt(DTid, measure.vars=structure(list(a=c(NA,"a2"),b=c("b1","b2")), variable_table=data.table(number=as.complex(1:2)))), error="variable_table does not support column type 'complex' for column 'number'") +test(2183.04, melt(DTid, measure.vars=measure(value.name, istr, pattern="([ab])([12])"))[order(b)], data.table(id=1, istr=paste(c(1,2)), a=c(NA, 2), b=c(1,2))) +test(2183.05, melt(DTid, measure.vars=measure(column, istr, pattern="([ab])([12])", multiple.keyword="column"))[order(b)], data.table(id=1, istr=paste(c(1,2)), a=c(NA, 2), b=c(1,2)))#same computation but different multiple.keyword +test(2183.06, melt(DTid, measure.vars=structure(list(1, 2), variable_table="foo")), error="variable_table attribute of measure.vars should be either NULL or a data table") +test(2183.07, melt(DTid, measure.vars=structure(1:3, variable_table="foo")), error="variable_table attribute of measure.vars should be either NULL or a data table") +test(2183.08, melt(DTid, measure.vars=structure(1:3, variable_table=data.table())), error="variable_table attribute of measure.vars should be a data table with at least one column") +test(2183.09, melt(DTid, measure.vars=structure(1:3, variable_table=data.table(x=1))), error="variable_table attribute of measure.vars should be a data table with same number of rows as max length of measure.vars vectors =3") +test(2183.10, melt(DTid, measure.vars=structure(list(a=1, b=2:3), variable_table=data.table(x=1))), error="variable_table attribute of measure.vars should be a data table with same number of rows as max length of measure.vars vectors =2") +test(2183.11, melt(DTid, measure.vars=structure(list(a=1, b=2:3), variable_table=list(x=1:2, y=1))), error="variable_table attribute of measure.vars should be a data table with same number of rows as max length of measure.vars vectors 
=2")#make sure to check each list element, not just the first. +# general measure errors. +iris.dt = data.table(datasets::iris) +test(2183.20, melt(iris.dt, measure.vars=measure(value.name, dim, sep=".", pattern="foo")), error="both sep and pattern arguments used; must use either sep or pattern (not both)") +# school example. +schools.wide <- data.table( + school = c("A","B"), + read_1 = c(1.1,2.1), read_1_sp = c(T,T), + read_2 = c(1.2,2.2), + math_1 = c(10.1,20.1), math_1_sp = c(T,T), + math_2 = c(NA,20.2), math_2_sp = c(NA,F)) +schools.tall <- melt(schools.wide, na.rm=TRUE, measure.vars=measure(subject, number=as.integer, value.name=function(x)ifelse(x=="", "score", "sp"), pattern="([^_]+)_([12])(.*)")) +schools.expected = data.table(school=c("A","B","A","B","B"), subject=c("read","read","math","math","math"), number=as.integer(c(1,1,1,1,2)), score=c(1.1,2.1,10.1,20.1,20.2), sp=c(T,T,T,T,F)) +test(2183.21, schools.tall, schools.expected) +who <- data.table(id=1, new_sp_m5564=2, newrel_f65=3) +test(2183.22, melt(who, measure.vars=measure(diagnosis, gender, ages, ymin=as.numeric, ymax=function(y)ifelse(y=="", Inf, as.numeric(y)), pattern="new_?(?.*)_(?.)(?(?0|[0-9]{2})(?[0-9]{0,2}))")), data.table(id=1, diagnosis=c("sp","rel"), gender=c("m","f"), ages=c("5564","65"), ymin=c(55,65), ymax=c(64,Inf), value=c(2,3))) +wide.again = dcast(schools.tall, school ~ subject + number, value.var = c("score","sp")) +# measure with sep= +test(2183.23, melt(wide.again, na.rm=TRUE, measure.vars=measure(value.name, subject, number=as.integer))[order(score)], schools.expected)#should work without sep due to same default _ as dcast. 
+test(2183.24, names(melt(iris.dt, measure.vars=measure(value.name, dim, sep="."))), c("Species", "dim", "Sepal", "Petal")) +test(2183.25, names(melt(iris.dt, measure.vars=measure(part, value.name, sep="."))), c("Species", "part", "Length", "Width")) +test(2183.26, names(melt(iris.dt, measure.vars=measure(part, dim, sep="."))), c("Species", "part", "dim", "value")) +test(2183.27, melt(iris.dt, measure.vars=measure(value.name, dim="bar", sep=".")), error="each ... argument to measure must be a function with at least one argument, problem: dim") +test(2183.28, melt(iris.dt, measure.vars=measure(value.name, dim, baz, sep=".")), error="number of ... arguments to measure =3 must be same as max number of items after splitting column names =2") +test(2183.29, melt(iris.dt, measure.vars=measure()), error="each column name results in only one item after splitting using sep, which means that all columns would be melted; to fix please either specify melt on all columns directly without using measure, or use a different sep/pattern specification") +# patterns with iris data. +test(2183.40, names(melt(iris.dt, measure.vars=patterns("[.]"))), c("Species", "variable", "value")) +# measure with pattern= +test(2183.41, melt(DTid, measure.vars=measure(value.name, istr="bar", pattern="([ab])([12])")), error="each ... argument to measure must be a function with at least one argument, problem: istr") +test(2183.42, melt(DTid, measure.vars=measure(value.name, istr=function()1, pattern="([ab])([12])")), error="each ... argument to measure must be a function with at least one argument, problem: istr") +test(2183.43, melt(DTid, measure.vars=measure(value.name, istr=interactive, pattern="([ab])([12])")), error="each ... 
argument to measure must be a function with at least one argument, problem: istr") +test(2183.44, melt(DTid, measure.vars=measure(value.name, istr=function(x)1, pattern="([ab])([12])")), error="each conversion function must return an atomic vector with same length as its first argument, problem: istr") +test(2183.45, melt(iris.dt, measure.vars=measure(value.name, dim, baz, pattern="(.*)[.](.*)")), error="number of ... arguments to measure =3 must be same as number of capture groups in pattern =2") +test(2183.46, melt(iris.dt, measure.vars=measure(function(x)factor(x), dim, pattern="(.*)[.](.*)")), error="each ... argument to measure must be either a symbol without argument name, or a function with argument name, problems: 1") +test(2183.47, melt(iris.dt, measure.vars=measure(function(x)factor(x), pattern="(.*)[.](.*)")), error="each ... argument to measure must be either a symbol without argument name, or a function with argument name, problems: 1") +test(2183.48, melt(iris.dt, measure.vars=measure(value.name, value.name, sep=".")), error="... arguments to measure should be uniquely named, problems: value.name") +# measure with factor conversion. +myfac = function(x)factor(x)#user-defined conversion function. +test(2183.60, melt(DTid, measure.vars=measure(letter=myfac, value.name, pattern="([ab])([12])")), data.table(id=1, letter=factor(c("a","b")), "2"=c(2,2), "1"=c(NA,1))) +# measure errors. 
+iris.i <- 1 +iris.num <- datasets::iris[iris.i, 1:4] +iris.days <- data.table( + day1=iris.num, day2=iris.num, Species=iris$Species[iris.i]) +test(2183.61, melt(iris.days, measure.vars=measure(before=as.integer, value.name, dim, sep=".")), error="before conversion function returned vector of all NA", warning=base_messages$coerce_na) +test(2183.62, melt(iris.days, measure.vars=measure(before=function(x)rep(4, length(x)), value.name, dim, sep=".")), error="number of unique groups after applying type conversion functions less than number of groups, change type conversion") +test(2183.63, melt(iris.days, measure.vars=measure(before, value.name, dim, pattern="(day)[12][.](.*)[.](.*)")), error="number of unique column IDs =4 is less than number of melted columns =8; fix by changing pattern/sep") +test(2183.64, melt(iris.days, measure.vars=measure(day=as.integer, value.name, dim, pattern="day(.)[.](.*)[.](.*)")), data.table(Species=factor("setosa"), day=as.integer(c(1,2,1,2)), dim=c("Length","Length","Width","Width"), Sepal=c(5.1,5.1,3.5,3.5), Petal=c(1.4,1.4,0.2,0.2))) +test(2183.65, melt(iris.days, measure.vars=measure(pattern="day")), error="pattern must contain at least one capture group (parenthesized sub-pattern)") +test(2183.66, melt(iris.days, measure.vars=measure(value.name, pattern="(.*)")), error="value.name is the only group; fix by creating at least one more group") +test(2183.67, melt(iris.days, measure.vars=measure(foo, bar, pattern="(foo)(bar)")), error="pattern did not match any cols, so nothing would be melted; fix by changing pattern") +test(2183.68, melt(iris.days, measure.vars=measure(value.name, bar, pattern="(foo)(bar)")), error="pattern did not match any cols, so nothing would be melted; fix by changing pattern") +test(2183.69, melt(data.table(ff=1, ff=2), measure.vars=measure(letter, number, pattern="(.)(.)")), error="measured columns should be uniquely named, problems: ff") +test(2183.70, melt(data.table(f_f=1, f_f=2), 
measure.vars=measure(letter, number)), error="measured columns should be uniquely named, problems: f_f") +test(2183.71, melt(iris.days, measure.vars=measure(value.name=as.integer, variable, pattern="day(.)[.](.*)")), error="value.name column class=integer after applying conversion function, but must be character") +test(2183.72, melt(data.table(ff=1, ff=2, a=3, b=4), measure.vars=measure(letter, pattern="([ab])"), id.vars="ff"), data.table(ff=1, letter=c("a","b"), value=c(3,4)))#duplicate column names are fine if they are not matched by pattern. +test(2183.73, melt(DTid, measure.vars=measure(letter, multiple.keyword, pattern="([ab])([12])")), error="group names specified in ... conflict with measure argument names; please fix by changing group names: multiple.keyword") +test(2183.74, melt(DTid, measure.vars=measure(letter, number, multiple.keyword=as.integer, pattern="([ab])([12])")), error="multiple.keyword must be a character string") +test(2183.75, melt(DTid, measure.vars=measure(letter, number, multiple.keyword=NA_character_, pattern="([ab])([12])")), error="multiple.keyword must be a character string") +test(2183.76, melt(DTid, measure.vars=measure(letter, number, multiple.keyword="", pattern="([ab])([12])")), error="multiple.keyword must be a character string with nchar>0") +test(2183.77, melt(DTid, measure.vars=measure(letter, cols, pattern="([ab])([12])")), error="group names specified in ... 
conflict with measure argument names; please fix by changing group names: cols") +test(2183.78, melt(DTid, measure.vars=measure(letter, cols=as.integer, pattern="([ab])([12])")), error="cols must be a character vector of column names") +test(2183.79, melt(DTid, measure.vars=measure(letter, number, pattern=as.integer)), error="pattern must be character string") +test(2183.80, melt(DTid, measure.vars=measure(letter, number, sep=as.integer)), error="sep must be character string") + +# `keyby` allows mixing eval/get with direct columns, #4981 +dt <- data.table(a=c(1,2), b=c(3,4), c=c(1,0)) +dt2 <- dt[,.(suma=sum(a)), keyby=.(b=get("b"),c)] +test(2184.1, dt2[1, suma], 1) +dt2 <- dt[2,.(suma=sum(a)), keyby=.(b=b,c)] +test(2184.2, dt2[1, suma], 2) +dt2 <- dt[2,.(suma=sum(a)), keyby=.(b=get("b"))] +test(2184.3, dt2[1, suma], 2) +dt2 <- dt[2,.(suma=sum(a)), keyby=.(b=get("b"),c)] +test(2184.4, dt2[1, suma], 2) +# #4873 +IDT = as.data.table(iris) +vr = "Species" +IDT[, virginca := get(vr) == "virginica"] +ans = data.table(round = c(3, 3, 3, 2, 2, 4, 2, 4), k = c(6, 7, 8, 5, 7, 7, 6, 8), kar = structure(c(3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L), .Label = c("setosa", "versicolor", "virginica"), class = "factor"), N = c(24L, 14L, 4L, 1L, 1L, 1L, 3L, 2L)) +test(2184.5, IDT[(virginca), .N, by = .(round(Sepal.Width), k = round(Sepal.Length), kar = get(vr))] , ans) + +# dcast() segfault or 'STRING_ELT() can only be applied to character not logical' fixed in v1.13.0, #2394 +agg = function(x) if(length(x) > 0) min(x) else NA +DT = data.table(id=c(1,1,2,2), x=c('y','y','y','z'), v=c('a','b','c','d')) +test(2185, dcast(DT, formula=id~x, fun.aggregate=agg, value.var='v'), + data.table(id=c(1,2), y=c('a','c'), z=c(NA,'d'), key="id")) + +# compatible branches might seem incompatible if the condition is global, #4274 +DT = data.table(a=1L) +test(2186, DT[, if (TRUE) .(a=1L) else .(a=1L, b=2L)], DT, + warning='j may not evaluate to the same number of columns for each group') + +# col.names='none' 
should apply when wrapping too, #4270 +DT = setDT(replicate(getOption('width'), 1, simplify = FALSE)) +test(2187, {print(DT, col.names='none'); TRUE}, notOutput="V") + +# fifelse now supports vector na arguments and coerces NA to other types, PR#4289 +test(2188.01, fifelse(c(TRUE, FALSE, TRUE, NA), 1L, 2L, 1.0), c(1, 2, 1, 1)) +test(2188.02, fifelse(c(TRUE, FALSE, TRUE, NA), 1, 2, 1L), c(1, 2, 1, 1)) +test(2188.03, fifelse(c(TRUE, FALSE, TRUE, NA), 1:4, 11:14, 101:104), c(1L, 12L, 3L, 104L)) +test(2188.04, fifelse(c(TRUE, FALSE, TRUE, NA), NA, 11:14, 101:104), c(NA, 12L, NA, 104L)) +test(2188.05, fifelse(c(TRUE, FALSE, TRUE, NA), 1:4, NA, 101:104), c(1L, NA, 3L, 104L)) +test(2188.06, fifelse(c(TRUE, FALSE, TRUE, NA), 1:4, 11:14, NA), c(1L, 12L, 3L, NA)) +test(2188.07, fifelse(c(TRUE, FALSE, TRUE, NA), 1:4, NA, NA), c(1L, NA, 3L, NA)) +test(2188.08, fifelse(c(TRUE, FALSE, TRUE, NA), NA, NA, NA), c(NA, NA, NA, NA)) +test(2188.09, fifelse(c(TRUE, FALSE, TRUE, NA), NA, NA, NA_character_), rep(NA_character_, 4L)) +test(2188.10, fifelse(c(TRUE, FALSE, TRUE, NA), NA, NA, 101:104), c(NA, NA, NA, 104L)) +test(2188.11, fifelse(c(TRUE, FALSE, TRUE, NA), NA, 11:14, NA), c(NA, 12L, NA, NA)) +test(2188.12, fifelse(c(TRUE, FALSE, TRUE, NA), NA, NA, as.Date("2020-01-01")), as.Date(c(NA, NA, NA, "2020-01-01"))) +test(2188.13, fifelse(TRUE, 1L, 2.0, "a"), error="'na' is of type character but 'no' is double. Please") # smart error message +test(2188.14, fifelse(TRUE, NA, 2, as.Date("2019-07-07")), error="'no' has different class than 'na'. 
Please") +test(2188.15, fifelse(TRUE, NA, factor('a'), factor('a', levels = c('a','b'))), error="'no' and 'na' are both type factor but their levels are different") +test(2188.16, fifelse(c(NA, NA), 1L, 2L, NULL), c(NA_integer_, NA_integer_)) # NULL `na` is treated as NA + +# rolling join expected output on non-matching join column has been fixed #1913 +DT = data.table(ID=1:5, A=c(1.3, 1.7, 2.4, 0.9, 0.6)) +buckets = data.table(BucketID=1:4, BinA=1:4) +DT[, A.copy := A] +test(2189.1, buckets[DT, on=c("BinA"="A"), roll=-Inf], data.table(BucketID = c(2L, 2L, 3L, 1L, 1L), BinA = c(1.3, 1.7, 2.4, 0.9, 0.6), ID = 1:5, A.copy = c(1.3, 1.7, 2.4, 0.9, 0.6))) +buckets[, BinA := as.numeric(BinA)] +test(2189.2, buckets[DT, on=c("BinA"="A"), roll=-Inf], data.table(BucketID = c(2L, 2L, 3L, 1L, 1L), BinA = c(1.3, 1.7, 2.4, 0.9, 0.6), ID = 1:5, A.copy = c(1.3, 1.7, 2.4, 0.9, 0.6))) + +# segfault subassigning non-list type to list column, #4166 +DT = data.table(a=list(1:2, 3, 4)) +test(2190.1, DT[, a:=1:4], error="Supplied 4 items to be assigned to 3 items of column 'a'.*please use rep") +test(2190.2, DT[1:2, a:=structure(c(1L, 2L), att='t') ]$a, list(structure(1L, att='t'), structure(2L, att='t'), 4)) +test(2190.3, DT[1:2, a:=structure(c(1, 2), att='t') ]$a, list(structure(1, att='t'), structure(2, att='t'), 4)) +test(2190.4, DT[1:2, a:=structure(as.raw(c(1, 2)), att='t') ]$a, list(structure(as.raw(1), att='t'), structure(as.raw(2), att='t'), 4)) +test(2190.5, DT[1:2, a:=structure(as.complex(c(1, 2)), att='t')]$a, list(structure(as.complex(1), att='t'), structure(as.complex(2), att='t'), 4)) +test(2190.61, DT[1:2, a:=structure(c(TRUE, FALSE), att='t') ]$a, list(structure(TRUE, att='t'), structure(FALSE, att='t'), 4)) +test(2190.62, attributes(TRUE), NULL) # ensure R's internal global TRUE/FALSE didn't receive attribute att='t'; discovered when merging #4595 +test(2190.63, attributes(FALSE), NULL) +test(2190.7, DT[1:2, a:=structure(c('a', 'b'), att='t') ]$a, list(structure('a', 
att='t'), structure('b', att='t'), 4)) +if (test_bit64) { + test(2190.8, DT[1:2, a:=as.integer64(1:2) ]$a, list(as.integer64(1), as.integer64(2), 4)) +} +test(2190.9, DT[1:2, a:=call('sum', 1)], error="type 'language' cannot be coerced to 'list'") +test(2190.91, attributes(TRUE), NULL) # ensure R's internal global TRUE/FALSE didn't receive attribute att='t'; discovered when merging #4595 +test(2190.92, attributes(FALSE), NULL) + +# adding test for (since fixed) 'could not find function "."' when verbose=TRUE, #3196 +DT = data.table(i1 = c(234L, 250L, 169L, 234L, 147L, 96L, 96L, 369L, 147L, 96L), i4 = c(79L, 113L, 270L, -121L, 113L, 113L, -121L, 179L, -228L, 113L), v = 0) +test(2191, DT[1:5, sum(v), by=.(i5 = 1:5 %% 2L), verbose=TRUE], data.table(i5=1:0, V1=c(0,0)), output="gforce") + +# base::as.Date was error when first item blank, affecting as.IDate, #4676 +test(2192.1, as.IDate(c('', '2020-01-01')), structure(c(NA_integer_, 18262L), class=c("IDate","Date"))) +test(2192.2, as.IDate(c('2020-01-01', '')), structure(c(18262L, NA_integer_), class=c("IDate","Date"))) + +if (test_bit64) { + # subassign coerce to integer64 was fixed in 1.12.4, #2530 + DT = data.table(a = as.integer64(1:10)) + DT[a==1, a:=12] + DT[a==2, a:=as.integer64(13)] + test(2193.1, DT, data.table(a = as.integer64(c(12,13,3:10)))) + + # X[Y,,by=.EACHI] when Y contains integer64 also fixed in 1.12.4, #3779 + X = data.table(x=1:3) + Y = data.table(x=1:2, y=as.integer64(c(10,20))) + test(2193.2, X[Y, `:=`(y=i.y), on="x", by=.EACHI], data.table(x=1:3, y=as.integer64(10L,20L,NA))) +} + +# compatibility of endsWith backport with base::endsWith +if (exists('endsWith', 'package:base', inherits=FALSE)) { + DTendsWith = function(x, stub) {n=nchar(x); substr(x, n-nchar(stub)+1L, n)==stub} + BSendsWith = base::endsWith + test(2194.1, DTendsWith('abcd', 'd'), BSendsWith('abcd', 'd')) + test(2194.2, DTendsWith(letters, 'e'), BSendsWith(letters, 'e')) + test(2194.3, DTendsWith(NA_character_, 'a'), 
BSendsWith(NA_character_, 'a')) + test(2194.4, DTendsWith(character(), 'a'), BSendsWith(character(), 'a')) + # file used in encoding tests + txt = readLines(testDir("issue_563_fread.txt")) + test(2194.5, DTendsWith(txt, 'B'), BSendsWith(txt, 'B')) +} + +# uniqueN(x, by=character()) was internal error, #4594 +DT = data.table(idx=c(1L,2L,1L,3L), value="val") +test(2195.1, uniqueN(DT, by=character(0L)), 3L) +test(2195.2, uniqueN(DT, by=NULL), 3L) +test(2195.3, unique(DT, by=character(0L)), ans<-data.table(idx=1:3, value="val")) +test(2195.4, unique(DT, by=NULL), ans) +test(2195.5, duplicated(DT, by=character(0L)), ans<-c(FALSE, FALSE, TRUE, FALSE)) +test(2195.6, duplicated(DT, by=NULL), ans) +test(2195.7, anyDuplicated(DT, by=character(0L)), 3L) +test(2195.8, anyDuplicated(DT, by=NULL), 3L) + + +# Test new feature %notin%, #4152 +test(2196.1, 11 %notin% 1:10, TRUE) +test(2196.2, "a" %notin% c(), TRUE) +test(2196.3, "a" %notin% c("a", "b", "c"), FALSE) +test(2196.4, c(1, 2) %notin% c(1,2,3), c(FALSE, FALSE)) +test(2196.5, "a" %notin% character(), TRUE) +test(2196.6, "a" %notin% integer(), TRUE) +test(2196.7, "a" %notin% NULL, TRUE) +test(2196.8, NA %notin% 1:5, TRUE) +test(2196.9, NA %notin% c(1:5, NA), FALSE) \ No newline at end of file diff --git a/man/address.Rd b/man/address.Rd index 258c0241f2..8363d3c7ba 100644 --- a/man/address.Rd +++ b/man/address.Rd @@ -16,8 +16,15 @@ Sometimes useful in determining whether a value has been copied or not, programm \value{ A character vector length 1. 
} +\seealso{ + \code{\link{copy}} +} \references{ -\url{https://stackoverflow.com/a/10913296/403310} (but implemented in C without using \code{.Internal(inspect())}) + \url{https://stackoverflow.com/a/10913296/403310} (but implemented in C without using \code{.Internal(inspect())}) +} +\examples{ +x=1 +address(x) } \keyword{ data } diff --git a/man/assign.Rd b/man/assign.Rd index 5cfc42b9a9..f622755606 100644 --- a/man/assign.Rd +++ b/man/assign.Rd @@ -63,7 +63,7 @@ For additional resources, please read \href{../doc/datatable-faq.html}{\code{vig When \code{LHS} is a factor column and \code{RHS} is a character vector with items missing from the factor levels, the new level(s) are automatically added (by reference, efficiently), unlike base methods. -Unlike \code{<-} for \code{data.frame}, the (potentially large) LHS is not coerced to match the type of the (often small) RHS. Instead the RHS is coerced to match the type of the LHS, if necessary. Where this involves double precision values being coerced to an integer column, a warning is given (whether or not fractional data is truncated). The motivation for this is efficiency. It is best to get the column types correct up front and stick to them. Changing a column type is possible but deliberately harder: provide a whole column as the RHS. This RHS is then \emph{plonked} into that column slot and we call this \emph{plonk syntax}, or \emph{replace column syntax} if you prefer. By needing to construct a full length vector of a new type, you as the user are more aware of what is happening, and it is clearer to readers of your code that you really do intend to change the column type. +Unlike \code{<-} for \code{data.frame}, the (potentially large) LHS is not coerced to match the type of the (often small) RHS. Instead the RHS is coerced to match the type of the LHS, if necessary. Where this involves double precision values being coerced to an integer column, a warning is given when fractional data is truncated. 
It is best to get the column types correct up front and stick to them. Changing a column type is possible but deliberately harder: provide a whole column as the RHS. This RHS is then \emph{plonked} into that column slot and we call this \emph{plonk syntax}, or \emph{replace column syntax} if you prefer. By needing to construct a full length vector of a new type, you as the user are more aware of what is happening and it is clearer to readers of your code that you really do intend to change the column type; e.g., \code{DT[, colA:=as.integer(colA)]}. A plonk occurs whenever you provide a RHS value to `:=` which is \code{nrow} long. When a column is \emph{plonked}, the original column is not updated by reference because that would entail updating every single element of that column whereas the plonk is just one column pointer update. \code{data.table}s are \emph{not} copied-on-change by \code{:=}, \code{setkey} or any of the other \code{set*} functions. See \code{\link{copy}}. } @@ -72,7 +72,7 @@ Unlike \code{<-} for \code{data.frame}, the (potentially large) LHS is not coerc Since \code{[.data.table} incurs overhead to check the existence and type of arguments (for example), \code{set()} provides direct (but less flexible) assignment by reference with low overhead, appropriate for use inside a \code{for} loop. See examples. \code{:=} is more powerful and flexible than \code{set()} because \code{:=} is intended to be combined with \code{i} and \code{by} in single queries on large datasets. } -\section{Note:}{ +\note{ \code{DT[a > 4, b := c]} is different from \code{DT[a > 4][, b := c]}. The first expression updates (or adds) column \code{b} with the value \code{c} on those rows where \code{a > 4} evaluates to \code{TRUE}. \code{X} is updated \emph{by reference}, therefore no assignment needed. The second expression on the other hand updates a \emph{new} \code{data.table} that's returned by the subset operation. 
Since the subsetted data.table is ephemeral (it is not assigned to a symbol), the result would be lost; unless the result is assigned, for example, as follows: \code{ans <- DT[a > 4][, b := c]}. diff --git a/man/cdt.Rd b/man/cdt.Rd index ea7c3a76eb..8c0846cac9 100644 --- a/man/cdt.Rd +++ b/man/cdt.Rd @@ -2,18 +2,25 @@ \alias{cdatatable} \title{ data.table exported C routines } \description{ - Note that this interface is going to be changed in next release. Some of internally used C routines are now exported. This interface should be considered experimental. List of exported C routines and their signatures are provided below in the usage section. } \usage{ -# SEXP subsetDT(SEXP x, SEXP rows, SEXP cols); -# p_dtCsubsetDT = R_GetCCallable("data.table", "CsubsetDT"); +# SEXP DT_subsetDT(SEXP x, SEXP rows, SEXP cols); +# p_DT_subsetDT = R_GetCCallable("data.table", "DT_subsetDT"); } \details{ - For details how to use those see \emph{Writing R Extensions} manual \emph{Linking to native routines in other packages} section. + Details how to use those can be found in \emph{Writing R Extensions} manual \emph{Linking to native routines in other packages} section. + An example use with \code{Rcpp}: +\preformatted{ + dt = data.table::as.data.table(iris) + Rcpp::cppFunction("SEXP mysub2(SEXP x, SEXP rows, SEXP cols) { return DT_subsetDT(x,rows,cols); }", + include="#include ", + depends="data.table") + mysub2(dt, 1:4, 1:4) +} } \note{ - Be aware C routines are likely to have less input validation than their corresponding R interface. For example one should not expect \code{DT[-5L]} will be equal to \code{.Call(CsubsetDT, DT, -5L, seq_along(DT))} because translation of \code{i=-5L} to \code{seq_len(nrow(DT))[-5L]} might be happening on R level. Moreover checks that \code{i} argument is in range of \code{1:nrow(DT)}, missingness, etc. might be happening on R level too. + Be aware C routines are likely to have less input validation than their corresponding R interface. 
For example one should not expect \code{DT[-5L]} will be equal to \code{.Call(DT_subsetDT, DT, -5L, seq_along(DT))} because translation of \code{i=-5L} to \code{seq_len(nrow(DT))[-5L]} might be happening on R level. Moreover checks that \code{i} argument is in range of \code{1:nrow(DT)}, missingness, etc. might be happening on R level too. } \references{ \url{https://cran.r-project.org/doc/manuals/r-release/R-exts.html} diff --git a/man/copy.Rd b/man/copy.Rd index 819fa2a509..587f216805 100644 --- a/man/copy.Rd +++ b/man/copy.Rd @@ -16,11 +16,15 @@ copy(x) \code{data.table} provides functions that operate on objects \emph{by reference} and minimise full object copies as much as possible. Still, it might be necessary in some situations to work on an object's copy which can be done using \code{DT.copy <- copy(DT)}. It may also be sometimes useful before \code{:=} (or \code{set}) is used to subassign to a column by reference. A \code{copy()} may be required when doing \code{dt_names = names(DT)}. Due to R's \emph{copy-on-modify}, \code{dt_names} still points to the same location in memory as \code{names(DT)}. Therefore modifying \code{DT} \emph{by reference} now, say by adding a new column, \code{dt_names} will also get updated. To avoid this, one has to \emph{explicitly} copy: \code{dt_names <- copy(names(DT))}. - } +} +\note{ + To confirm precisely whether an object is a copy of another, compare their exact memory address with \code{\link{address}}. +} \value{ - Returns a copy of the object. + Returns a copy of the object. 
} -\seealso{ \code{\link{data.table}}, \code{\link{setkey}}, \code{\link{setDT}}, \code{\link{setDF}}, \code{\link{set}} \code{\link{:=}}, \code{\link{setorder}}, \code{\link{setattr}}, \code{\link{setnames}} +\seealso{ + \code{\link{data.table}}, \code{\link{address}}, \code{\link{setkey}}, \code{\link{setDT}}, \code{\link{setDF}}, \code{\link{set}} \code{\link{:=}}, \code{\link{setorder}}, \code{\link{setattr}}, \code{\link{setnames}} } \examples{ # Type 'example(copy)' to run these at prompt and browse output diff --git a/man/data.table.Rd b/man/data.table.Rd index 59b6aae1e1..e934028a3b 100644 --- a/man/data.table.Rd +++ b/man/data.table.Rd @@ -31,7 +31,7 @@ data.table(\dots, keep.rownames=FALSE, check.names=FALSE, key=NULL, stringsAsFac .SDcols, verbose = getOption("datatable.verbose"), # default: FALSE allow.cartesian = getOption("datatable.allow.cartesian"), # default: FALSE - drop = NULL, on = NULL) + drop = NULL, on = NULL, env = NULL) } \arguments{ \item{\dots}{ Just as \code{\dots} in \code{\link{data.frame}}. Usual recycling rules are applied to vectors of different lengths to create a list of equal length vectors.} @@ -110,7 +110,7 @@ data.table(\dots, keep.rownames=FALSE, check.names=FALSE, key=NULL, stringsAsFac \emph{Advanced:} In the \code{X[Y, j]} form of grouping, the \code{j} expression sees variables in \code{X} first, then \code{Y}. We call this \emph{join inherited scope}. If the variable is not in \code{X} or \code{Y} then the calling frame is searched, its calling frame, and so on in the usual way up to and including the global environment.} - \item{keyby}{ Same as \code{by}, but with an additional \code{setkey()} run on the \code{by} columns of the result, for convenience. It is common practice to use `keyby=` routinely when you wish the result to be sorted.} + \item{keyby}{ Same as \code{by}, but with an additional \code{setkey()} run on the \code{by} columns of the result, for convenience. 
It is common practice to use `keyby=` routinely when you wish the result to be sorted. May also be \code{TRUE} or \code{FALSE} when \code{by} is provided as an alternative way to accomplish the same operation.} \item{with}{ By default \code{with=TRUE} and \code{j} is evaluated within the frame of \code{x}; column names can be used as variables. In case of overlapping variables names inside dataset and in parent scope you can use double dot prefix \code{..cols} to explicitly refer to `\code{cols} variable parent scope and not from your dataset. @@ -170,6 +170,8 @@ data.table(\dots, keep.rownames=FALSE, check.names=FALSE, key=NULL, stringsAsFac } See examples as well as \href{../doc/datatable-secondary-indices-and-auto-indexing.html}{\code{vignette("datatable-secondary-indices-and-auto-indexing")}}. } + + \item{env}{ List or an environment, passed to \code{\link{substitute2}} for substitution of parameters in \code{i}, \code{j} and \code{by} (or \code{keyby}). Use \code{verbose} to preview constructed expressions. 
} } \details{ \code{data.table} builds on base \R functionality to reduce 2 types of time:\cr @@ -200,6 +202,7 @@ The way to read this out loud is: "Take \code{DT}, subset rows by \code{i}, \emp X[, sum(a), by=c:f] # get sum(a) grouped by all columns in between 'c' and 'f' (both inclusive) X[, sum(a), keyby=b] # get sum(a) grouped by 'b', and sort that result by the grouping column 'b' + X[, sum(a), by=b, keyby=TRUE] # same order as above, but using sorting flag X[, sum(a), by=b][order(b)] # same order as above, but by chaining compound expressions X[c>1, sum(a), by=c] # get rows where c>1 is TRUE, and on those rows, get sum(a) grouped by 'c' X[Y, .(a, b), on="c"] # get rows where Y$c == X$c, and select columns 'X$a' and 'X$b' for those rows @@ -220,11 +223,11 @@ See the \code{see also} section for the several other \emph{methods} that are av } \references{ -\url{https://github.com/Rdatatable/data.table/wiki} (\code{data.table} homepage)\cr +\url{https://r-datatable.com} (\code{data.table} homepage)\cr \url{https://en.wikipedia.org/wiki/Binary_search} } -\note{ If \code{keep.rownames} or \code{check.names} are supplied they must be written in full because \R does not allow partial argument names after `\code{\dots}`. For example, \code{data.table(DF, keep=TRUE)} will create a -column called \code{"keep"} containing \code{TRUE} and this is correct behaviour; \code{data.table(DF, keep.rownames=TRUE)} was intended. +\note{ If \code{keep.rownames} or \code{check.names} are supplied they must be written in full because \R does not allow partial argument names after \code{\dots}. For example, \code{data.table(DF, keep=TRUE)} will create a +column called \code{keep} containing \code{TRUE} and this is correct behaviour; \code{data.table(DF, keep.rownames=TRUE)} was intended. \code{POSIXlt} is not supported as a column type because it uses 40 bytes to store a single datetime. They are implicitly converted to \code{POSIXct} type with \emph{warning}. 
You may also be interested in \code{\link{IDateTime}} instead; it has methods to convert to and from \code{POSIXlt}. } @@ -280,6 +283,7 @@ DT[["v"]] # same as DT[, v] but much faster # grouping operations - j and by DT[, sum(v), by=x] # ad hoc by, order of groups preserved in result DT[, sum(v), keyby=x] # same, but order the result on by cols +DT[, sum(v), by=x, keyby=TRUE] # same, but using sorting flag DT[, sum(v), by=x][order(x)] # same but by chaining expressions together # fast ad hoc row subsets (subsets as joins) diff --git a/man/dcast.data.table.Rd b/man/dcast.data.table.Rd index daf9fba655..2aa265a96c 100644 --- a/man/dcast.data.table.Rd +++ b/man/dcast.data.table.Rd @@ -61,16 +61,16 @@ Historical note: \code{dcast.data.table} was originally designed as an enhanceme \examples{ ChickWeight = as.data.table(ChickWeight) setnames(ChickWeight, tolower(names(ChickWeight))) -DT <- melt(as.data.table(ChickWeight), id=2:4) # calls melt.data.table +DT <- melt(as.data.table(ChickWeight), id.vars=2:4) # calls melt.data.table # dcast is an S3 method in data.table from v1.9.6 -dcast(DT, time ~ variable, fun=mean) # using partial matching of argument -dcast(DT, diet ~ variable, fun=mean) +dcast(DT, time ~ variable, fun.aggregate=mean) +dcast(DT, diet ~ variable, fun.aggregate=mean) dcast(DT, diet+chick ~ time, drop=FALSE) dcast(DT, diet+chick ~ time, drop=FALSE, fill=0) # using subset -dcast(DT, chick ~ time, fun=mean, subset=.(time < 10 & chick < 20)) +dcast(DT, chick ~ time, fun.aggregate=mean, subset=.(time < 10 & chick < 20)) # drop argument, #1512 DT <- data.table(v1 = c(1.1, 1.1, 1.1, 2.2, 2.2, 2.2), @@ -78,37 +78,37 @@ DT <- data.table(v1 = c(1.1, 1.1, 1.1, 2.2, 2.2, 2.2), v3 = factor(c(2L, 3L, 5L, 1L, 2L, 6L), levels=1:6), v4 = c(3L, 2L, 2L, 5L, 4L, 3L)) # drop=TRUE -dcast(DT, v1 + v2 ~ v3) # default is drop=TRUE -dcast(DT, v1 + v2 ~ v3, drop=FALSE) # all missing combinations of both LHS and RHS -dcast(DT, v1 + v2 ~ v3, drop=c(FALSE, TRUE)) # all missing 
combinations of only LHS -dcast(DT, v1 + v2 ~ v3, drop=c(TRUE, FALSE)) # all missing combinations of only RHS +dcast(DT, v1+v2~v3, value.var='v4') # default is drop=TRUE +dcast(DT, v1+v2~v3, value.var='v4', drop=FALSE) # all missing combinations of LHS and RHS +dcast(DT, v1+v2~v3, value.var='v4', drop=c(FALSE, TRUE)) # all missing combinations of LHS only +dcast(DT, v1+v2~v3, value.var='v4', drop=c(TRUE, FALSE)) # all missing combinations of RHS only # using . and ... DT <- data.table(v1 = rep(1:2, each = 6), v2 = rep(rep(1:3, 2), each = 2), v3 = rep(1:2, 6), v4 = rnorm(6)) -dcast(DT, \dots ~ v3, value.var = "v4") #same as v1 + v2 ~ v3, value.var = "v4" -dcast(DT, v1 + v2 + v3 ~ ., value.var = "v4") +dcast(DT, \dots ~ v3, value.var="v4") # same as v1+v2 ~ v3, value.var="v4" +dcast(DT, v1+v2+v3 ~ ., value.var="v4") ## for each combination of (v1, v2), add up all values of v4 -dcast(DT, v1 + v2 ~ ., value.var = "v4", fun.aggregate = sum) +dcast(DT, v1+v2 ~ ., value.var="v4", fun.aggregate=sum) # fill and types -dcast(DT, v2 ~ v3, value.var = 'v1', fill = 0L) # 0L --> 0 -dcast(DT, v2 ~ v3, value.var = 'v4', fill = 1.1) # 1.1 --> 1L +dcast(DT, v2~v3, value.var='v1', fun.aggregate=length, fill=0L) # 0L --> 0 +dcast(DT, v2~v3, value.var='v4', fun.aggregate=length, fill=1.1) # 1.1 --> 1L # multiple value.var and multiple fun.aggregate DT = data.table(x=sample(5,20,TRUE), y=sample(2,20,TRUE), - z=sample(letters[1:2], 20,TRUE), d1 = runif(20), d2=1L) + z=sample(letters[1:2], 20,TRUE), d1=runif(20), d2=1L) # multiple value.var -dcast(DT, x + y ~ z, fun=sum, value.var=c("d1","d2")) +dcast(DT, x+y ~ z, fun.aggregate=sum, value.var=c("d1","d2")) # multiple fun.aggregate -dcast(DT, x + y ~ z, fun=list(sum, mean), value.var="d1") +dcast(DT, x+y ~ z, fun.aggregate=list(sum, mean), value.var="d1") # multiple fun.agg and value.var (all combinations) -dcast(DT, x + y ~ z, fun=list(sum, mean), value.var=c("d1", "d2")) +dcast(DT, x+y ~ z, fun.aggregate=list(sum, mean), 
value.var=c("d1", "d2")) # multiple fun.agg and value.var (one-to-one) -dcast(DT, x + y ~ z, fun=list(sum, mean), value.var=list("d1", "d2")) +dcast(DT, x+y ~ z, fun.aggregate=list(sum, mean), value.var=list("d1", "d2")) } \seealso{ \code{\link{melt.data.table}}, \code{\link{rowid}}, \url{https://cran.r-project.org/package=reshape} diff --git a/man/deprecated.Rd b/man/deprecated.Rd index c1bb9afc16..da138d8734 100644 --- a/man/deprecated.Rd +++ b/man/deprecated.Rd @@ -8,6 +8,9 @@ \usage{ key(x) <- value # warning since 2012; DEPRECATED since Mar 2019 } +\examples{ +# dummy example section to pass release check that all .Rd files have examples +} \arguments{ \item{x}{ Deprecated. } } diff --git a/man/fcase.Rd b/man/fcase.Rd index 82e582ca43..dd3a119110 100644 --- a/man/fcase.Rd +++ b/man/fcase.Rd @@ -5,7 +5,7 @@ \code{fcase} is a fast implementation of SQL \code{CASE WHEN} statement for R. Conceptually, \code{fcase} is a nested version of \code{\link{fifelse}} (with smarter implementation than manual nesting). It is comparable to \code{dplyr::case_when} and supports \code{bit64}'s \code{integer64} and \code{nanotime} classes. } \usage{ - fcase(..., default=NA) + fcase(\dots, default=NA) } \arguments{ \item{...}{ A sequence consisting of logical condition (\code{when})-resulting value (\code{value}) \emph{pairs} in the following order \code{when1, value1, when2, value2, ..., whenN, valueN}. Logical conditions \code{when1, when2, ..., whenN} must all have the same length, type and attributes. Each \code{value} may either share length with \code{when} or be length 1. Please see Examples section for further details.} diff --git a/man/fifelse.Rd b/man/fifelse.Rd index 2fe355c98c..4165dd796d 100644 --- a/man/fifelse.Rd +++ b/man/fifelse.Rd @@ -11,10 +11,10 @@ \arguments{ \item{test}{ A logical vector. } \item{yes, no}{ Values to return depending on \code{TRUE}/\code{FALSE} element of \code{test}. 
They must be the same type and be either length \code{1} or the same length of \code{test}. } - \item{na}{ Value to return if an element of \code{test} is \code{NA}. It must be the same type as \code{yes} and \code{no} and length \code{1}. Default value \code{NA}. \code{NULL} is treated as \code{NA}. } + \item{na}{ Value to return if an element of \code{test} is \code{NA}. It must be the same type as \code{yes} and \code{no} and its length must be either \code{1} or the same length of \code{test}. Default value \code{NA}. \code{NULL} is treated as \code{NA}. } } \details{ -In contrast to \code{\link[base]{ifelse}} attributes are copied from \code{yes} to the output. This is useful when returning \code{Date}, \code{factor} or other classes. +In contrast to \code{\link[base]{ifelse}} attributes are copied from the first non-\code{NA} argument to the output. This is useful when returning \code{Date}, \code{factor} or other classes. } \value{ A vector of the same length as \code{test} and attributes as \code{yes}. Data values are taken from the values of \code{yes} and \code{no}, eventually \code{na}. diff --git a/man/fread.Rd b/man/fread.Rd index 703eb70d3e..c7b7da8566 100644 --- a/man/fread.Rd +++ b/man/fread.Rd @@ -37,7 +37,8 @@ yaml=FALSE, autostart=NA, tmpdir=tempdir(), tz="UTC" \item{nrows}{ The maximum number of rows to read. Unlike \code{read.table}, you do not need to set this to an estimate of the number of rows in the file for better speed because that is already automatically determined by \code{fread} almost instantly using the large sample of lines. \code{nrows=0} returns the column names and typed empty columns determined by the large sample; useful for a dry run of a large file or to quickly check format consistency of a set of files before starting to read any of them. } \item{header}{ Does the first data line contain column names? Defaults according to whether every non-empty field on the first data line is type character. 
If so, or TRUE is supplied, any empty column names are given a default name. } \item{na.strings}{ A character vector of strings which are to be interpreted as \code{NA} values. By default, \code{",,"} for columns of all types, including type \code{character} is read as \code{NA} for consistency. \code{,"",} is unambiguous and read as an empty string. To read \code{,NA,} as \code{NA}, set \code{na.strings="NA"}. To read \code{,,} as blank string \code{""}, set \code{na.strings=NULL}. When they occur in the file, the strings in \code{na.strings} should not appear quoted since that is how the string literal \code{,"NA",} is distinguished from \code{,NA,}, for example, when \code{na.strings="NA"}. } - \item{stringsAsFactors}{ Convert all character columns to factors? } + \item{stringsAsFactors}{ Convert all or some character columns to factors? Acceptable inputs are \code{TRUE}, \code{FALSE}, or a decimal value between 0.0 and 1.0. For \code{stringsAsFactors = FALSE}, all string columns are stored as \code{character} vs. all stored as \code{factor} when \code{TRUE}. When \code{stringsAsFactors = p} for \code{0 <= p <= 1}, string columns \code{col} are stored as \code{factor} if \code{uniqueN(col)/nrow < p}. + } \item{verbose}{ Be chatty and report timings? } \item{skip}{ If 0 (default) start on the first line and from there finds the first row with a consistent number of columns. This automatically avoids irregular header information before the column names row. \code{skip>0} means ignore the first \code{skip} rows manually. \code{skip="string"} searches for \code{"string"} in the file (e.g. a substring of the column names row) and starts on that line (inspired by read.xls in package gdata). } \item{select}{ A vector of column names or numbers to keep, drop the rest. \code{select} may specify types too in the same way as \code{colClasses}; i.e., a vector of \code{colname=type} pairs, or a \code{list} of \code{type=col(s)} pairs. 
In all forms of \code{select}, the order that the columns are specified determines the order of the columns in the result. } diff --git a/man/froll.Rd b/man/froll.Rd index 388c47c485..090b397a90 100644 --- a/man/froll.Rd +++ b/man/froll.Rd @@ -12,71 +12,65 @@ \alias{frollapply} \title{Rolling functions} \description{ - Fast rolling functions to calculate aggregates on sliding window. Function name and arguments are experimental. + Fast rolling functions to calculate aggregates on sliding windows. Function name and arguments are experimental. } \usage{ -frollmean(x, n, fill=NA, algo=c("fast", "exact"), align=c("right", - "left", "center"), na.rm=FALSE, hasNA=NA, adaptive=FALSE) -frollsum(x, n, fill=NA, algo=c("fast","exact"), align=c("right", "left", - "center"), na.rm=FALSE, hasNA=NA, adaptive=FALSE) +frollmean(x, n, fill=NA, algo=c("fast", "exact"), + align=c("right", "left", "center"), na.rm=FALSE, hasNA=NA, adaptive=FALSE) +frollsum(x, n, fill=NA, algo=c("fast","exact"), + align=c("right", "left", "center"), na.rm=FALSE, hasNA=NA, adaptive=FALSE) frollapply(x, n, FUN, \dots, fill=NA, align=c("right", "left", "center")) } \arguments{ - \item{x}{ vector, list, data.frame or data.table of numeric or logical columns. } - \item{n}{ integer vector, for adaptive rolling function also list of - integer vectors, rolling window size. } - \item{fill}{ numeric or logical, value to pad by. Defaults to \code{NA}. } - \item{algo}{ character, default \code{"fast"}. When set to \code{"exact"}, - then slower algorithm is used. It suffers less from floating point - rounding error, performs extra pass to adjust rounding error - correction and carefully handles all non-finite values. If available - it will use multiple cores. See details for more information. } - \item{align}{ character, define if rolling window covers preceding rows - (\code{"right"}), following rows (\code{"left"}) or centered - (\code{"center"}). Defaults to \code{"right"}. } - \item{na.rm}{ logical. 
Should missing values be removed when - calculating window? Defaults to \code{FALSE}. For details on handling - other non-finite values, see details below. } - \item{hasNA}{ logical. If it is known that \code{x} contains \code{NA} - then setting to \code{TRUE} will speed up. Defaults to \code{NA}. } - \item{adaptive}{ logical, should adaptive rolling function be - calculated, default \code{FALSE}. See details below. } - \item{FUN}{ the function to be applied in rolling fashion; see Details for restrictions } - \item{\dots}{ extra arguments passed to \code{FUN} in \code{frollapply}. } + \item{x}{ Vector, \code{data.frame} or \code{data.table} of integer, numeric or logical columns over which to calculate the windowed aggregations. May also be a list, in which case the rolling function is applied to each of its elements. } + \item{n}{ Integer vector giving rolling window size(s). This is the \emph{total} number of included values. Adaptive rolling functions also accept a list of integer vectors. } + \item{fill}{ Numeric; value to pad by. Defaults to \code{NA}. } + \item{algo}{ Character, default \code{"fast"}. When set to \code{"exact"}, a slower (but more accurate) algorithm is used. It + suffers less from floating point rounding errors by performing an extra pass, and carefully handles all non-finite values. + It will use multiple cores where available. See Details for more information. } + \item{align}{ Character, specifying the "alignment" of the rolling window, defaulting to \code{"right"}. \code{"right"} covers preceding rows (the window \emph{ends} on the current value); \code{"left"} covers following rows (the window \emph{starts} on the current value); \code{"center"} is halfway in between (the window is \emph{centered} on the current value, biased towards \code{"left"} when \code{n} is even). } + \item{na.rm}{ Logical, default \code{FALSE}. Should missing values be removed when + calculating window? 
For details on handling other non-finite values, see Details. } + \item{hasNA}{ Logical. If it is known that \code{x} contains \code{NA} + then setting this to \code{TRUE} will speed up calculation. Defaults to \code{NA}. } + \item{adaptive}{ Logical, default \code{FALSE}. Should the rolling function be calculated adaptively? See Details below. } + \item{FUN}{ The function to be applied to the rolling window; see Details for restrictions. } + \item{\dots}{ Extra arguments passed to \code{FUN} in \code{frollapply}. } } \details{ - \code{froll*} functions accepts vectors, lists, data.frames or - data.tables. They always return a list except when the input is a - \code{vector} and \code{length(n)==1} in which case a \code{vector} - is returned, for convenience. Thus rolling functions can be used - conveniently within data.table syntax. + \code{froll*} functions accept vectors, lists, \code{data.frame}s or + \code{data.table}s. They always return a list except when the input is a + \code{vector} and \code{length(n)==1}, in which case a \code{vector} + is returned, for convenience. Thus, rolling functions can be used + conveniently within \code{data.table} syntax. Argument \code{n} allows multiple values to apply rolling functions on - multiple window sizes. If \code{adaptive=TRUE}, then it expects a list. + multiple window sizes. If \code{adaptive=TRUE}, then \code{n} must be a list. Each list element must be integer vector of window sizes corresponding - to every single observation in each column. + to every single observation in each column; see Examples. - When \code{algo="fast"} then \emph{on-line} algorithm is used, also - any \code{NaN, +Inf, -Inf} is treated as \code{NA}. - Setting \code{algo="exact"} will make rolling functions to use - compute-intensive algorithm that suffers less from floating point - rounding error. 
It also handles \code{NaN, +Inf, -Inf} consistently to + When \code{algo="fast"} an \emph{"on-line"} algorithm is used, and + all of \code{NaN, +Inf, -Inf} are treated as \code{NA}. + Setting \code{algo="exact"} will make rolling functions to use a more + computationally-intensive algorithm that suffers less from floating point + rounding error (the same consideration applies to \code{\link[base]{mean}}). + \code{algo="exact"} also handles \code{NaN, +Inf, -Inf} consistently to base R. In case of some functions (like \emph{mean}), it will additionally make extra pass to perform floating point error correction. Error corrections might not be truly exact on some platforms (like Windows) when using multiple threads. - Adaptive rolling functions are special cases where for each single - observation has own corresponding rolling window width. Due to the logic - of adaptive rolling functions, following restrictions apply: + Adaptive rolling functions are a special case where each + observation has its own corresponding rolling window width. Due to the logic + of adaptive rolling functions, the following restrictions apply: \itemize{ \item{ \code{align} only \code{"right"}. } \item{ if list of vectors is passed to \code{x}, then all - list vectors must have equal length. } + vectors within it must have equal length. } } When multiple columns or multiple windows width are provided, then they - are run in parallel. Except for the \code{algo="exact"} which runs in + are run in parallel. The exception is for \code{algo="exact"}, which runs in parallel already. \code{frollapply} computes rolling aggregate on arbitrary R functions. @@ -113,7 +107,7 @@ frollapply(x, n, FUN, \dots, fill=NA, align=c("right", "left", "center")) \item{ when \code{adaptive=TRUE}, then \code{n} must be vector of length equal to \code{nrow(x)}, or list of such vectors. } \item{ \code{partial} window feature is not supported, although it can - be accomplished by using \code{adaptive=TRUE}, see examples. 
} + be accomplished by using \code{adaptive=TRUE}, see examples. \code{NA} is always returned for incomplete windows. } } Be aware that rolling functions operates on the physical order of input. diff --git a/man/fwrite.Rd b/man/fwrite.Rd index f784b6bc3b..870acaac75 100644 --- a/man/fwrite.Rd +++ b/man/fwrite.Rd @@ -6,7 +6,8 @@ As \code{write.csv} but much faster (e.g. 2 seconds versus 1 minute) and just as } \usage{ fwrite(x, file = "", append = FALSE, quote = "auto", - sep = ",", sep2 = c("","|",""), + sep=getOption("datatable.fwrite.sep", ","), + sep2 = c("","|",""), eol = if (.Platform$OS.type=="windows") "\r\n" else "\n", na = "", dec = ".", row.names = FALSE, col.names = TRUE, qmethod = c("double","escape"), @@ -19,7 +20,8 @@ fwrite(x, file = "", append = FALSE, quote = "auto", compress = c("auto", "none", "gzip"), yaml = FALSE, bom = FALSE, - verbose = getOption("datatable.verbose", FALSE)) + verbose = getOption("datatable.verbose", FALSE), + encoding = "") } \arguments{ \item{x}{Any \code{list} of same length vectors; e.g. \code{data.frame} and \code{data.table}. If \code{matrix}, it gets internally coerced to \code{data.table} preserving col names but not row names} @@ -59,6 +61,7 @@ fwrite(x, file = "", append = FALSE, quote = "auto", \item{yaml}{If \code{TRUE}, \code{fwrite} will output a CSVY file, that is, a CSV file with metadata stored as a YAML header, using \code{\link[yaml]{as.yaml}}. See \code{Details}. } \item{bom}{If \code{TRUE} a BOM (Byte Order Mark) sequence (EF BB BF) is added at the beginning of the file; format 'UTF-8 with BOM'.} \item{verbose}{Be chatty and report timings?} + \item{encoding}{ The encoding of the strings written to the CSV file. Default is \code{""}, which means writing raw bytes without considering the encoding. Other possible options are \code{"UTF-8"} and \code{"native"}. 
} } \details{ \code{fwrite} began as a community contribution with \href{https://github.com/Rdatatable/data.table/pull/1613}{pull request #1613} by Otto Seiskari. This gave Matt Dowle the impetus to specialize the numeric formatting and to parallelize: \url{https://www.h2o.ai/blog/fast-csv-writing-for-r/}. Final items were tracked in \href{https://github.com/Rdatatable/data.table/issues/1664}{issue #1664} such as automatic quoting, \code{bit64::integer64} support, decimal/scientific formatting exactly matching \code{write.csv} between 2.225074e-308 and 1.797693e+308 to 15 significant figures, \code{row.names}, dates (between 0000-03-01 and 9999-12-31), times and \code{sep2} for \code{list} columns where each cell can itself be a vector. diff --git a/man/measure.Rd b/man/measure.Rd new file mode 100644 index 0000000000..73a315e006 --- /dev/null +++ b/man/measure.Rd @@ -0,0 +1,92 @@ +\name{measure} +\alias{measure} +\alias{measurev} +\title{Specify measure.vars via regex or separator} +\description{ + These functions compute an integer vector or list for use as + the \code{measure.vars} argument to \code{melt}. + Each measured variable name is converted into several groups that occupy + different columns in the output melted data. + \code{measure} allows specifying group names/conversions in R code + (each group and conversion specified as an argument) + whereas \code{measurev} allows specifying group names/conversions using + data values + (each group and conversion specified as a list element). + See + \href{../doc/datatable-reshape.html}{\code{vignette("datatable-reshape")}} + for more info. 
+} +\usage{ +measure(\dots, sep, pattern, cols, multiple.keyword="value.name") +measurev(fun.list, sep, pattern, cols, multiple.keyword="value.name", + group.desc="elements of fun.list") +} +\arguments{ + \item{\dots}{One or more (1) symbols (without argument name; symbol + is used for group name) or (2) functions to convert the groups + (with argument name that is used for group name). + Must have same number of arguments as groups that are + specified by either \code{sep} or \code{pattern} arguments.} + \item{fun.list}{Named list which must have the same number of + elements as groups that are specified by either \code{sep} or + \code{pattern} arguments. Each name is used for a group + name, and each value must be either a function + (to convert the group from a character vector to an atomic vector of the + same size) or NULL (no conversion).} + \item{sep}{Separator to split each element of \code{cols} into + groups. Columns that result in the maximum number of groups + are considered measure variables.} + \item{pattern}{Perl-compatible regex with capture groups to match to + \code{cols}. Columns that match the regex are considered measure variables.} + \item{cols}{A character vector of column names.} + \item{multiple.keyword}{A string, if used as a group name, then + measure returns a list and melt returns multiple + value columns (with names defined by the unique values in that + group). Otherwise if the string is not used as a group name, then + measure returns a vector and melt returns a single value column.} + \item{group.desc}{Internal, used in error messages.} +} +\seealso{ + \code{\link{melt}}, + \url{https://github.com/Rdatatable/data.table/wiki/Getting-started} +} +\examples{ +(two.iris = data.table(datasets::iris)[c(1,150)]) +# melt into a single value column. 
+melt(two.iris, measure.vars = measure(part, dim, sep=".")) +# do the same, programmatically with measurev +my.list = list(part=NULL, dim=NULL) +melt(two.iris, measure.vars=measurev(my.list, sep=".")) +# melt into two value columns, one for each part. +melt(two.iris, measure.vars = measure(value.name, dim, sep=".")) +# melt into two value columns, one for each dim. +melt(two.iris, measure.vars = measure(part, value.name, sep=".")) +# melt using sep, converting child number to integer. +(two.families = data.table(sex_child1="M", sex_child2="F", age_child1=10, age_child2=20)) +print(melt(two.families, measure.vars = measure( + value.name, child=as.integer, + sep="_child" +)), class=TRUE) +# same melt using pattern. +print(melt(two.families, measure.vars = measure( + value.name, child=as.integer, + pattern="(.*)_child(.)" +)), class=TRUE) +# same melt with pattern and measurev function list. +print(melt(two.families, measure.vars = measurev( + list(value.name=NULL, child=as.integer), + pattern="(.*)_child(.)" +)), class=TRUE) +# inspired by data(who, package="tidyr") +(who <- data.table(id=1, new_sp_m5564=2, newrel_f65=3)) +# melt to three variable columns, all character. +melt(who, measure.vars = measure(diagnosis, gender, ages, pattern="new_?(.*)_(.)(.*)")) +# melt to five variable columns, two numeric (with custom conversion). +print(melt(who, measure.vars = measure( + diagnosis, gender, ages, + ymin=as.numeric, + ymax=function(y)ifelse(y=="", Inf, as.numeric(y)), + pattern="new_?(.*)_(.)(([0-9]{2})([0-9]{0,2}))" +)), class=TRUE) +} +\keyword{data} diff --git a/man/melt.data.table.Rd b/man/melt.data.table.Rd index e56a10e4e1..ddca733fe8 100644 --- a/man/melt.data.table.Rd +++ b/man/melt.data.table.Rd @@ -31,7 +31,7 @@ non-measure columns will be assigned to it. 
If integer, must be positive; see De } For convenience/clarity in the case of multiple \code{melt}ed columns, resulting column names can be supplied as names to the elements \code{measure.vars} (in the \code{list} and \code{patterns} usages). See also \code{Examples}. } -\item{variable.name}{name for the measured variable names column. The default name is \code{'variable'}.} +\item{variable.name}{name (default \code{'variable'}) of output column containing information about which input column(s) were melted. If \code{measure.vars} is an integer/character vector, then each entry of this column contains the name of a melted column from \code{data}. If \code{measure.vars} is a list of integer/character vectors, then each entry of this column contains an integer indicating an index/position in each of those vectors. If \code{measure.vars} has attribute \code{variable_table} then it must be a data table with nrow = length of \code{measure.vars} vector(s), each row describing the corresponding measured variables(s), (typically created via \code{measure}) and its columns will be output instead of the \code{variable.name} column.} \item{value.name}{name for the molten data values column(s). The default name is \code{'value'}. Multiple names can be provided here for the case when \code{measure.vars} is a \code{list}, though note well that the names provided in \code{measure.vars} take precedence. } \item{na.rm}{If \code{TRUE}, \code{NA} values will be removed from the molten data.} @@ -64,7 +64,11 @@ effect. From version \code{1.9.6}, \code{melt} gains a feature with \code{measure.vars} accepting a list of \code{character} or \code{integer} vectors as well to melt -into multiple columns in a single function call efficiently. The function +into multiple columns in a single function call efficiently. 
+If a vector in the list contains missing values, or is shorter than the +max length of the list elements, then the output will include runs of +missing values at the specified position, or at the end. +The function \code{\link{patterns}} can be used to provide regular expression patterns. When used along with \code{melt}, if \code{cols} argument is not provided, the patterns will be matched against \code{names(data)}, for convenience. @@ -87,53 +91,68 @@ An unkeyed \code{data.table} containing the molten data. set.seed(45) require(data.table) DT <- data.table( - i_1 = c(1:5, NA), - i_2 = c(NA,6,7,8,9,10), - f_1 = factor(sample(c(letters[1:3], NA), 6, TRUE)), - f_2 = factor(c("z", "a", "x", "c", "x", "x"), ordered=TRUE), - c_1 = sample(c(letters[1:3], NA), 6, TRUE), - d_1 = as.Date(c(1:3,NA,4:5), origin="2013-09-01"), - d_2 = as.Date(6:1, origin="2012-01-01")) + i_1 = c(1:5, NA), + n_1 = c(NA, 6, 7, 8, 9, 10), + f_1 = factor(sample(c(letters[1:3], NA), 6L, TRUE)), + f_2 = factor(c("z", "a", "x", "c", "x", "x"), ordered=TRUE), + c_1 = sample(c(letters[1:3], NA), 6L, TRUE), + c_2 = sample(c(LETTERS[1:2], NA), 6L, TRUE), + d_1 = as.Date(c(1:3,NA,4:5), origin="2013-09-01"), + d_2 = as.Date(6:1, origin="2012-01-01") +) # add a couple of list cols -DT[, l_1 := DT[, list(c=list(rep(i_1, sample(5,1)))), by = i_1]$c] -DT[, l_2 := DT[, list(c=list(rep(c_1, sample(5,1)))), by = i_1]$c] +DT[, l_1 := DT[, list(c=list(rep(i_1, sample(5, 1L)))), by = i_1]$c] +DT[, l_2 := DT[, list(c=list(rep(c_1, sample(5, 1L)))), by = i_1]$c] -# id, measure as character/integer/numeric vectors -melt(DT, id=1:2, measure="f_1") -melt(DT, id=c("i_1", "i_2"), measure=3) # same as above -melt(DT, id=1:2, measure=3L, value.factor=TRUE) # same, but 'value' is factor -melt(DT, id=1:2, measure=3:4, value.factor=TRUE) # 'value' is *ordered* factor +# id.vars, measure.vars as character/integer/numeric vectors +melt(DT, id.vars=1:2, measure.vars="f_1") +melt(DT, id.vars=c("i_1", "n_1"), measure.vars=3) # 
same as above +melt(DT, id.vars=1:2, measure.vars=3L, value.factor=TRUE) # same, but 'value' is factor +melt(DT, id.vars=1:2, measure.vars=3:4, value.factor=TRUE) # 'value' is *ordered* factor # preserves attribute when types are identical, ex: Date -melt(DT, id=3:4, measure=c("d_1", "d_2")) -melt(DT, id=3:4, measure=c("i_1", "d_1")) # attribute not preserved +melt(DT, id.vars=3:4, measure.vars=c("d_1", "d_2")) +melt(DT, id.vars=3:4, measure.vars=c("n_1", "d_1")) # attribute not preserved # on list -melt(DT, id=1, measure=c("l_1", "l_2")) # value is a list -melt(DT, id=1, measure=c("c_1", "l_1")) # c1 coerced to list +melt(DT, id.vars=1, measure.vars=c("l_1", "l_2")) # value is a list +suppressWarnings( + melt(DT, id.vars=1, measure.vars=c("c_1", "l_1")) # c1 coerced to list, with warning +) # on character -melt(DT, id=1, measure=c("c_1", "f_1")) # value is char -melt(DT, id=1, measure=c("c_1", "i_2")) # i2 coerced to char +melt(DT, id.vars=1, measure.vars=c("c_1", "f_1")) # value is char +suppressWarnings( + melt(DT, id.vars=1, measure.vars=c("c_1", "n_1")) # n_1 coerced to char, with warning +) # on na.rm=TRUE. 
NAs are removed efficiently, from within C -melt(DT, id=1, measure=c("c_1", "i_2"), na.rm=TRUE) # remove NA +melt(DT, id.vars=1, measure.vars=c("c_1", "c_2"), na.rm=TRUE) # remove NA # measure.vars can be also a list # melt "f_1,f_2" and "d_1,d_2" simultaneously, retain 'factor' attribute # convenient way using internal function patterns() -melt(DT, id=1:2, measure=patterns("^f_", "^d_"), value.factor=TRUE) +melt(DT, id.vars=1:2, measure.vars=patterns("^f_", "^d_"), value.factor=TRUE) # same as above, but provide list of columns directly by column names or indices -melt(DT, id=1:2, measure=list(3:4, c("d_1", "d_2")), value.factor=TRUE) +melt(DT, id.vars=1:2, measure.vars=list(3:4, c("d_1", "d_2")), value.factor=TRUE) # same as above, but provide names directly: -melt(DT, id=1:2, measure=patterns(f="^f_", d="^d_"), value.factor=TRUE) +melt(DT, id.vars=1:2, measure.vars=patterns(f="^f_", d="^d_"), value.factor=TRUE) # na.rm=TRUE removes rows with NAs in any 'value' columns -melt(DT, id=1:2, measure=patterns("f_", "d_"), value.factor=TRUE, na.rm=TRUE) +melt(DT, id.vars=1:2, measure.vars=patterns("f_", "d_"), value.factor=TRUE, na.rm=TRUE) # return 'NA' for missing columns, 'na.rm=TRUE' ignored due to list column -melt(DT, id=1:2, measure=patterns("l_", "c_"), na.rm=TRUE) +melt(DT, id.vars=1:2, measure.vars=patterns("l_", "c_"), na.rm=TRUE) +# measure list with missing/short entries results in output with runs of NA +DT.missing.cols <- DT[, .(d_1, d_2, c_1, f_2)] +melt(DT.missing.cols, measure.vars=list(d=1:2, c="c_1", f=c(NA, "f_2"))) + +# specifying columns to melt via separator. +melt(DT.missing.cols, measure.vars=measure(value.name, number=as.integer, sep="_")) + +# specifying columns to melt via regex. 
+melt(DT.missing.cols, measure.vars=measure(value.name, number=as.integer, pattern="(.)_(.)")) } \seealso{ \code{\link{dcast}}, \url{https://cran.r-project.org/package=reshape} diff --git a/man/openmp-utils.Rd b/man/openmp-utils.Rd index b8d014976e..71e469ed72 100644 --- a/man/openmp-utils.Rd +++ b/man/openmp-utils.Rd @@ -5,7 +5,7 @@ \alias{openmp} \title{ Set or get number of threads that data.table should use } \description{ - Set and get number of threads to be used in \code{data.table} functions that are parallelized with OpenMP. The number of threads is initialized when \code{data.table} is first loaded in the R session using optional envioronment variables. Thereafter, the number of threads may be changed by calling \code{setDTthreads}. If you change an environment variable using \code{Sys.setenv} you will need to call \code{setDTthreads} again to reread the environment variables. + Set and get number of threads to be used in \code{data.table} functions that are parallelized with OpenMP. The number of threads is initialized when \code{data.table} is first loaded in the R session using optional environment variables. Thereafter, the number of threads may be changed by calling \code{setDTthreads}. If you change an environment variable using \code{Sys.setenv} you will need to call \code{setDTthreads} again to reread the environment variables. 
} \usage{ setDTthreads(threads = NULL, restore_after_fork = NULL, percent = NULL, throttle = NULL) @@ -51,4 +51,7 @@ \item{\file{types.c} - Internal testing usage} } } +\examples{ + getDTthreads(verbose=TRUE) +} \keyword{ data } diff --git a/man/shouldPrint.Rd b/man/shouldPrint.Rd index 80851f53d8..b3e1bcdc9b 100644 --- a/man/shouldPrint.Rd +++ b/man/shouldPrint.Rd @@ -21,5 +21,7 @@ \url{https://github.com/IRkernel/IRkernel/issues/127}\cr \url{https://github.com/Rdatatable/data.table/issues/933}\cr } - +\examples{ +# dummy example section to pass release check that all .Rd files have examples +} diff --git a/man/special-symbols.Rd b/man/special-symbols.Rd index 30cfedc5fa..9bfa72fceb 100644 --- a/man/special-symbols.Rd +++ b/man/special-symbols.Rd @@ -10,7 +10,7 @@ \alias{.NGRP} \title{ Special symbols } \description{ - \code{.SD}, \code{.BY}, \code{.N}, \code{.I}, \code{.GRP}, and \code{.NGRP} are \emph{read-only} symbols for use in \code{j}. \code{.N} can be used in \code{i} as well. See the vignettes and examples here and in \code{\link{data.table}}. + \code{.SD}, \code{.BY}, \code{.N}, \code{.I}, \code{.GRP}, and \code{.NGRP} are \emph{read-only} symbols for use in \code{j}. \code{.N} can be used in \code{i} as well. See the vignettes, Details and Examples here and in \code{\link{data.table}}. \code{.EACHI} is a symbol passed to \code{by}; i.e. \code{by=.EACHI}. } \details{ @@ -28,6 +28,8 @@ } \code{.EACHI} is defined as \code{NULL} but its value is not used. Its usage is \code{by=.EACHI} (or \code{keyby=.EACHI}) which invokes grouping-by-each-row-of-i; see \code{\link{data.table}}'s \code{by} argument for more details. + + Note that \code{.N} in \code{i} is computed up-front, while that in \code{j} applies \emph{after filtering in \code{i}}. That means that even absent grouping, \code{.N} in \code{i} can be different from \code{.N} in \code{j}. See Examples. 
} \seealso{ \code{\link{data.table}}, \code{\link{:=}}, \code{\link{set}}, \code{\link{datatable-optimize}} } @@ -52,5 +54,9 @@ DT[, c(.(y=max(y)), lapply(.SD, min)), DT[, grp := .GRP, by=x] # add a group counter DT[, grp_pct := .GRP/.NGRP, by=x] # add a group "progress" counter X[, DT[.BY, y, on="x"], by=x] # join within each group + +# .N can be different in i and j +DT[{cat(sprintf('in i, .N is \%d\n', .N)); a < .N/2}, + {cat(sprintf('in j, .N is \%d\n', .N)); mean(a)}] } \keyword{ data } diff --git a/man/substitute2.Rd b/man/substitute2.Rd new file mode 100644 index 0000000000..3b8d536141 --- /dev/null +++ b/man/substitute2.Rd @@ -0,0 +1,77 @@ +\name{substitute2} +\alias{substitute2} +\alias{substitute} +\alias{I} +\title{ Substitute expression } +\description{ + Experimental, more robust, and more user-friendly version of base R \code{\link[base]{substitute}}. +} +\usage{ + substitute2(expr, env) +} +\arguments{ + \item{expr}{ Unevaluated expression in which substitution has to take place. } + \item{env}{ List, or an environment that will be coerced to list, from which variables will be taken to inject into \code{expr}. } +} +\details{ + For convenience, the function will turn any character elements of \code{env} argument into symbols. If a character element is of length 2 or more, it will raise an error. It will also turn any list elements into list calls instead. Behaviour can be changed by wrapping \code{env} into \code{\link[base]{I}} call. In such case any symbols must be explicitly created, for example using \code{as.name} function. Alternatively it is possible to wrap particular elements of \code{env} into \code{\link[base]{I}} call, then only those elements will retain their original class. 
+ + Comparing to base R \code{\link[base]{substitute}}, \code{substitute2} function: +\enumerate{ + \item substitutes calls argument names as well + \item by default converts character elements of \code{env} argument to symbols + \item by default converts list elements of \code{env} argument to list calls + \item does not accept missing \code{env} argument + \item evaluates elements of \code{env} argument +} +} +\note{ + Conversion of \emph{character to symbol} and \emph{list to list call} works recursively for each list element in \code{env} list. If this behaviour is not desired for your use case, we would like to hear about that via our issue tracker. For the present moment there is an option to disable that: \code{options(datatable.enlist=FALSE)}. This option is provided only for debugging and will be removed in future. Please do not write code that depends on it, but use \code{\link[base]{I}} calls instead. +} +\value{ + Quoted expression having variables and call argument names substituted. 
+} +\seealso{ \code{\link[base]{substitute}}, \code{\link[base]{I}}, \code{\link[base]{call}}, \code{\link[base]{name}}, \code{\link[base]{eval}} } +\examples{ +## base R substitute vs substitute2 +substitute(list(var1 = var2), list(var1 = "c1", var2 = 5L)) +substitute2(list(var1 = var2), list(var1 = "c1", var2 = 5L)) ## works also on names + +substitute(var1, list(var1 = "c1")) +substitute2(var1, list(var1 = I("c1"))) ## enforce character with I + +substitute(var1, list(var1 = as.name("c1"))) +substitute2(var1, list(var1 = "c1")) ## turn character into symbol, for convenience + +## mix symbols and characters using 'I' function, both lines will yield same result +substitute2(list(var1 = var2), list(var1 = "c1", var2 = I("some_character"))) +substitute2(list(var1 = var2), I(list(var1 = as.name("c1"), var2 = "some_character"))) + +## list elements are enlist'ed into list calls +(cl1 = substitute(f(lst), list(lst = list(1L, 2L)))) +(cl2 = substitute2(f(lst), I(list(lst = list(1L, 2L))))) +(cl3 = substitute2(f(lst), list(lst = I(list(1L, 2L))))) +(cl4 = substitute2(f(lst), list(lst = quote(list(1L, 2L))))) +(cl5 = substitute2(f(lst), list(lst = list(1L, 2L)))) +cl1[[2L]] ## base R substitute with list element +cl2[[2L]] ## same +cl3[[2L]] ## same +cl4[[2L]] ## desired +cl5[[2L]] ## automatically + +## character to name and list into list calls works recursively +(cl1 = substitute2(f(lst), list(lst = list(1L, list(2L))))) +(cl2 = substitute2(f(lst), I(list(lst = list(1L, list(2L)))))) ## unless I() used +last(cl1[[2L]]) ## enlisted recursively +last(cl2[[2L]]) ## AsIs + +## using substitute2 from another function +f = function(expr, env) { + eval(substitute( + substitute2(.expr, env), + list(.expr = substitute(expr)) + )) +} +f(list(var1 = var2), list(var1 = "c1", var2 = 5L)) +} +\keyword{ data } diff --git a/man/test.data.table.Rd b/man/test.data.table.Rd index e84ae4797d..ba0fe25f9c 100644 --- a/man/test.data.table.Rd +++ b/man/test.data.table.Rd @@ -25,4 +25,9 @@ 
test.data.table(script = "tests.Rraw", verbose = FALSE, pkg = ".", If all tests were successful, \code{TRUE} is returned. Otherwise, see the \code{silent} argument above. \code{silent=TRUE} is intended for use at the start of production scripts; e.g. \code{stopifnot(test.data.table(silent=TRUE))} to check \code{data.table} is passing its own tests before proceeding. } \seealso{ \code{\link{data.table}}, \code{\link{test}} } +\examples{ + \dontrun{ + test.data.table() + } +} \keyword{ data } diff --git a/po/R-data.table.pot b/po/R-data.table.pot index 8e6d641240..ad00f12772 100644 --- a/po/R-data.table.pot +++ b/po/R-data.table.pot @@ -106,6 +106,9 @@ msgstr "" msgid "trying to use integer64 class when 'bit64' package is not installed" msgstr "" +msgid "optimised between not available for this data type, fallback to slow R routine" +msgstr "" + msgid "Not yet implemented NAbounds=TRUE for this non-numeric and non-character type" msgstr "" @@ -130,57 +133,99 @@ msgstr "" msgid "the second element should be the upper bound(s)." msgstr "" -msgid "x." +msgid "forderv(query) took ..." +msgstr "" + +msgid "Generating final logical vector ..." +msgstr "" + +msgid "done in" +msgstr "" + +msgid "%s is type %s which is not supported by data.table join" +msgstr "" + +msgid "Attempting roll join on factor column when joining %s to %s. Only integer, double or character columns may be roll joined." +msgstr "" + +msgid "Matching %s factor levels to %s factor levels." msgstr "" -msgid "is type" +msgid "Coercing factor column %s to type character to match type of %s." msgstr "" -msgid "which is not supported by data.table join" +msgid "Matching character column %s to factor levels in %s." msgstr "" -msgid "i." +msgid "Incompatible join types: %s (%s) and %s (%s). Factor columns must join to factor or character columns." msgstr "" -msgid "Attempting roll join on factor column when joining x." +msgid "%s has same type (%s) as %s. No coercion needed." msgstr "" -msgid "to i." 
+msgid "Coercing all-NA %s (%s) to type %s to match type of %s." msgstr "" -msgid ". Only integer, double or character columns may be roll joined." +msgid "Incompatible join types: %s (%s) and %s (%s)" msgstr "" -msgid "Incompatible join types: x." +msgid "Coercing %s column %s%s to type integer64 to match type of %s." msgstr "" -msgid "(" +msgid "Incompatible join types: %s is type integer64 but %s is type double and contains fractions" msgstr "" -msgid ") and i." +msgid "Coercing double column %s (which contains no fractions) to type integer to match type of %s" msgstr "" -msgid "). Factor columns must join to factor or character columns." +msgid "Coercing integer column %s to type double to match type of %s which contains fractions." msgstr "" -msgid ")" +msgid "Coercing integer column %s to type double for join to match type of %s." msgstr "" -msgid "Incompatible join types:" +msgid "on= matches existing key, using key" msgstr "" -msgid "is type integer64 but" +msgid "on= matches existing index, using index" msgstr "" -msgid "is type double and contains fractions" +msgid "Calculated ad hoc index in %s" +msgstr "" + +msgid "Non-equi join operators detected ..." msgstr "" msgid "roll is not implemented for non-equi joins yet." msgstr "" +msgid "forder took ..." +msgstr "" + +msgid "Generating group lengths ..." +msgstr "" + +msgid "Generating non-equi group ids ..." +msgstr "" + msgid "Column name '_nqgrp_' is reserved for non-equi joins." msgstr "" +msgid "Recomputing forder with non-equi ids ..." +msgstr "" + +msgid "Found %d non-equi group(s) ..." +msgstr "" + +msgid "Starting bmerge ..." +msgstr "" + +msgid "bmerge done in" +msgstr "" + +msgid "cedta decided '%s' wasn't data.table aware. Here is call stack with [[1L]] applied:" +msgstr "" + msgid "key argument of data.table() must be character" msgstr "" @@ -322,12 +367,27 @@ msgstr "" msgid "Attempting to do natural join but no common columns in provided tables" msgstr "" -msgid "Internal error. 
Cannot by=.EACHI when joining to a secondary key, yet" +msgid "Joining but 'x' has no key, natural join using" +msgstr "" + +msgid "not-join called with 'by=.EACHI'; Replacing !i with i=setdiff_(x,i) ..." +msgstr "" + +msgid "Constructing irows for '!byjoin || nqbyjoin' ..." +msgstr "" + +msgid "Internal error. Cannot by=.EACHI when joining to an index, yet" msgstr "" msgid "Internal error. irows has length in by=.EACHI" msgstr "" +msgid "Reorder irows for 'mult==\"all\" && !allGrp1' ..." +msgstr "" + +msgid "Reordering %d rows after bmerge done in ..." +msgstr "" + msgid "logical error. i is not a data.table, but 'on' argument is provided." msgstr "" @@ -349,6 +409,9 @@ msgstr "" msgid "Internal error: notjoin but byjoin or !integer or nomatch==NA" msgstr "" +msgid "Inverting irows for notjoin done in ..." +msgstr "" + msgid "with=FALSE together with := was deprecated in v1.9.4 released Oct 2014. Please wrap the LHS of := with parentheses; e.g., DT[,(myVar):=sum(b),by=a] to assign to column name(s) held in variable myVar. See ?':=' for other examples. As warned in 2014, this is now a warning." msgstr "" @@ -385,9 +448,18 @@ msgstr "" msgid "but one or more items include a comma. Either pass a vector of column names (which can contain spaces, but no commas), or pass a vector length 1 containing comma separated column names. See ?data.table for other possibilities." msgstr "" +msgid "by index '%s' but that index has 0 length. Ignoring." +msgstr "" + msgid "Internal error: irows isn't integer" msgstr "" +msgid "i clause present and columns used in by detected, only these subset:" +msgstr "" + +msgid "i clause present but columns used in by not detected. Having to subset all columns before evaluating 'by': '" +msgstr "" + msgid "'by' appears to evaluate to column names but isn't c() or key(). Use by=list(...) if you can. Otherwise, by=eval" msgstr "" @@ -409,6 +481,9 @@ msgstr "" msgid "The items in the 'by' or 'keyby' list are length(s) (%s). 
Each must be length %d; the same length as there are rows in x (after subsetting if i is provided)." msgstr "" +msgid "by-expression '%s' is not named, and the auto-generated name '%s' clashed with variable(s) in j. Therefore assigning the entire by-expression as name." +msgstr "" + msgid "Internal error: drop_dot passed" msgstr "" @@ -457,6 +532,15 @@ msgstr "" msgid "This j doesn't use .SD but .SDcols has been supplied. Ignoring .SDcols. See ?data.table." msgstr "" +msgid "Detected that j uses these columns:" +msgstr "" + +msgid "'(m)get' found in j. ansvars being set to all columns. Use .SDcols or a single j=eval(macro) instead. Both will detect the columns used which is important for efficiency.\nOld:" +msgstr "" + +msgid "New:" +msgstr "" + msgid ".SD is locked. Using := in .SD's j is reserved for possible future use; a tortuously flexible way to modify by group. Use := in j directly to modify by group by reference." msgstr "" @@ -472,9 +556,18 @@ msgstr "" msgid "LHS of := isn't column names ('character') or positions ('integer' or 'numeric')" msgstr "" +msgid "No rows match i. No new columns to add so not evaluating RHS of :=\nAssigning to 0 row subset of %d rows" +msgstr "" + msgid "Invalid .internal.selfref detected and fixed by taking a (shallow) copy of the data.table so that := can add this new column by reference. At an earlier point, this data.table has been copied by R (or was created manually using structure() or similar). Avoid names<- and attr<- which in R currently (and oddly) may copy the whole data.table. Use set* syntax instead to avoid copying: ?set, ?setnames and ?setattr. If this message doesn't help, please report your use case to the data.table issue tracker so the root cause can be fixed or this message improved." msgstr "" +msgid "Growing vector of column pointers from truelength %d to %d. A shallow copy has been taken, see ?setalloccol. 
Only a potential issue if two variables point to the same data (we can't yet detect that well) and if not you can safely ignore this. To avoid this message you could setalloccol() first, deep copy first using copy(), wrap with suppressWarnings() or increase the 'datatable.alloccol' option." +msgstr "" + +msgid "Note that the shallow copy will assign to the environment from which := was called. That means for example that if := was called within a function, the original table may be unaffected." +msgstr "" + msgid "Cannot assign to an under-allocated recursively indexed list -- L[[i]][,:=] syntax is only valid when i is length 1, but it's length" msgstr "" @@ -517,24 +610,72 @@ msgstr "" msgid "The column '.I' can't be grouped because it conflicts with the special .I variable. Try setnames(DT,'.I','I') first." msgstr "" +msgid "Note: forcing units=\"secs\" on implicit difftime by group; call difftime explicitly to choose custom units" +msgstr "" + msgid "logical error. i is not data.table, but mult='all' and 'by'=.EACHI" msgstr "" msgid "Internal error: by= is missing" msgstr "" +msgid "Finding groups using forderv ..." +msgstr "" + +msgid "Finding group sizes from the positions (can be avoided to save RAM) ..." +msgstr "" + +msgid "Getting back original order ..." +msgstr "" + +msgid "Finding groups using uniqlist on key ..." +msgstr "" + msgid "Internal error: byindex not the index name" msgstr "" +msgid "Finding groups using uniqlist on index '%s' ..." +msgstr "" + msgid "Internal error: byindex not found" msgstr "" +msgid "lapply optimization changed j from '%s' to '%s'" +msgstr "" + +msgid "lapply optimization is on, j unchanged as '%s'" +msgstr "" + +msgid "GForce optimized j to '" +msgstr "" + +msgid "GForce is on, left j unchanged" +msgstr "" + msgid "Unable to optimize call to mean() and could be very slow. You must name 'na.rm' like that otherwise if you do mean(x,TRUE) the TRUE is taken to mean 'trim' which is the 2nd argument of mean. 
'trim' is not yet optimized." msgstr "" +msgid "Old mean optimization changed j from '%s' to '%s'" +msgstr "" + +msgid "Old mean optimization is on, left j unchanged." +msgstr "" + +msgid "All optimizations are turned off" +msgstr "" + +msgid "Optimization is on but left j unchanged (single plain symbol): '%s'" +msgstr "" + msgid "Internal error: length(irows)!=length(o__)" msgstr "" +msgid "Making each group and running j (GForce %s) ..." +msgstr "" + +msgid "setkey() after the := with keyby= ..." +msgstr "" + msgid "The setkey() normally performed by keyby= has been skipped (as if by= was used) because := is being used together with keyby= but the keyby= contains some expressions. To avoid this warning, use by= instead, or provide existing column names to keyby=." msgstr "" @@ -547,6 +688,9 @@ msgstr "" msgid "and bynames is" msgstr "" +msgid "setkey() afterwards for keyby=.EACHI ..." +msgstr "" + msgid "rownames and rownames.value cannot both be used at the same time" msgstr "" @@ -649,6 +793,9 @@ msgstr "" msgid "Argument 'by' must refer only to atomic-type columns, but the following columns are non-atomic:" msgstr "" +msgid "Processing split.data.table with:" +msgstr "" + msgid "x is not a data.table. Shallow copy is a copy of the vector of column pointers (only), so is only meaningful for data.table" msgstr "" @@ -820,6 +967,21 @@ msgstr "" msgid "Internal error in .isFastSubsettable. Please report to data.table developers" msgstr "" +msgid "Subsetting optimization disabled because the cross-product of RHS values exceeds 1e4, causing memory problems." +msgstr "" + +msgid "Optimized subsetting with key '" +msgstr "" + +msgid "Optimized subsetting with index '" +msgstr "" + +msgid "Creating new index '" +msgstr "" + +msgid "Creating index %s done in ..." +msgstr "" + msgid "'on' argument should be a named atomic vector of column names indicating which columns in 'i' should be joined with which columns in 'x'." 
msgstr "" @@ -850,6 +1012,9 @@ msgstr "" msgid "There is no package %s in provided repository." msgstr "" +msgid "Git revision is not available. Most likely data.table was installed from CRAN or local archive.\nGit revision is available when installing from our repositories 'https://Rdatatable.gitlab.io/data.table' and 'https://Rdatatable.github.io/data.table'." +msgstr "" + msgid "'fromLast' must be TRUE or FALSE" msgstr "" @@ -949,6 +1114,9 @@ msgstr "" msgid "Please provide a name to each element of 'measure.vars'." msgstr "" +msgid "Duplicate column names found in molten data.table. Setting unique names using 'make.names'" +msgstr "" + msgid "y and x must both be data.tables. Use `setDT()` to convert list/data.frames to data.tables by reference or as.data.table() to convert to data.tables by copying." msgstr "" @@ -1042,6 +1210,12 @@ msgstr "" msgid "POSIXct interval cols have mixed timezones. Overlaps are performed on the internal numerical representation of POSIXct objects (always in UTC epoch time), therefore printed values may give the impression that values don't overlap but their internal representations do Please ensure that POSIXct type interval cols have identical 'tzone' attributes to avoid confusion." msgstr "" +msgid "unique() + setkey() operations done in ..." +msgstr "" + +msgid "binary search(es) done in ..." +msgstr "" + msgid "Not yet implemented" msgstr "" @@ -1171,6 +1345,9 @@ msgstr "" msgid "\". Please double check the input file is a valid csvy." msgstr "" +msgid "Processed %d lines of YAML metadata with the following top-level fields: %s" +msgstr "" + msgid "User-supplied 'header' will override that found in metadata." 
msgstr "" @@ -1231,6 +1408,9 @@ msgstr "" msgid "so the column has been left as type '" msgstr "" +msgid "stringsAsFactors=%s converted %d column(s): %s" +msgstr "" + msgid "key argument of data.table() must be a character vector naming columns (NB: col.names are applied before this)" msgstr "" @@ -1249,6 +1429,9 @@ msgstr "" msgid "x being coerced from class: matrix to data.table" msgstr "" +msgid "Appending to existing file so setting bom=FALSE and yaml=FALSE" +msgstr "" + msgid "Input has no columns; doing nothing." msgstr "" @@ -1315,6 +1498,9 @@ msgstr "" msgid "Using integer64 class columns require to have 'bit64' package installed." msgstr "" +msgid "%s: using %s: %s" +msgstr "" + msgid "'xts' class passed to %s function but 'xts' is not available, you should have 'xts' installed already" msgstr "" @@ -1408,19 +1594,7 @@ msgstr "" msgid "The option 'datatable.nomatch' is being used and is not set to the default NA. This option is still honored for now but will be deprecated in future. Please see NEWS for 1.12.4 for detailed information and motivation. To specify inner join, please specify `nomatch=NULL` explicitly in your calls rather than changing the default using this option." msgstr "" -msgid "The datatable." -msgstr "" - -msgid "version (" -msgstr "" - -msgid ") does not match the package (" -msgstr "" - -msgid "). Please close all R sessions to release the old" -msgstr "" - -msgid "and reinstall data.table in a fresh R session. The root cause is that R's package installer can in some unconfirmed circumstances leave a package in a state that is apparently functional but where new R code is calling old C code silently: https://bugs.r-project.org/bugzilla/show_bug.cgi?id=17478. Once a package is in this mismatch state it may produce wrong results silently until you next upgrade the package. Please help by adding precise circumstances to 17478 to move the status to confirmed. 
This mismatch between R and C code can happen with any package not just data.table. It is just that data.table has added this check." +msgid "The datatable.%s version (%s) does not match the package (%s). Please close all R sessions to release the old %s and reinstall data.table in a fresh R session. The root cause is that R's package installer can in some unconfirmed circumstances leave a package in a state that is apparently functional but where new R code is calling old C code silently: https://bugs.r-project.org/bugzilla/show_bug.cgi?id=17478. Once a package is in this mismatch state it may produce wrong results silently until you next upgrade the package. Please help by adding precise circumstances to 17478 to move the status to confirmed. This mismatch between R and C code can happen with any package not just data.table. It is just that data.table has added this check." msgstr "" msgid "This is R" @@ -1474,6 +1648,15 @@ msgstr "" msgid "Column classes will be suppressed when col.names is 'none'" msgstr "" +msgid "Key: <%s>" +msgstr "" + +msgid "Null data.%s (0 rows and 0 cols)" +msgstr "" + +msgid "Empty data.%s (%d rows and %d cols)" +msgstr "" + msgid "Internal structure doesn't seem to be a list. Possibly corrupt data.table." msgstr "" @@ -1516,6 +1699,18 @@ msgstr "" msgid "Internal error. 'cols' should be character at this point in setkey; please report." 
msgstr "" +msgid "forder took" +msgstr "" + +msgid "setkey on columns %s using existing index '%s'" +msgstr "" + +msgid "reorder took" +msgstr "" + +msgid "x is already ordered by these columns, no need to call reorder" +msgstr "" + msgid "Internal error: index '" msgstr "" @@ -1576,25 +1771,13 @@ msgstr "" msgid "length(by.x) != length(by.y)" msgstr "" -msgid "When x's column ('" -msgstr "" - -msgid "') is character, the corresponding column in y ('" -msgstr "" - -msgid "') should be factor or character, but found incompatible type '" -msgstr "" - -msgid "') is factor, the corresponding column in y ('" +msgid "When x's column ('%s') is character, the corresponding column in y ('%s') should be factor or character, but found incompatible type '%s'." msgstr "" -msgid "') should be character or factor, but found incompatible type '" +msgid "When x's column ('%s') is factor, the corresponding column in y ('%s') should be character or factor, but found incompatible type '%s'." msgstr "" -msgid "') is integer or numeric, the corresponding column in y ('" -msgstr "" - -msgid "') can not be character or logical types, but found incompatible type '" +msgid "When x's column ('%s') is integer or numeric, the corresponding column in y ('%s') can not be character or logical types, but found incompatible type '%s'." msgstr "" msgid "argument 'all' should be logical of length one" @@ -1645,12 +1828,18 @@ msgstr "" msgid "argument 'fill' ignored, only make sense for type='const'" msgstr "" +msgid "No objects of class data.table exist in %s" +msgstr "" + msgid "order.col='" msgstr "" msgid "' not a column name of info" msgstr "" +msgid "Total:" +msgstr "" + msgid "data.table package is loaded. Unload or start a fresh R session." msgstr "" @@ -1660,25 +1849,31 @@ msgstr "" msgid "Neither %s nor %s exist in %s" msgstr "" +msgid "test.data.table() running:" +msgstr "" + +msgid "**** This R session's language is not English. 
Each test will still check that the correct number of errors and/or\n**** warnings are produced. However, to test the text of each error/warning too, please restart R with LANGUAGE=en" +msgstr "" + msgid "Failed after test" msgstr "" msgid "before the next test() call in" msgstr "" -msgid "out of" +msgid "Timings count mismatch:" msgstr "" -msgid ". Search" +msgid "vs" msgstr "" -msgid "for test number" +msgid "10 longest running tests took" msgstr "" -msgid "Timings count mismatch:" +msgid "All %d tests in %s completed ok in %s" msgstr "" -msgid "vs" +msgid "Running test id %s" msgstr "" msgid "Test" @@ -1687,6 +1882,33 @@ msgstr "" msgid "is invalid: when error= is provided it does not make sense to pass y as well" msgstr "" +msgid "Test id %s is not in increasing order" +msgstr "" + +msgid "Test %s produced %d %ss but expected %d" +msgstr "" + +msgid "Test %s didn't produce the correct %s:\nExpected: %s\nObserved: %s" +msgstr "" + +msgid "Output captured before unexpected warning/error/message:" +msgstr "" + +msgid "Test %s did not produce the correct output:\nExpected: <<%s>>\nObserved <<%s>>" +msgstr "" + +msgid "Test %s produced output but should not have:\nExpected absent (case insensitive): <<%s>>\nObserved: <<%s>>" +msgstr "" + +msgid "Test %s ran without errors but selfrefok(%s) is FALSE" +msgstr "" + +msgid "Test %s ran without errors but failed check that x equals y:" +msgstr "" + +msgid "First %d of %d (type '%s'):" +msgstr "" + msgid "Use started.at=proc.time() not Sys.time() (POSIXt and slow)" msgstr "" @@ -1756,7 +1978,17 @@ msgstr "" msgid "Following columns are not numeric and will be omitted:" msgstr "" +msgid "Index: " +msgid_plural "Indices: " +msgstr[0] "" +msgstr[1] "" + msgid "%d variable not shown: %s\n" msgid_plural "%d variables not shown: %s\n" msgstr[0] "" msgstr[1] "" + +msgid "%d error out of %d. Search %s for test number %s" +msgid_plural "%d errors out of %d. 
Search %s for test numbers %s" +msgstr[0] "" +msgstr[1] "" diff --git a/po/R-zh_CN.po b/po/R-zh_CN.po index a73b8e4a1b..7e78584fd7 100644 --- a/po/R-zh_CN.po +++ b/po/R-zh_CN.po @@ -136,6 +136,11 @@ msgstr "。将采用 UTC 时间进行比较。" msgid "trying to use integer64 class when 'bit64' package is not installed" msgstr "试图使用 intger64 类型但 'bit64' 包尚未安装" +msgid "" +"optimised between not available for this data type, fallback to slow R " +"routine" +msgstr "对这种数据类型的优化尚未实现,使用备用较慢的R方法。" + msgid "" "Not yet implemented NAbounds=TRUE for this non-numeric and non-character type" msgstr "" @@ -165,61 +170,118 @@ msgstr "第一个元素应为下界;" msgid "the second element should be the upper bound(s)." msgstr "第二个元素应为上界。" -msgid "x." -msgstr "x." +msgid "forderv(query) took ..." +msgstr "forderv(query) 用了 ..." + +msgid "Generating final logical vector ..." +msgstr "产生最后的逻辑向量 ..." -msgid "is type" -msgstr "的类型为" +msgid "done in" +msgstr "用了" -msgid "which is not supported by data.table join" -msgstr ",该类型无法用于 data.table 的联接" +msgid "%s is type %s which is not supported by data.table join" +msgstr "%s的类型为%s,该类型无法用于 data.table 的联接" + +msgid "" +"Attempting roll join on factor column when joining %s to %s. Only integer, " +"double or character columns may be roll joined." +msgstr "" +"联接%s与%s时试图滚动联接(roll join)因子类型(factor)的列。但只有整数" +"(integer)、双精度(double)或字符(character)类型的列可以使用滚动联接。" -msgid "i." -msgstr "i." +msgid "Matching %s factor levels to %s factor levels." +msgstr "匹配 %s 的因子水平和 %s 的因子水平。" -msgid "Attempting roll join on factor column when joining x." -msgstr "试图滚动联接(roll join)因子类型(factor)的列,这发生于将 x." +msgid "Coercing factor column %s to type character to match type of %s." +msgstr "将因子类型列 %s 强制转换成字符来匹配目 %s。" -msgid "to i." -msgstr "与 i." +msgid "Matching character column %s to factor levels in %s." +msgstr "匹配字符类型列 %s 和 %s 的因子水平。" -msgid ". Only integer, double or character columns may be roll joined." +msgid "" +"Incompatible join types: %s (%s) and %s (%s). 
Factor columns must join to " +"factor or character columns." msgstr "" -"联接时。但只有整数(integer)、双精度(double)或字符(character)类型的列可" -"以使用滚动联接(roll join)。" +"不兼容的联结类型: %s (%s) 和 %s (%s)。 因子类型的列必须与因子类型或字符类型的" +"列才可以联结" -msgid "Incompatible join types: x." -msgstr "不兼容的联结类型: x。" +msgid "%s has same type (%s) as %s. No coercion needed." +msgstr "%s 有 %s 的类型。不需要强制转换。" -msgid "(" -msgstr "(" +msgid "Coercing all-NA %s (%s) to type %s to match type of %s." +msgstr "强制转换 all-NA %s (%s) 为 %s 类型用来匹配 %s 类型。" -msgid ") and i." -msgstr ")和 i。" +msgid "Incompatible join types: %s (%s) and %s (%s)" +msgstr "不兼容的联结类型: %s (%s) 和 %s (%s)。" -msgid "). Factor columns must join to factor or character columns." -msgstr ")。 因子类型的列必须与因子类型或字符类型的列才可以联结" +msgid "Coercing %s column %s%s to type integer64 to match type of %s." +msgstr "强制转换 %s 个列 %s%s 为整数64类型用来匹配 %s 类型。" -msgid ")" -msgstr ")" +msgid "" +"Incompatible join types: %s is type integer64 but %s is type double and " +"contains fractions" +msgstr "" +"不兼容的联结类型: %s 是 integer64 类型的列但 %s 是有分数的双精度类型列。" -msgid "Incompatible join types:" -msgstr "不兼容的联结类型" +msgid "" +"Coercing double column %s (which contains no fractions) to type integer to " +"match type of %s" +msgstr "强制转换双精度列 %s (不含有分数) 为整数用来匹配 %s 类型" -msgid "is type integer64 but" -msgstr "是 integer64 类型但是" +msgid "" +"Coercing integer column %s to type double to match type of %s which contains " +"fractions." +msgstr "强制转换整数列 %s 为双精度用来匹配含有分数的 %s 类型。" + +msgid "Coercing integer column %s to type double for join to match type of %s." +msgstr "强制转换整数列 %s 为双精度用来与类型 %s 进行联结。" + +msgid "on= matches existing key, using key" +msgstr "on=和现有键(key)相等,用键" + +msgid "on= matches existing index, using index" +msgstr "on=和现有索引(index)相等,用索引" + +msgid "Calculated ad hoc index in %s" +msgstr "计算临时索引用了 %s" -msgid "is type double and contains fractions" -msgstr "是 double 类型并且包含分数" +msgid "Non-equi join operators detected ..." +msgstr "侦测到不等长联结操作符(operator)..." 
msgid "roll is not implemented for non-equi joins yet." msgstr "不等长联结还不能执行 roll " +msgid "forder took ..." +msgstr "forder 用了 ..." + +msgid "Generating group lengths ..." +msgstr "正在生成组的长度。。。" + +msgid "Generating non-equi group ids ..." +msgstr "正在生成不等长的组标识符 . . . " + msgid "Column name '_nqgrp_' is reserved for non-equi joins." msgstr "列名 '_nqgrp_' 是为不等长联结保留的" +msgid "Recomputing forder with non-equi ids ..." +msgstr "用不等长的组标志符重新计算 forder . . . " + +msgid "Found %d non-equi group(s) ..." +msgstr "找到%d不等长分组 ..." + +msgid "Starting bmerge ..." +msgstr "bmerge开始..." + +msgid "bmerge done in" +msgstr "bmerge 用了" + +msgid "" +"cedta decided '%s' wasn't data.table aware. Here is call stack with [[1L]] " +"applied:" +msgstr "cedta决定data.table不识别 '%s'。使用[[1L]]后的呼叫堆叠就是:" + msgid "key argument of data.table() must be character" -msgstr "data.table() 的主参数必须是字符" +msgstr "data.table() 的key参数必须是字符" msgid "Object '" msgstr "对象 '" @@ -427,18 +489,34 @@ msgid "" msgstr "" "但i是一个 data.table (或者是字符向量),必须使用 'on=' 参数指明参与连接的列 " "(参见 ?data.table),可以是keying x(比如,已排序过,和标记已排序过,请参见?" -"setkey),或者是在x和i共用列的名字(比如,自然连接)。如果x有在内存被排序过," -"Keyed连接的速度会在非常大的数据上有较明显的提高。" +"setkey),或者是在x和i共用列的名字(比如,自然连接)。如果x有在内存被排序过,键" +"(keyed)连接的速度会在非常大的数据上有较明显的提高。" msgid "Attempting to do natural join but no common columns in provided tables" msgstr "尝试进行自然连接然而并没有找到表格中相同的列" -msgid "Internal error. Cannot by=.EACHI when joining to a secondary key, yet" -msgstr "内部错误:目前尚无法对次键使用by=.EACH命令" +msgid "Joining but 'x' has no key, natural join using" +msgstr "联结但 'x' 没有键 (key),自然联结用" + +msgid "not-join called with 'by=.EACHI'; Replacing !i with i=setdiff_(x,i) ..." +msgstr "" +"配套使用了 not-join 和 'by=.EACHI' 的命令; 用 !i 取代 i=setdiff_(x,i) ..." + +msgid "Constructing irows for '!byjoin || nqbyjoin' ..." +msgstr "构造 irows 用来对应于 '!byjoin || nqbyjoin' ..." + +msgid "Internal error. Cannot by=.EACHI when joining to an index, yet" +msgstr "内部错误:目前尚无法对索引(index)使用by=.EACH命令" msgid "Internal error. 
irows has length in by=.EACHI" msgstr "内部错误:by=.EACHI 中 irows 有长度" +msgid "Reorder irows for 'mult==\"all\" && !allGrp1' ..." +msgstr "对'mult==\"all\" && !allGrp1'再排序irows ..." + +msgid "Reordering %d rows after bmerge done in ..." +msgstr "bmerge 之后再排序%d行用了..." + msgid "logical error. i is not a data.table, but 'on' argument is provided." msgstr "逻辑错误。当 i 并非一个 data.table时,不应提供'on'参数" @@ -465,6 +543,9 @@ msgstr "" msgid "Internal error: notjoin but byjoin or !integer or nomatch==NA" msgstr "内部错误。原因可能为:notjoin 而非 byjoin;非整数;nomatch 为空" +msgid "Inverting irows for notjoin done in ..." +msgstr "对 notjoin 求逆 irows 用了 ..." + msgid "" "with=FALSE together with := was deprecated in v1.9.4 released Oct 2014. " "Please wrap the LHS of := with parentheses; e.g., DT[,(myVar):=sum(b),by=a] " @@ -519,9 +600,22 @@ msgstr "" "包含逗号),或传入一个长度为1,由逗号分隔的列名组成的向量输入 ?data.table查看" "其他的选项。" +msgid "by index '%s' but that index has 0 length. Ignoring." +msgstr "by 索引(index) '%s' 但那索引的长度为0。将被忽视。" + msgid "Internal error: irows isn't integer" msgstr "内部错误:irows 不是整型" +msgid "i clause present and columns used in by detected, only these subset:" +msgstr "有 i 子句和在 by 用的列被侦测, 子集只有这个:" + +msgid "" +"i clause present but columns used in by not detected. Having to subset all " +"columns before evaluating 'by': '" +msgstr "" +"有 i 子句但是在 by 用的列并没有被侦测。于是所有的列将用于接下里的 'by': 运" +"算。" + msgid "" "'by' appears to evaluate to column names but isn't c() or key(). Use " "by=list(...) if you can. Otherwise, by=eval" @@ -560,6 +654,13 @@ msgstr "" "在'by'或'keyby'列表中的项长度为 %s。每一项的长度须均为%d,即应与 x (或经 i " "筛选后的子集)中所包含行数相同。" +msgid "" +"by-expression '%s' is not named, and the auto-generated name '%s' clashed " +"with variable(s) in j. Therefore assigning the entire by-expression as name." +msgstr "" +"by-expression '%s' 没有命名,自动生成的名字 '%s' 与 j 中的变量名冲突。将用 " +"by-expression 用来命名。" + msgid "Internal error: drop_dot passed" msgstr "内部错误:drop_dot 传入的参数有" @@ -622,6 +723,22 @@ msgid "" "data.table." 
msgstr "此处 j 不使用 .SD 但提供了 .SDcols ,因此忽略 .SDcols详见 ?data.table" +msgid "Detected that j uses these columns:" +msgstr "侦测 j 用这个列:" + +msgid "" +"'(m)get' found in j. ansvars being set to all columns. Use .SDcols or a " +"single j=eval(macro) instead. Both will detect the columns used which is " +"important for efficiency.\n" +"Old:" +msgstr "" +"j 中找到了 '(m)get'。ansvars 将应用到所有的列。请考虑使用 .SDcols 或者一个单" +"独的 j=eval(macro)两个命令都会侦测影响效率的列。\n" +"旧:" + +msgid "New:" +msgstr "新:" + msgid "" ".SD is locked. Using := in .SD's j is reserved for possible future use; a " "tortuously flexible way to modify by group. Use := in j directly to modify " @@ -647,6 +764,13 @@ msgid "" "'numeric')" msgstr ":= 的 LHS 不是列名('字符')或列的位置('整数'或'数值')" +msgid "" +"No rows match i. No new columns to add so not evaluating RHS of :=\n" +"Assigning to 0 row subset of %d rows" +msgstr "" +"没有找到匹配 i 的行。无法增加新的列所以无法运算 RHS of :=\n" +"指定一个 0 行的子集" + msgid "" "Invalid .internal.selfref detected and fixed by taking a (shallow) copy of " "the data.table so that := can add this new column by reference. At an " @@ -664,6 +788,28 @@ msgstr "" "及 ?setattr如果以上讯息无法提供帮助,请回报你的案例至 data.table 问题追踪以助" "于修复根本原因或改进本讯息" +msgid "" +"Growing vector of column pointers from truelength %d to %d. A shallow copy " +"has been taken, see ?setalloccol. Only a potential issue if two variables " +"point to the same data (we can't yet detect that well) and if not you can " +"safely ignore this. To avoid this message you could setalloccol() first, " +"deep copy first using copy(), wrap with suppressWarnings() or increase the " +"'datatable.alloccol' option." +msgstr "" +"列指针向量从 truelength %d 增加为 %d。浅拷贝已经完成,详见 ?setalloccol。如果" +"两个变量指向同一个数据 (这个我们无法侦测),会导致潜在的问题。如果并没有,你" +"可以:忽视这个问题。如果想要避免警告,可以使用以下任一命令,像是 " +"setalloccol(),用 copy() 深度拷贝,套用 suppressWarnings() 或者是增加 " +"'datatable.alloccol' 的选项。" + +msgid "" +"Note that the shallow copy will assign to the environment from which := was " +"called. 
That means for example that if := was called within a function, the " +"original table may be unaffected." +msgstr "" +"需要注意的是这个浅拷贝会被指向给调用了 which := 的环境。意思就是说,如果在函" +"数内部调用了 if :=, 原先的 table 可能不会有任何变化。" + msgid "" "Cannot assign to an under-allocated recursively indexed list -- L[[i]][,:=] " "syntax is only valid when i is length 1, but it's length" @@ -735,18 +881,52 @@ msgstr "" "无法对 '.I' 列进行分组,因为与 data.table 特有的 .I 变量冲突请先尝试 " "setnames(DT,'.I','I')" +msgid "" +"Note: forcing units=\"secs\" on implicit difftime by group; call difftime " +"explicitly to choose custom units" +msgstr "" +"注意:在隐含的 difftime 强制分组使用了 units=\"secs\"; 请明确的调用 difftime " +"来选择自定义的单位。" + msgid "logical error. i is not data.table, but mult='all' and 'by'=.EACHI" msgstr "逻辑错误: i 不是data.table,但 mult='all' 及 'by'=.EACHI" msgid "Internal error: by= is missing" msgstr "内部错误 : 缺少 by=" +msgid "Finding groups using forderv ..." +msgstr "搜寻组中配套使用了 forderv . . . " + +msgid "Finding group sizes from the positions (can be avoided to save RAM) ..." +msgstr "从位置中搜寻组的大小 (避免此举来节省内存) . . ." + +msgid "Getting back original order ..." +msgstr "恢复原有的顺序 . . . " + +msgid "Finding groups using uniqlist on key ..." +msgstr "搜寻组并配套使用了将 uniqlist 用在键 (key) ... " + msgid "Internal error: byindex not the index name" -msgstr "内部错误 : byindex 不是索引名称" +msgstr "内部错误 : byindex 不是索引(index)名称" + +msgid "Finding groups using uniqlist on index '%s' ..." +msgstr "搜寻组并配套使用了将 uniqlist 用在索引 (index) '%s'... " msgid "Internal error: byindex not found" msgstr "内部错误 : 找不到 byindex" +msgid "lapply optimization changed j from '%s' to '%s'" +msgstr "lapply优化改变j从'%s'成'%s'" + +msgid "lapply optimization is on, j unchanged as '%s'" +msgstr "lapply优化打开了, j ('%s')没有区别" + +msgid "GForce optimized j to '" +msgstr "GForce优化 j 到 '" + +msgid "GForce is on, left j unchanged" +msgstr "GForce打开了, j 没有区别" + msgid "" "Unable to optimize call to mean() and could be very slow. You must name 'na." 
"rm' like that otherwise if you do mean(x,TRUE) the TRUE is taken to mean " @@ -756,9 +936,27 @@ msgstr "" "果您直接使用 mean(x,TRUE)会被认定为 trim=TRUE,trim 是 mean() 中尚未被优化的" "第二顺位参数" +msgid "Old mean optimization changed j from '%s' to '%s'" +msgstr "旧mean优化改变j 从'%s'成'%s'" + +msgid "Old mean optimization is on, left j unchanged." +msgstr "旧mean优化打开了,j没有区别。" + +msgid "All optimizations are turned off" +msgstr "所有优化关掉了" + +msgid "Optimization is on but left j unchanged (single plain symbol): '%s'" +msgstr "优化打开了但是并没有改变 j (一个普通符号):'%s'" + msgid "Internal error: length(irows)!=length(o__)" msgstr "内部错误:length(irows)!=length(o__)" +msgid "Making each group and running j (GForce %s) ..." +msgstr "进行分组中,并且运行 j (GForce %s) ..." + +msgid "setkey() after the := with keyby= ..." +msgstr "keyby=中,:=后setkey() ..." + msgid "" "The setkey() normally performed by keyby= has been skipped (as if by= was " "used) because := is being used together with keyby= but the keyby= contains " @@ -778,6 +976,9 @@ msgstr "但是ans(答案)是" msgid "and bynames is" msgstr "同时bynames是" +msgid "setkey() afterwards for keyby=.EACHI ..." +msgstr "keyby=.EACHI中到底setkey() ..." + msgid "rownames and rownames.value cannot both be used at the same time" msgstr "rownames和rownames.value 不能同时使用" @@ -798,7 +999,7 @@ msgstr "" "行名长度为零,`length(rownames)==0`,但应该为单一列名,单一数值,或NULL" msgid "rownames is TRUE but key has multiple columns" -msgstr "rownames是TRUE但key不只一个列" +msgstr "rownames是TRUE但键(key)不只一个列" msgid "; taking first column x[,1] as rownames" msgstr "; 取第一列, `column x[,1]`, 为rownames" @@ -901,6 +1102,9 @@ msgid "" "columns are non-atomic:" msgstr "参数 'by' 只适用于原子类型的纵列,但现在关联的纵列不是原子类型" +msgid "Processing split.data.table with:" +msgstr "运行 split.data.table 中使用: " + msgid "" "x is not a data.table. Shallow copy is a copy of the vector of column " "pointers (only), so is only meaningful for data.table" @@ -1125,6 +1329,23 @@ msgid "" "Internal error in .isFastSubsettable. 
Please report to data.table developers" msgstr ".isFastSubsettable 产生了内部错误。请向 data.table 开发者报告" +msgid "" +"Subsetting optimization disabled because the cross-product of RHS values " +"exceeds 1e4, causing memory problems." +msgstr "筛选子集优化被停止,因为叉积后的RHS值将超过 1e4,会造成内存问题。" + +msgid "Optimized subsetting with key '" +msgstr "优化的子集用键(key) '" + +msgid "Optimized subsetting with index '" +msgstr "优化的子集用索引(index) '" + +msgid "Creating new index '" +msgstr "造成新索引(index) '" + +msgid "Creating index %s done in ..." +msgstr "造成新索引(index) %s 用了 ..." + msgid "" "'on' argument should be a named atomic vector of column names indicating " "which columns in 'i' should be joined with which columns in 'x'." @@ -1159,6 +1380,17 @@ msgstr "." msgid "There is no package %s in provided repository." msgstr "所提供的资料库中不含包%s" +msgid "" +"Git revision is not available. Most likely data.table was installed from " +"CRAN or local archive.\n" +"Git revision is available when installing from our repositories 'https://" +"Rdatatable.gitlab.io/data.table' and 'https://Rdatatable.github.io/data." +"table'." +msgstr "" +"Git 修订并不存在。可能是因为 data.table 是从 CRAN 或者是本地档案安装。\n" +"Git 修订存在的情况只限于从我们资料库 'https://Rdatatable.gitlab.io/data." +"table' 或者'https://Rdatatable.github.io/data.table'下载。" + msgid "'fromLast' must be TRUE or FALSE" msgstr "'fromLast' 必须为 TRUE 或 FALSE" @@ -1295,6 +1527,13 @@ msgstr "将被优先使用。" msgid "Please provide a name to each element of 'measure.vars'." msgstr "请为 'measure.vars' 中的每个元素提供一个名称。" +msgid "" +"Duplicate column names found in molten data.table. Setting unique names " +"using 'make.names'" +msgstr "" +"重复的列名存在于在 molten 之后 data.table。请使用 'make.names' 设置唯一的列" +"名。" + msgid "" "y and x must both be data.tables. Use `setDT()` to convert list/data.frames " "to data.tables by reference or as.data.table() to convert to data.tables by " @@ -1322,8 +1561,8 @@ msgid "" "'y' must be keyed (i.e., sorted, and, marked as sorted). Call setkey(y, ...) " "first, see ?setkey. 
Also check the examples in ?foverlaps." msgstr "" -"'y' 必须有主键(已经排序并且标记为已排序)。请先用 setkey(y, ...) 设置主键," -"可以参考 ?setkey 以及 ?foverlaps 中提供的例子。" +"'y' 必须有键(key:已经排序并且标记为已排序)。请先用 setkey(y, ...) 设置主" +"键,可以参考 ?setkey 以及 ?foverlaps 中提供的例子。" msgid "" "'by.x' and 'by.y' should contain at least two column names (or numbers) each " @@ -1354,7 +1593,7 @@ msgid "The first" msgstr "首先" msgid "columns of y's key must be identical to the columns specified in by.y." -msgstr "在'by.y'中,y键的列必须与指定的列相同" +msgstr "在'by.y'中,y键(key)的列必须与指定的列相同" msgid "Elements listed in 'by.x' must be valid names in data.table 'x'" msgstr "对于data.table中的'X','by.x'中的元素必须是有效名称" @@ -1434,6 +1673,12 @@ msgstr "" "显示却重叠'的印象,(所以)请确保POSIXct类型的间隔列具有相同的'时区'属性以避" "免混乱。" +msgid "unique() + setkey() operations done in ..." +msgstr "unique() + setkey() 执行用了 ..." + +msgid "binary search(es) done in ..." +msgstr "二进制搜索用了 . . . " + msgid "Not yet implemented" msgstr "尚未实现" @@ -1447,7 +1692,7 @@ msgid "length(na.last) > 1, only the first element will be used" msgstr "当na.last长度大于1时,只会使用第一个元素" msgid "x is a single vector, non-NULL 'cols' doesn't make sense" -msgstr "x是单个向量,非空的'cols'没有意义" +msgstr "x是单个向量,非NULL的'cols'没有意义" msgid "x is a list, 'cols' can not be 0-length" msgstr "x是一个list, 'cols'不能为0长度" @@ -1633,6 +1878,10 @@ msgstr "正则 \"" msgid "\". Please double check the input file is a valid csvy." msgstr "从这里开始" +msgid "" +"Processed %d lines of YAML metadata with the following top-level fields: %s" +msgstr "处理了YAML元数据中的排列最前的 %d 行: %s" + msgid "User-supplied 'header' will override that found in metadata." 
msgstr "用户提供的'header'将覆盖元数据中的表头" @@ -1699,11 +1948,14 @@ msgstr ":" msgid "so the column has been left as type '" msgstr "所以该列已经被保存为类型" +msgid "stringsAsFactors=%s converted %d column(s): %s" +msgstr "stringsAsFactors=%s 改变 %d 列: %s" + msgid "" "key argument of data.table() must be a character vector naming columns (NB: " "col.names are applied before this)" msgstr "" -"data.table()的关键参数必须是字符向量命名的列(NB:col.names在这之前被使用过)" +"data.table()的key参数必须是字符向量命名的列(NB:col.names在这之前被使用过)" msgid "" "index argument of data.table() must be a character vector naming columns " @@ -1725,6 +1977,9 @@ msgstr "" msgid "x being coerced from class: matrix to data.table" msgstr "x 的类将强制从 matrix 转变为 data.table" +msgid "Appending to existing file so setting bom=FALSE and yaml=FALSE" +msgstr "并入了已存在的文件,所以设置 bom=FALSE 和 yaml=FALSE" + msgid "Input has no columns; doing nothing." msgstr "输入没有列,不执行任何操作。" @@ -1820,6 +2075,9 @@ msgid "" "Using integer64 class columns require to have 'bit64' package installed." msgstr "要在列中使用 integer64 类,需要先安装 'bit64' 包。" +msgid "%s: using %s: %s" +msgstr "%s: 用 %s: %s" + msgid "" "'xts' class passed to %s function but 'xts' is not available, you should " "have 'xts' installed already" @@ -1895,7 +2153,7 @@ msgid "" "**********" msgstr "" "**********\n" -"用中文运行data.table。软件包只提供英语支持。当在在线搜索帮助时,也要确保检查" +"用中文执行data.table。软件包只提供英语支持。当在在线搜索帮助时,也要确保检查" "英语错误信息。这个可以通过查看软件包源文件中的po/R-zh_CN.po和po/zh_CN.po文件" "获得,这个文件可以并排找到母语和英语错误信息。\n" "**********" @@ -1963,35 +2221,26 @@ msgstr "" "用,但在未来不会被使用。相关的详细信息和动机,请参阅1.12.4的信息。要指定内部" "连接,请在调用中明确指定`nomatch = NULL`,而不要使用此选项更改默认值。" -msgid "The datatable." -msgstr "datatable" - -msgid "version (" -msgstr "版本(" - -msgid ") does not match the package (" -msgstr ")和包不匹配 (" - -msgid "). Please close all R sessions to release the old" -msgstr ").请关闭所有R会话以释放旧版本" - msgid "" -"and reinstall data.table in a fresh R session. 
The root cause is that R's " -"package installer can in some unconfirmed circumstances leave a package in a " -"state that is apparently functional but where new R code is calling old C " -"code silently: https://bugs.r-project.org/bugzilla/show_bug.cgi?id=17478. " -"Once a package is in this mismatch state it may produce wrong results " -"silently until you next upgrade the package. Please help by adding precise " -"circumstances to 17478 to move the status to confirmed. This mismatch " -"between R and C code can happen with any package not just data.table. It is " -"just that data.table has added this check." -msgstr "" -"并在全新的R会话中重新安装data.table。根本原因是R包安装程序可能在某些未经确认" -"的条件下将包置于显然可以正常工作的状态,但是新的R代码正在默默地调用旧的C代" -"码:https://bugs.r-project.org/bugzilla/show_bug.cgi?id=17478。一旦安装包处于" -"这不匹配的状态下,在您下次升级程序包之前,它可能会默默地产生错误的结果请提交" -"具体的情况至17478协助我们确认这个Bug。R和C代码之间的这种不匹配可能发生在任何" -"包中,而不仅仅是在data.table中。只是data.table添加了这个检查" +"The datatable.%s version (%s) does not match the package (%s). Please close " +"all R sessions to release the old %s and reinstall data.table in a fresh R " +"session. The root cause is that R's package installer can in some " +"unconfirmed circumstances leave a package in a state that is apparently " +"functional but where new R code is calling old C code silently: https://bugs." +"r-project.org/bugzilla/show_bug.cgi?id=17478. Once a package is in this " +"mismatch state it may produce wrong results silently until you next upgrade " +"the package. Please help by adding precise circumstances to 17478 to move " +"the status to confirmed. This mismatch between R and C code can happen with " +"any package not just data.table. It is just that data.table has added this " +"check." 
+msgstr "" +"data.table.%s版本(%s)和包不匹配版本(%s)。请关闭所有R会话以释放旧%s并在全新的R" +"会话中重新安装data.table。根本原因是R包安装程序可能在某些未经确认的条件下将包" +"置于显然可以正常工作的状态,但是新的R代码正在默默地调用旧的C代码:https://" +"bugs.r-project.org/bugzilla/show_bug.cgi?id=17478。一旦安装包处于这不匹配的状" +"态下,在您下次升级程序包之前,它可能会默默地产生错误的结果请提交具体的情况至" +"17478协助我们确认这个Bug。R和C代码之间的这种不匹配可能发生在任何包中,而不仅" +"仅是在data.table中。只是data.table添加了这个检查" msgid "This is R" msgstr "这是R" @@ -2059,6 +2308,15 @@ msgstr "对col.names有效的参数为'auto', 'top', and 'none'" msgid "Column classes will be suppressed when col.names is 'none'" msgstr "当col.names为'none'时,列的类型将被抑制" +msgid "Key: <%s>" +msgstr "键(key): <%s>" + +msgid "Null data.%s (0 rows and 0 cols)" +msgstr "NULL data.%s (0行,0列)" + +msgid "Empty data.%s (%d rows and %d cols)" +msgstr "空的 data.%s (%d行,%d列)" + msgid "" "Internal structure doesn't seem to be a list. Possibly corrupt data.table." msgstr "内部类型可能不是一个列表,该操作可能会损坏data.table" @@ -2088,8 +2346,8 @@ msgid "" "the original data's order by group. Try setindex() instead. Or, set*(copy(." "SD)) as a (slow) last resort." msgstr "" -"在.SD设置一个物理的键的功能被保留,以备未来的需求; 如需通过分组修改原数据顺序" -"请使用setindex(), 或者set*(copy(.SD))作为最终(该方式缓慢)的方法" +"在.SD设置一个物理的键(key)的功能被保留,以备未来的需求; 如需通过分组修改原数" +"据顺序请使用setindex(), 或者set*(copy(.SD))作为最终(该方式缓慢)的方法" msgid "" "cols is a character vector of zero length. Removed the key, but use NULL " @@ -2099,7 +2357,7 @@ msgstr "" "来避免警告" msgid "cols is the empty string. Use NULL to remove the key." -msgstr "列为一个空字符串,请使用NULL以删除键值。" +msgstr "列为一个空字符串,请使用NULL以删除键(key)值。" msgid "cols contains some blanks." msgstr "列中包含空白" @@ -2115,15 +2373,27 @@ msgid "' is type '" msgstr "是类型" msgid "' which is not supported as a key column type, currently." -msgstr "目前不是一种被支持的列类型" +msgstr "目前不是一种被支持的键(key)列类型" msgid "" "Internal error. 'cols' should be character at this point in setkey; please " "report." 
msgstr "内部错误: 目前在setkey中,'cols'应该是字符类型, 请报告" +msgid "forder took" +msgstr "forder 用了" + +msgid "setkey on columns %s using existing index '%s'" +msgstr "setkey到列%s用现有索引(index) '%s'" + +msgid "reorder took" +msgstr "reorder 用了" + +msgid "x is already ordered by these columns, no need to call reorder" +msgstr "x 已根据这些列进行了排序,无需调用 reorder" + msgid "Internal error: index '" -msgstr "内部错误:索引" +msgstr "内部错误:索引(index) '" msgid "' exists but is invalid" msgstr "存在但无效" @@ -2203,26 +2473,27 @@ msgstr "x 和 y 均需为 data.table" msgid "length(by.x) != length(by.y)" msgstr "length(by.x) != length(by.y)" -msgid "When x's column ('" -msgstr "当 x 的列 ('" - -msgid "') is character, the corresponding column in y ('" -msgstr "') 是字符,y 中相应的列 ('" - -msgid "') should be factor or character, but found incompatible type '" -msgstr "') 应该是因子或字符,然而此类型并不兼容:'" - -msgid "') is factor, the corresponding column in y ('" -msgstr "') 是因子,y 中相应的列 ('" - -msgid "') should be character or factor, but found incompatible type '" -msgstr "') 应该是字符或因子,然而此类型并不兼容:'" +msgid "" +"When x's column ('%s') is character, the corresponding column in y ('%s') " +"should be factor or character, but found incompatible type '%s'." +msgstr "" +"当 x 的列('%s') 是字符,y 中相应的列 ('%s') 应该是因子或字符,然而此类型并不" +"兼容:'%s'." -msgid "') is integer or numeric, the corresponding column in y ('" -msgstr "') 是整数或数值,y 中相应的列 ('" +msgid "" +"When x's column ('%s') is factor, the corresponding column in y ('%s') " +"should be character or factor, but found incompatible type '%s'." +msgstr "" +"当 x 的列('%s') 是因子, y 中相应的列 ('%s') 应该是字符或因子,然而此类型并不" +"兼容:'%s'." -msgid "') can not be character or logical types, but found incompatible type '" -msgstr "') 不能是字符或逻辑类型,然而此类型不兼容:'" +msgid "" +"When x's column ('%s') is integer or numeric, the corresponding column in y " +"('%s') can not be character or logical types, but found incompatible type " +"'%s'." +msgstr "" +"当 x 的列('%s') 是整数或数值,y 中相应的列('%s') 不能是字符或逻辑类型,然而此" +"类型不兼容:'%s'." 
msgid "argument 'all' should be logical of length one" msgstr "参数 'all' 应该是长度为 1 的逻辑型" @@ -2284,12 +2555,18 @@ msgstr "内部错误:此时不匹配的因子类型应已被发现" msgid "argument 'fill' ignored, only make sense for type='const'" msgstr "参数 'fill' 将被忽略,因其仅当 type='const'时有意义" +msgid "No objects of class data.table exist in %s" +msgstr "%s中没有 data.table类型的对象" + msgid "order.col='" msgstr "order.col='" msgid "' not a column name of info" msgstr "' 并非info的一个列名" +msgid "Total:" +msgstr "共计:" + msgid "data.table package is loaded. Unload or start a fresh R session." msgstr "data.table 包已被加载。请将其卸载或启动一个新的 R 会话。" @@ -2303,27 +2580,40 @@ msgstr "" msgid "Neither %s nor %s exist in %s" msgstr "%3$s 中 %1$s 也 %2$s 不存在" +msgid "test.data.table() running:" +msgstr "test.data.table() 执行:" + +msgid "" +"**** This R session's language is not English. Each test will still check " +"that the correct number of errors and/or\n" +"**** warnings are produced. However, to test the text of each error/warning " +"too, please restart R with LANGUAGE=en" +msgstr "" +"**** 此 R 会话的语言并非英文。每个测试仍将检查生成的警告或错误的个数是否正" +"确。**** 然而,若需同时测试警告和错误的文本内容,请用 LANGUAGE=en 重新启动 " +"R。" + msgid "Failed after test" msgstr "错误出现于测试" msgid "before the next test() call in" msgstr "后,先于下一调用test()于" -msgid "out of" -msgstr "总数为" - -msgid ". Search" -msgstr ". 
搜索" - -msgid "for test number" -msgstr "以获得测试编号" - msgid "Timings count mismatch:" msgstr "计时不一致:" msgid "vs" msgstr "vs" +msgid "10 longest running tests took" +msgstr "最慢10个测试用了" + +msgid "All %d tests in %s completed ok in %s" +msgstr "%2$s中每%1$d个测试在%3$s结束了ok" + +msgid "Running test id %s" +msgstr "执行测试 id %s" + msgid "Test" msgstr "测试" @@ -2331,6 +2621,51 @@ msgid "" "is invalid: when error= is provided it does not make sense to pass y as well" msgstr "无效:当使用了error=,不应再输入y" +msgid "Test id %s is not in increasing order" +msgstr "测试标识符 %s 不是递增的顺序" + +msgid "Test %s produced %d %ss but expected %d" +msgstr "测试 %s 生成了%d %ss 但预计生成 %d" + +msgid "" +"Test %s didn't produce the correct %s:\n" +"Expected: %s\n" +"Observed: %s" +msgstr "" +"测试 %s 没有生成正确的 %s:\n" +"预计生成:%s\n" +" 实际生成:%s " + +msgid "Output captured before unexpected warning/error/message:" +msgstr "在意外的警告/错误/提示之前,输入已被记录:" + +msgid "" +"Test %s did not produce the correct output:\n" +"Expected: <<%s>>\n" +"Observed <<%s>>" +msgstr "" +"测试 %s 没有生成正确的输入: \n" +"预计生成: <<%s>>\n" +"实际生成:<<%s>>" + +msgid "" +"Test %s produced output but should not have:\n" +"Expected absent (case insensitive): <<%s>>\n" +"Observed: <<%s>>" +msgstr "" +"测试 %s 生成输出但是不应当出现以下:\n" +"预计不存在(不区分大小写): <<%s>>\n" +"实际生成:<<%s>>" + +msgid "Test %s ran without errors but selfrefok(%s) is FALSE" +msgstr "测试 %s 可以无报错运行但是 selfrefok(%s) 是否:" + +msgid "Test %s ran without errors but failed check that x equals y:" +msgstr "测试 %s 可以无报错运行但是在检查 x 与 y 相同时候有报错:" + +msgid "First %d of %d (type '%s'):" +msgstr "第%d之%d (类型 '%s'):" + msgid "Use started.at=proc.time() not Sys.time() (POSIXt and slow)" msgstr "使用started.at=proc.time()而非Sys.time() (返回POSIXt类型,处理较慢)" @@ -2417,6 +2752,17 @@ msgstr "" msgid "Following columns are not numeric and will be omitted:" msgstr "以下的列并非数值类型,将被忽略:" +msgid "Index: " +msgid_plural "Indices: " +msgstr[0] "索引(index): " + msgid "%d variable not shown: %s\n" msgid_plural "%d variables not shown: %s\n" msgstr[0] "%d变量没显示: %s\n" + 
+msgid "%d error out of %d. Search %s for test number %s" +msgid_plural "%d errors out of %d. Search %s for test numbers %s" +msgstr[0] "%d错误总数为%d. %s中搜索测试编号%s" + +#~ msgid "'target' and 'current' must both be data.tables" +#~ msgstr "'target' 和 'current' 都必须是 data.table" diff --git a/po/zh_CN.po b/po/zh_CN.po index d9b54a4435..57242f7044 100644 --- a/po/zh_CN.po +++ b/po/zh_CN.po @@ -442,12 +442,12 @@ msgid "" "Dropping index '%s' as it doesn't have '__' at the beginning of its name. It " "was very likely created by v1.9.4 of data.table.\n" msgstr "" -"丢掉索引 '%s' 因为它的名字前面没有 '__' 。这个很可能是 data.table v1.9.4 创建" -"的\n" +"丢掉索引(index) '%s' 因为它的名字前面没有 '__' 。这个很可能由data.table " +"v1.9.4 创建\n" #: assign.c:574 msgid "Internal error: index name ends with trailing __" -msgstr "内部错误: 索引名称以 __ 结尾" +msgstr "内部错误: 索引(index)名称以 __ 结尾" #: assign.c:579 msgid "Internal error: Couldn't allocate memory for s4." @@ -460,12 +460,12 @@ msgstr "内部错误: 不能给 s5 分配内存" #: assign.c:611 assign.c:627 #, c-format msgid "Dropping index '%s' due to an update on a key column\n" -msgstr " 因为一个主列的更新,丢掉索引 '%s'\n" +msgstr " 因为一个键(key)列的更新,丢掉索引(index) '%s'\n" #: assign.c:620 #, c-format msgid "Shortening index '%s' to '%s' due to an update on a key column\n" -msgstr "因为一个主列的更新,缩短索引 '%s' 到 '%s'\n" +msgstr "因为一个键(key)列的更新,缩短索引(index) '%s' 到 '%s'\n" #: assign.c:650 #, c-format diff --git a/src/assign.c b/src/assign.c index 7a326baccc..1602e074b9 100644 --- a/src/assign.c +++ b/src/assign.c @@ -149,45 +149,43 @@ static SEXP shallow(SEXP dt, SEXP cols, R_len_t n) // NEW: cols argument to specify the columns to shallow copy on. If NULL, all columns. // called from alloccol where n is checked carefully, or from shallow() at R level // where n is set to truelength (i.e. a shallow copy only with no size change) - R_len_t i,l; int protecti=0; SEXP newdt = PROTECT(allocVector(VECSXP, n)); protecti++; // to do, use growVector here? 
SET_ATTRIB(newdt, shallow_duplicate(ATTRIB(dt))); SET_OBJECT(newdt, OBJECT(dt)); IS_S4_OBJECT(dt) ? SET_S4_OBJECT(newdt) : UNSET_S4_OBJECT(newdt); // To support S4 objects that incude data.table //SHALLOW_DUPLICATE_ATTRIB(newdt, dt); // SHALLOW_DUPLICATE_ATTRIB would be a bit neater but is only available from R 3.3.0 - + // TO DO: keepattr() would be faster, but can't because shallow isn't merely a shallow copy. It // also increases truelength. Perhaps make that distinction, then, and split out, but marked // so that the next change knows to duplicate. // keepattr() also merely points to the entire attrbutes list and thus doesn't allow replacing // some of its elements. - + // We copy all attributes that refer to column names so that calling setnames on either // the original or the shallow copy doesn't break anything. SEXP index = PROTECT(getAttrib(dt, sym_index)); protecti++; setAttrib(newdt, sym_index, shallow_duplicate(index)); - + SEXP sorted = PROTECT(getAttrib(dt, sym_sorted)); protecti++; setAttrib(newdt, sym_sorted, duplicate(sorted)); - + SEXP names = PROTECT(getAttrib(dt, R_NamesSymbol)); protecti++; SEXP newnames = PROTECT(allocVector(STRSXP, n)); protecti++; + const int l = isNull(cols) ? LENGTH(dt) : length(cols); if (isNull(cols)) { - l = LENGTH(dt); - for (i=0; i0 but nrow) error(_("i[%d] is %d which is out of range [1,nrow=%d]."),i+1,rowsd[i],nrow); // set() reaches here (test 2005.2); := reaches the same error in subset.c first if (rowsd[i]>=1) numToDo++; @@ -364,13 +362,13 @@ SEXP assign(SEXP dt, SEXP rows, SEXP cols, SEXP newcolnames, SEXP values) PROTECT(tmp = chmatch(cols, names, 0)); protecti++; buf = (int *) R_alloc(length(cols), sizeof(int)); int k=0; - for (i=0; i0) { if (!isDataTable) error(_("set() on a data.frame is for changing existing columns, not adding new ones. Please use a data.table for that. 
data.table's are over-allocated and don't shallow copy.")); newcolnames = PROTECT(allocVector(STRSXP, k)); protecti++; - for (i=0; ioldncol+length(newcolnames)) { if (!isDataTable) error(_("Item %d of column numbers in j is %d which is outside range [1,ncol=%d]. set() on a data.frame is for changing existing columns, not adding new ones. Please use a data.table for that."), i+1, coln, oldncol); @@ -436,16 +434,20 @@ SEXP assign(SEXP dt, SEXP rows, SEXP cols, SEXP newcolnames, SEXP values) } // RHS of assignment to new column is zero length but we'll use its type to create all-NA column of that type } - if (isMatrix(thisvalue) && (j=INTEGER(getAttrib(thisvalue, R_DimSymbol))[1]) > 1) // matrix passes above (considered atomic vector) - warning(_("%d column matrix RHS of := will be treated as one vector"), j); + { + int j; + if (isMatrix(thisvalue) && (j=INTEGER(getAttrib(thisvalue, R_DimSymbol))[1]) > 1) // matrix passes above (considered atomic vector) + warning(_("%d column matrix RHS of := will be treated as one vector"), j); + } const SEXP existing = (coln+1)<=oldncol ? VECTOR_ELT(dt,coln) : R_NilValue; if (isFactor(existing) && !isString(thisvalue) && TYPEOF(thisvalue)!=INTSXP && TYPEOF(thisvalue)!=LGLSXP && !isReal(thisvalue) && !isNewList(thisvalue)) { // !=INTSXP includes factor error(_("Can't assign to column '%s' (type 'factor') a value of type '%s' (not character, factor, integer or numeric)"), CHAR(STRING_ELT(names,coln)),type2char(TYPEOF(thisvalue))); } - if (nrow>0 && targetlen>0 && vlen>1 && vlen!=targetlen && (TYPEOF(existing)!=VECSXP || TYPEOF(thisvalue)==VECSXP)) { - // note that isNewList(R_NilValue) is true so it needs to be TYPEOF(existing)!=VECSXP above + if (nrow>0 && targetlen>0 && vlen>1 && vlen!=targetlen && !(TYPEOF(existing)==VECSXP && targetlen==1)) { + // We allow assigning objects of arbitrary to single items of list columns for convenience. 
+ // Note that isNewList(R_NilValue) is true so it needs to be !(TYPEOF(existing)==VECSXP) above error(_("Supplied %d items to be assigned to %d items of column '%s'. If you wish to 'recycle' the RHS please use rep() to make this intent clear to readers of your code."), vlen, targetlen, CHAR(colnam)); } } @@ -470,11 +472,19 @@ SEXP assign(SEXP dt, SEXP rows, SEXP cols, SEXP newcolnames, SEXP values) error(_("Internal error: selfrefnames is ok but tl names [%d] != tl [%d]"), TRUELENGTH(names), oldtncol); // # nocov SETLENGTH(dt, oldncol+LENGTH(newcolnames)); SETLENGTH(names, oldncol+LENGTH(newcolnames)); - for (i=0; i=0 ? sourceLen : length(source); + int slen = sourceLen>=0 ? sourceLen : length(source); // since source may get reassigned to a scalar, we should not mark it as const if (slen==0) return NULL; if (sourceStart<0 || sourceStart+slen>length(source)) error(_("Internal error memrecycle: sourceStart=%d sourceLen=%d length(source)=%d"), sourceStart, sourceLen, length(source)); // # nocov @@ -710,7 +720,7 @@ const char *memrecycle(const SEXP target, const SEXP where, const int start, con } else if (!sourceIsFactor && !isString(source)) { // target is factor if (allNA(source, false)) { // return false for list and other types that allNA does not support - source = ScalarLogical(NA_LOGICAL); // a global constant in R and won't allocate; fall through to regular zero-copy coerce + source = ScalarLogical(NA_LOGICAL); slen = 1; // a global constant in R and won't allocate; fall through to regular zero-copy coerce } else if (isInteger(source) || isReal(source)) { // allow assigning level numbers to factor columns; test 425, 426, 429 and 1945 const int nlevel = length(getAttrib(target, R_LevelsSymbol)); @@ -1056,11 +1066,30 @@ const char *memrecycle(const SEXP target, const SEXP where, const int start, con BODY(SEXP, STRING_PTR, SEXP, val, SET_STRING_ELT(target, off+i, cval)) } case VECSXP : - case EXPRSXP : // #546 - if (TYPEOF(source)!=VECSXP && 
TYPEOF(source)!=EXPRSXP) - BODY(SEXP, &, SEXP, val, SET_VECTOR_ELT(target, off+i, cval)) - else - BODY(SEXP, SEXPPTR_RO, SEXP, val, SET_VECTOR_ELT(target, off+i, cval)) + case EXPRSXP : { // #546 #4350 + if (len == 1 && TYPEOF(source)!=VECSXP && TYPEOF(source)!=EXPRSXP) { + BODY(SEXP, &, SEXP, val, SET_VECTOR_ELT(target, off+i, cval)) + } else { + switch (TYPEOF(source)) { + // allocVector instead of ScalarLogical to avoid copyMostAttrib on R's internal global TRUE/FALSE values; #4595. Then because + // ScalarInteger may now or in future R also return R internal global small integer constants, the same for that. Then + // because we do that here for logical and integer, use allocVeector too for the other types to follow the same pattern and possibly + // in future R will also have some global constants for those types too. + // the UNPROTECT can be at the end of the CAST before the SET_VECTOR_ELT, because SET_VECTOR_ELT will protect it and there's no other code inbetween + // the PROTECT is now needed because of the call to LOGICAL() which could feasibly gc inside it. + // copyMostAttrib is inside CAST so as to be outside loop. 
See the history in #4350 and its follow up + case RAWSXP: BODY(Rbyte, RAW, SEXP, PROTECT(allocVector(RAWSXP, 1));RAW(cval)[0]=val;copyMostAttrib(source,cval);UNPROTECT(1), SET_VECTOR_ELT(target,off+i,cval)) + case LGLSXP: BODY(int, LOGICAL, SEXP, PROTECT(allocVector(LGLSXP, 1));LOGICAL(cval)[0]=val;copyMostAttrib(source,cval);UNPROTECT(1), SET_VECTOR_ELT(target,off+i,cval)) + case INTSXP: BODY(int, INTEGER, SEXP, PROTECT(allocVector(INTSXP, 1));INTEGER(cval)[0]=val;copyMostAttrib(source,cval);UNPROTECT(1), SET_VECTOR_ELT(target,off+i,cval)) + case REALSXP: BODY(double, REAL, SEXP, PROTECT(allocVector(REALSXP, 1));REAL(cval)[0]=val;copyMostAttrib(source,cval);UNPROTECT(1), SET_VECTOR_ELT(target,off+i,cval)) + case CPLXSXP: BODY(Rcomplex, COMPLEX, SEXP, PROTECT(allocVector(CPLXSXP, 1));COMPLEX(cval)[0]=val;copyMostAttrib(source,cval);UNPROTECT(1), SET_VECTOR_ELT(target,off+i,cval)) + case STRSXP: BODY(SEXP, STRING_PTR, SEXP, PROTECT(allocVector(STRSXP, 1));SET_STRING_ELT(cval, 0, val);copyMostAttrib(source,cval);UNPROTECT(1), SET_VECTOR_ELT(target,off+i,cval)) + case VECSXP: + case EXPRSXP: BODY(SEXP, SEXPPTR_RO, SEXP, val, SET_VECTOR_ELT(target,off+i,cval)) + default: COERCE_ERROR("list"); + } + } + } break; default : error(_("Unsupported column type in assign.c:memrecycle '%s'"), type2char(TYPEOF(target))); // # nocov } diff --git a/src/chmatch.c b/src/chmatch.c index 75e45924de..d7fb90a573 100644 --- a/src/chmatch.c +++ b/src/chmatch.c @@ -80,11 +80,14 @@ static SEXP chmatchMain(SEXP x, SEXP table, int nomatch, bool chin, bool chmatch } int nuniq=0; for (int i=0; i0) { savetl(s); tl=0; } if (tl==0) SET_TRUELENGTH(s, chmatchdup ? -(++nuniq) : -i-1); // first time seen this string in table } + // in future if we need NAs in x not to be matched to NAs in table ... + // if (!matchNAtoNA && TRUELENGTH(NA_STRING)<0) + // SET_TRUELENGTH(NA_STRING, 0); if (chmatchdup) { // chmatchdup() is basically base::pmatch() but without the partial matching part. 
For example : // chmatchdup(c("a", "a"), c("a", "a")) # 1,2 - the second 'a' in 'x' has a 2nd match in 'table' @@ -113,7 +116,7 @@ static SEXP chmatchMain(SEXP x, SEXP table, int nomatch, bool chin, bool chmatch for (int i=0; i