From f6b91319df2a45728a476d204d809a7b4aec059c Mon Sep 17 00:00:00 2001
From: mattdowle <mattjdowle@gmail.com>
Date: Tue, 22 Jan 2019 19:04:00 -0800
Subject: [PATCH 1/9] Compiles fine without USE_RINTERNALS. First runtime error
 fixed.

---
 src/data.table.h |  1 -
 src/subset.c     | 11 ++++++++---
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/src/data.table.h b/src/data.table.h
index 498c6d6f88..20c496fa27 100644
--- a/src/data.table.h
+++ b/src/data.table.h
@@ -1,5 +1,4 @@
 #include <R.h>
-#define USE_RINTERNALS
 #include <Rinternals.h>
 // #include <signal.h> // the debugging machinery + breakpoint aidee
 // raise(SIGINT);
diff --git a/src/subset.c b/src/subset.c
index fc91dbba8f..81d1d78794 100644
--- a/src/subset.c
+++ b/src/subset.c
@@ -62,11 +62,16 @@ static void subsetVectorRaw(SEXP ans, SEXP source, SEXP idx, const bool anyNA)
     }
   } break;
   case VECSXP : {
-    SEXP *sp = VECTOR_PTR(source);
+    // VECTOR_PTR does exist but returns 'not safe to return vector pointer' when USE_RINTERNALS is not defined.
+    // VECTOR_DATA and LIST_POINTER exist too but call VECTOR_PTR. All are clearly not intended to be used by packages.
+    // The concern is overhead inside VECTOR_ELT() biting when called repetitively in a loop like we do here. That's why
+    // we take the R API (INTEGER()[i], REAL()[i], etc) outside loops for the simple types even when not parallel. For this
+    // type list case (VECSXP) it might be that some items are ALTREP for example, so we really should use the heavier
+    // _ELT accessor (VECTOR_ELT) inside the loop in this case.
     if (anyNA) {
-      for (int i=0; i<n; i++) { int elem = idxp[i]; SET_VECTOR_ELT(ans, i, elem==NA_INTEGER ? R_NilValue : sp[elem-1]); }
+      for (int i=0; i<n; i++) { int elem = idxp[i]; SET_VECTOR_ELT(ans, i, elem==NA_INTEGER ? R_NilValue : VECTOR_ELT(source, elem-1)); }
     } else {
-      for (int i=0; i<n; i++) {                     SET_VECTOR_ELT(ans, i, sp[idxp[i]-1]); }
+      for (int i=0; i<n; i++) {                     SET_VECTOR_ELT(ans, i, VECTOR_ELT(source, idxp[i]-1)); }
     }
   } break;
   case CPLXSXP : {

From e918cc8bd7002c86f54c317e004c48a88898a337 Mon Sep 17 00:00:00 2001
From: mattdowle <mattjdowle@gmail.com>
Date: Tue, 22 Jan 2019 19:33:46 -0800
Subject: [PATCH 2/9] REAL/INTEGER => DATAPTR for now

---
 src/dogroups.c | 25 +++++++++++++++----------
 1 file changed, 15 insertions(+), 10 deletions(-)

diff --git a/src/dogroups.c b/src/dogroups.c
index 02033af74e..0f7b86ae4b 100644
--- a/src/dogroups.c
+++ b/src/dogroups.c
@@ -23,7 +23,7 @@ void setSizes() {
 
 SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEXP xjiscols, SEXP grporder, SEXP order, SEXP starts, SEXP lens, SEXP jexp, SEXP env, SEXP lhs, SEXP newnames, SEXP on, SEXP verbose)
 {
-  R_len_t i, j, k, rownum, ngrp, nrowgroups, njval=0, ngrpcols, ansloc=0, maxn, estn=-1, r, thisansloc, grpn, thislen, igrp, vlen, origIlen=0, origSDnrow=0;
+  R_len_t i, j, k, rownum, ngrp, nrowgroups, njval=0, ngrpcols, ansloc=0, maxn, estn=-1, thisansloc, grpn, thislen, igrp, vlen, origIlen=0, origSDnrow=0;
   int protecti=0;
   SEXP names, names2, xknames, bynames, dtnames, ans=NULL, jval, thiscol, BY, N, I, GRP, iSD, xSD, rownames, s, RHS, listwrap, target, source, tmp;
   Rboolean wasvector, firstalloc=FALSE, NullWarnDone=FALSE, recycleWarn=TRUE;
@@ -390,11 +390,16 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX
     for (j=0; j<ngrpcols; j++) {
       target = VECTOR_ELT(ans,j);
       source = VECTOR_ELT(groups, INTEGER(grpcols)[j]-1);  // target and source the same type by construction above
-      if (SIZEOF(target)==4) for (r=0; r<maxn; r++)
-        INTEGER(target)[ansloc+r] = INTEGER(source)[igrp];
-      else for (r=0; r<maxn; r++)
-        REAL(target)[ansloc+r] = REAL(source)[igrp];
-      // Shouldn't need SET_* to age objects here sice groups, TO DO revisit.
+      if (SIZEOF(target)==4) {
+        int *td = (int *)DATAPTR(target);
+        int *sd = (int *)DATAPTR(source);
+        for (int r=0; r<maxn; r++) td[ansloc+r] = sd[igrp];
+      } else {
+        double *td = (double *)DATAPTR(target);
+        double *sd = (double *)DATAPTR(source);
+        for (int r=0; r<maxn; r++) td[ansloc+r] = sd[igrp];
+      }
+      // Shouldn't need SET_* to age objects here since groups, TO DO revisit.
     }
     for (j=0; j<njval; j++) {
       thisansloc = ansloc;
@@ -411,16 +416,16 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX
         switch (TYPEOF(target)) {     // rarely called so no need to optimize this switch
         case LGLSXP :
         case INTSXP :
-          for (r=0; r<maxn; r++) INTEGER(target)[thisansloc+r] = NA_INTEGER;
+          for (int r=0; r<maxn; r++) INTEGER(target)[thisansloc+r] = NA_INTEGER;
           break;
         case REALSXP :
-          for (r=0; r<maxn; r++) REAL(target)[thisansloc+r] = NA_REAL;
+          for (int r=0; r<maxn; r++) REAL(target)[thisansloc+r] = NA_REAL;
           break;
         case STRSXP :
-          for (r=0; r<maxn; r++) SET_STRING_ELT(target,thisansloc+r,NA_STRING);
+          for (int r=0; r<maxn; r++) SET_STRING_ELT(target,thisansloc+r,NA_STRING);
           break;
         case VECSXP :
-          for (r=0; r<maxn; r++) SET_VECTOR_ELT(target,thisansloc+r,R_NilValue);
+          for (int r=0; r<maxn; r++) SET_VECTOR_ELT(target,thisansloc+r,R_NilValue);
           break;
         default:
           error("Logical error. Type of column should have been checked by now");

From 626a19e8954e80375cbc137f71d8342392e4584e Mon Sep 17 00:00:00 2001
From: mattdowle <mattjdowle@gmail.com>
Date: Wed, 23 Jan 2019 19:43:45 -0800
Subject: [PATCH 3/9] modernized memrecycle. recycling length>1 is now error
 for rubustness.

---
 NEWS.md               |   2 +
 inst/tests/tests.Rraw |  77 ++++++++++-----------
 src/assign.c          | 151 +++++++++++++++++++++++-------------------
 src/dogroups.c        |  78 ++++++++++++----------
 src/reorder.c         |  14 ++--
 5 files changed, 175 insertions(+), 147 deletions(-)

diff --git a/NEWS.md b/NEWS.md
index fd01eca538..b5e2c25af8 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -4,6 +4,8 @@
 
 #### NEW FEATURES
 
+1. `:=` no longer recycles too-short RHS vectors to match the length on the LHS. Like base R, there was a warning when recycling left a remainder but no warning when the LHS length was an exact multiple of the RHS length. All cases are now an error unless `length(RHS)==1` or `length(RHS)==length(LHS)`. Consistent feedback was that recycling is rarely useful and more often a bug. In rare cases where you need to recycle a length>1 vector, use `rep()` explicitly. Single values are still recycled silently as before.
+
 #### BUG FIXES
 
 #### NOTES
diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index 8bfa6c8673..b881644d95 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -1373,7 +1373,9 @@ x = list(2,"b",2.718)
 test(470, DT[,baz:=x], data.table(a=1:3,b=4:6,foo=list(),bar=list(1,"a",3.14),baz=list(2,"b",2.718)))
 # Test recycling list
 DT = data.table(a=1:4,b=5:8)
-test(471, DT[,foo:=list("a",2:3)], data.table(a=1:4,b=5:8,foo=list("a",2:3,"a",2:3)))
+test(471.1, DT[,foo:=list("a",2:3)], error="Supplied 2 items to be assigned to 4 items of column 'foo'.*recycle")
+test(471.2, names(DT), c("a","b"))  # mismatch length error was caught ok before adding column not after column added
+test(471.3, DT[,foo:=.(list("a"))], data.table(a=1:4,b=5:8,foo=list("a","a","a","a")))
 # Test recycling singleton list
 if (ncol(DT)==3L) DT[,foo:=NULL] # else don't warn here under torture with skip= such that test 471 didn't run
 test(472, DT[,foo:=list(list(2:3))], data.table(a=1:4,b=5:8,foo=list(2:3,2:3,2:3,2:3)))
@@ -1381,9 +1383,9 @@ test(472, DT[,foo:=list(list(2:3))], data.table(a=1:4,b=5:8,foo=list(2:3,2:3,2:3
 # Test adding new column with a recycled factor, #1691
 DT = data.table(a=1:4,b=5:8)
 DT[,c:=factor("a")]
-test(473, DT, data.table(a=1:4,b=5:8,c=factor(c("a","a","a","a"))))
-DT[,d:=factor(c("a","b"))]
-test(474, DT, data.table(a=1:4,b=5:8,c=factor(c("a","a","a","a")),d=factor(c("a","b","a","b"))))
+test(473.1, DT, data.table(a=1:4,b=5:8,c=factor(c("a","a","a","a"))))
+test(473.2, DT[,d:=factor(c("a","b"))], error="Supplied 2 items to be assigned to 4 items of column 'd'")
+test(474, DT[,d:=factor(c("X"))], data.table(a=1:4,b=5:8,c=factor(c("a","a","a","a")),d=factor(c("X","X","X","X"))))
 
 # Test scoping error introduced at 1.6.1, unique(DT) when key column is 'x'
 DT=data.table(x=c("a", "a", "b", "b"), y=c("a", "a", "b", "b"), key="x")
@@ -1410,9 +1412,9 @@ gc()
 test(477, DT, data.table(a=1:3,foo=c(4L,10L,6L),foo2=c(4L,5L,11L)))
 test(478, DT[,foo:=foo], DT)  # does nothing, with no warning, consistent with base R `a<-a`.
 
-# Test that recycling now works with oversized inputs and % != 0 length, both with warnings.
+# Old tests that recycling now works now moved to test error
 DT = data.table(x=1:4)
-test(479, DT[, a:=5:7], data.table(x=1:4,a=c(5:7,5L)), warning="Supplied 3 items to be assigned to 4 items of column 'a' (recycled leaving remainder of 1 items)")
+test(479, DT[, a:=5:7], error="Supplied 3 items to be assigned to 4 items of column 'a'")
 
 # Test that multiple columns can be added
 DT = data.table(x=1:4)
@@ -2070,8 +2072,8 @@ test(745, DT[,ncol(DT):=1L], data.table(c1=1:3,c2=99L,c3=1L))
 DT = data.table(a=letters[c(1:3,3L)],key="a")
 test(746, DT["a",c("new1","new2"):=list(4L, 5L)],
           data.table(a=letters[c(1:3,3L)],new1=INT(4,NA,NA,NA),new2=INT(5,NA,NA,NA),key="a"))
-test(747, DT[,new1:=4:6], data.table(a=letters[c(1:3,3L)],new1=INT(4L,5L,6L,4L),new2=INT(5,NA,NA,NA),key="a"), warning="recycled leaving remainder of 1 item")
-suppressWarnings(DT[,new1:=4:6])
+test(747.1, DT[,new1:=4:6], error="Supplied 3 items to be assigned to 4 items of column 'new1'")
+test(747.2, DT[,new1:=INT(4,5,6,4)], data.table(a=letters[c(1:3,3L)],new1=INT(4L,5L,6L,4L),new2=INT(5,NA,NA,NA),key="a"))
 test(748, DT[c("c","b"),`:=`(new3=.N,new2=sum(new1)+1L),by=.EACHI], data.table(a=letters[c(1:3,3L)],new1=INT(4,5,6,4),new2=INT(5,6,11,11),new3=INT(NA,1,2,2),key="a"))
 
 # and multiple LHS by group, #1710
@@ -2142,12 +2144,10 @@ test(770, DT[J(2:3),.BY[[1]]==b,by=.EACHI], data.table(a=INT(2,2,3,3),V1=c(TRUE,
 # A data.table RHS of := caused a crash, #2311.
 a = data.table(first=1:6, third=c(1,1,1,3,3,4), key="first")
 b = data.table(first=c(3,4,4,5,6,7,8), second=1:7, key="first")
-test(771, b[,third:=a[b,third,by=.EACHI]], b, warning="Supplied 2 items.*to 7.*recycled leaving remainder of 1 item")
-test(772, copy(b)[,third:=as.list(a[b,third,by=.EACHI])], b, warning="Supplied 2 items.*to 7.*recycled leaving remainder of 1 item")
-test(773, b[4,third[[1]]], c(1,3,3,3,4,NA,NA))
-test(774.1, b[,third:=a[b,third,mult="first"]], ans<-data.table(first=c(3,4,4,5,6,7,8), second=1:7, third=c(1,3,3,3,4,NA,NA), key="first"))
-test(774.2, b[,third:=a[b,third]], ans) # mult="first" no longer needed as from v1.9.3.  It now does what was naturally expected.
-
+test(771, b[,third:=a[b,third,by=.EACHI]], error="Supplied 2 items to be assigned to 7 items of column 'third'")
+test(772, b[,third:=as.list(a[b,third,by=.EACHI])], error="Supplied 2 items to be assigned to 7 items of column 'third'")
+test(773, b[,third:=a[b,third,mult="first"]], ans<-data.table(first=c(3,4,4,5,6,7,8), second=1:7, third=c(1,3,3,3,4,NA,NA), key="first"))
+test(774, b[,third:=a[b,third]], ans) # mult="first" no longer needed as from v1.9.3.  It now does what was naturally expected.
 
 # That names are dropped. (Names on the column vectors don't display. They increase size and aren't much use.)
 DT = data.table(a=1:3,b=LETTERS[1:3])
@@ -2302,10 +2302,9 @@ DT = data.table(a=1:3)
 DT[,a:=scale(a)]    # 1 column matrix auto treated as vector
 test(835, na.omit(DT), DT)
 test(836, DT[,a:=as.integer(a)], data.table(a=INT(-1,0,1)))
-test(837, DT[,a:=cbind(1,2)], data.table(a=c(1L,2L,1L)),
-          warning=c("2 column matrix RHS of := will be treated as one vector",
-                    "Supplied 2 items to be assigned to 3 items.*recycled",
-                    "Coerced double RHS to integer to match.*column 1 named 'a'.*values contain no fractions"))
+test(837, DT[,a:=cbind(1,2)],
+  warning = "2 column matrix RHS of := will be treated as one vector",
+  error = "Supplied 2 items to be assigned to 3 items of column 'a'")
 DT = data.table(a=1:3,b=1:6)
 test(838, DT[,c:=scale(b), by=a][,c:=as.integer(1000*c)], data.table(a=1:3,b=1:6,c=rep(as.integer(1000*scale(1:2)), each=3)))
 
@@ -2570,7 +2569,9 @@ test(915, fread(txt,sep="\n"), data.table("A;B;C|D,E"=c("1;3;4|5,6","2;4;6|8,10"
 
 # Crash bug when RHS is 0 length and := by group, fixed in 1.8.7
 DT = data.table(a=1:3,b=1:6)
-test(916, DT[,newcol:=logical(0),by=a], data.table(a=1:3,b=1:6,newcol=NA))
+test(916.1, DT[,newcol:=logical(0),by=a], error="Supplied 0 items to be assigned to group 1 of size 2 in column 'newcol'")
+test(916.2, DT, data.table(a=1:3,b=1:6))  # that newcol was not added since group 1 failed
+test(916.3, DT[,newcol:=NA,by=a], data.table(a=1:3,b=1:6,newcol=NA))
 
 # roll join error when non last join column is factor, #2450
 X = data.table(id=2001:2004, uid=c(1001,1002,1001,1001), state=factor(c('CA','CA','CA','MA')), ts=c(51,52,53,54), key='state,uid,ts')
@@ -2721,10 +2722,7 @@ setkey(dt2, "Date")
 test(980, dt1[dt2, `:=`(A=A+i.A, B=B+i.B, C=i.C)][,list(A,B,C)],
           data.table(A=INT(110,115,120,225,230,235),B=INT(330,325,320,415,410,405),C=rep(1:2,each=3)))
 DT = data.table(A=1:2,B=3:4,C=5:6)
-test(981, DT[,`:=`(D=B+4L,B=0:1,E=A*2L,F=A*3L,C=C+1L,G=C*2L),by=A],
-          data.table(A=1:2,B=0L,C=6:7,D=7:8,E=c(2L,4L),F=c(3L,6L),G=c(10L,12L)),
-          warning=c("RHS 2 is length 2.*group 1.*last 1 element.*discarded",
-                    "RHS 2 is length 2.*group 2.*last 1 element.*discarded"))
+test(981, DT[,`:=`(D=B+4L,B=0:1,E=A*2L,F=A*3L,C=C+1L,G=C*2L),by=A], error="Supplied 2 items to be assigned to group 1 of size 1 in column 'B'")
 DT = data.table(A=1:2,B=3:4,C=5:6)
 test(982, DT[,`:=`(D=B+4L,B=0L,E=A*2L,F=A*3L,C=C+1L,G=C*2L),by=A],
           data.table(A=1:2,B=0L,C=6:7,D=7:8,E=c(2L,4L),F=c(3L,6L),G=c(10L,12L))) # Also note that G is not yet iterative. In future: c(12,14)
@@ -3540,12 +3538,18 @@ test(1131, My_Fun(), DT)
 # Test for #4957 - where `j` doesn't know `.N` when used with `lapply(.SD, function(x) ...)`
 test(1132, DT[, lapply(.SD, function(x) .N), by=ID], data.table(ID=c("A", "B", "C"), Value1=2L, Value2=2L))
 
-# Test for #4990 - `:=` does not generate recycling warning during 'by':
-DT <- data.table(x=c(1,1,1,1,1,2,2))
+# Test for #4990 - `:=` recycle error during 'by'
+DT <- data.table(x=INT(1,1,1,1,1,2,2))
 # on a new column
-test(1133.1, DT[, new := c(1,2), by=x], data.table(x=c(1,1,1,1,1,2,2), new=c(1,2,1,2,1,1,2)), warning="Supplied 2 items to be assigned to group 1 of size 5 in column 'new'")
-# on an already existing column
-test(1133.2, DT[, new := c(1,2), by=x], data.table(x=c(1,1,1,1,1,2,2), new=c(1,2,1,2,1,1,2)), warning="Supplied 2 items to be assigned to group 1 of size 5 in column 'new'")
+test(1133.1, DT[, new := c(1,2), by=x], error="Supplied 2 items to be assigned to group 1 of size 5 in column 'new'")
+test(1133.2, DT, data.table(x=INT(1,1,1,1,1,2,2)))
+# on an already existing column:
+DT[, new:=99L]
+test(1133.3, DT[, new := c(1,2), by=x],   error="Type of RHS ('double') must match LHS ('integer')")
+test(1133.4, DT[, new := c(1L,2L), by=x], error="Supplied 2 items to be assigned to group 1 of size 5 in column 'new'")
+test(1133.5, DT, data.table(x=INT(1,1,1,1,1,2,2), new=99L))
+test(1133.6, DT[, new := rep(-.GRP, .N), by=x], data.table(x=INT(1,1,1,1,1,2,2), new=INT(-1,-1,-1,-1,-1,-2,-2)))
+test(1133.7, DT[, new := .N, by=x], data.table(x=INT(1,1,1,1,1,2,2), new=INT(5,5,5,5,5,2,2)))
 
 # Fix for FR #2496 - catch `{` in `:=` expression in `j`:
 DT <- data.table(x=c("A", "A", "B", "B"), val =1:4)
@@ -3983,9 +3987,8 @@ DT = CJ(c("Corp", "CORP"), 1:3)
 test(1190, setkey(DT), DT)   # tests no warning here from setkey, was "key rebuilt" due to inconsistent locale sorting in v1.8.10
 
 # non-exact recycling in j results.  Was caught with error in v1.8.10, now recycles with remainder and warning
-DT = data.table(a=1:2,b=1:6)
-test(1191, DT[, list(b,1:2), by=a], data.table(a=INT(1,1,1,2,2,2),b=INT(1,3,5,2,4,6),V2=INT(1,2,1,1,2,1)),
-           warning="Recycled leaving remainder of 1 item.*This warning is once only")
+DT = data.table(a=1:2, b=1:6)
+test(1191, DT[, list(b,1:2), by=a], error="Supplied 2 items for column 2 of group 1 which has 3 rows.")
 
 # that twiddle is used consistently, and tolerance has gone.
 # nice example from : http://stackoverflow.com/questions/21885290/data-table-roll-nearest-returns-multiple-results
@@ -4644,13 +4647,11 @@ test(1288.16, rbindlist(ll, fill=TRUE), error="fill=TRUE, but names of input lis
 
 # fix for #5647
 dt = data.table(x=1L, y=1:10)
-cp = copy(dt)
-test(1289.1, dt[,z := c(rep(NA, 5), y), by=x], cp[, z := c(rep(NA, 5), y[1:5])], warning="RHS 1 is length 15")
-dt = data.table(x=c(1:2), y=1:10)
-cp = copy(dt)
-test(1289.2, dt[, z := c(rep(NA, 5),y), by=x], cp[, z := rep(NA_integer_, 10)],
-             warning=c("RHS 1 is length 10.*group 1.*last 5 element.*discarded",
-                       "RHS 1 is length 10.*group 2.*last 5 element.*discarded"))
+test(1289.1, dt[, z := c(rep(NA,5), y), by=x], error="Supplied 15 items to be assigned to group 1 of size 10 in column 'z'")
+test(1289.2, names(dt), c("x","y"))
+dt = data.table(x=1:2, y=1:10)
+test(1289.3, dt[, z := c(rep(NA,5), y), by=x], error="Supplied 10 items to be assigned to group 1 of size 5 in column 'z'")
+test(1289.4, names(dt), c("x","y"))
 
 ########################################
 # Extensve testing for "duplicate" names
diff --git a/src/assign.c b/src/assign.c
index 8969d17685..1eb04d5f95 100644
--- a/src/assign.c
+++ b/src/assign.c
@@ -443,11 +443,8 @@ SEXP assign(SEXP dt, SEXP rows, SEXP cols, SEXP newcolnames, SEXP values, SEXP v
     if ((coln+1)<=oldncol && isFactor(VECTOR_ELT(dt,coln)) &&
       !isString(thisvalue) && TYPEOF(thisvalue)!=INTSXP && TYPEOF(thisvalue)!=LGLSXP && !isReal(thisvalue) && !isNewList(thisvalue))  // !=INTSXP includes factor
       error("Can't assign to column '%s' (type 'factor') a value of type '%s' (not character, factor, integer or numeric)", CHAR(STRING_ELT(names,coln)),type2char(TYPEOF(thisvalue)));
-    if (nrow>0 && targetlen>0) {
-      if (vlen>targetlen)
-        warning("Supplied %d items to be assigned to %d items of column '%s' (%d unused)", vlen, targetlen,CHAR(colnam),vlen-targetlen);
-      else if (vlen>0 && targetlen%vlen != 0)
-        warning("Supplied %d items to be assigned to %d items of column '%s' (recycled leaving remainder of %d items).",vlen,targetlen,CHAR(colnam),targetlen%vlen);
+    if (nrow>0 && targetlen>0 && vlen>1 && vlen!=targetlen) {
+      error("Supplied %d items to be assigned to %d items of column '%s'. The RHS length must either be 1 (single values are ok) or match the LHS length exactly. If you wish to 'recycle' the RHS please use rep() explicitly to make this intent clear to readers of your code.", vlen, targetlen,CHAR(colnam));
     }
   }
   // having now checked the inputs, from this point there should be no errors so we can now proceed to
@@ -810,23 +807,30 @@ SEXP assign(SEXP dt, SEXP rows, SEXP cols, SEXP newcolnames, SEXP values, SEXP v
   return(dt);  // needed for `*tmp*` mechanism (when := isn't used), and to return the new object after a := for compound syntax.
 }
 
+/*
 static Rboolean anyNamed(SEXP x) {
   if (MAYBE_REFERENCED(x)) return TRUE;
   if (isNewList(x)) for (int i=0; i<LENGTH(x); i++)
     if (anyNamed(VECTOR_ELT(x,i))) return TRUE;
   return FALSE;
 }
+*/
 
 void memrecycle(SEXP target, SEXP where, int start, int len, SEXP source)
-// like memcpy but recycles source and takes care of aging
-// 'where' a 1-based INTEGER vector subset of target to assign to,  or NULL or integer()
-// assigns to target[start:start+len-1] or target[where[start:start+len-1]]  where start is 0-based
+// like memcpy but recycles single-item source
+// 'where' a 1-based INTEGER vector subset of target to assign to, or NULL or integer()
+// assigns to target[start:start+len-1] or target[where[start:start+len-1]] where start is 0-based
 {
-  int r=0, w, protecti=0;
+  // DELETE ...int protecti=0;
   if (len<1) return;
-  int slen = length(source) > len ? len : length(source); // fix for 5647. when length(source) > len, slen must be len.
-  if (slen<1) return;
   if (TYPEOF(target) != TYPEOF(source)) error("Internal error: TYPEOF(target)['%s']!=TYPEOF(source)['%s']", type2char(TYPEOF(target)),type2char(TYPEOF(source))); // # nocov
+  int slen = length(source);
+  if (slen!=1 && slen!=len) error("Internal error: recycle length error not caught earlier. slen=%d len=%d", slen, len); // # nocov
+  // Internal error because the column has already been added to the DT, so length mismatch should have been caught before adding the column.
+  // for 5647 this used to limit slen to len, but no longer
+
+  /*
+  TODELETE ...
   if (isNewList(source)) {
     // A list() column; i.e. target is a column of pointers to SEXPs rather than the much more common case
     // where memrecycle copies the DATAPTR data to the atomic target from the atomic source.
@@ -845,82 +849,93 @@ void memrecycle(SEXP target, SEXP where, int start, int len, SEXP source)
       protecti++;
     }
   }
+  */
   size_t size = SIZEOF(target);
   if (!length(where)) {
     switch (TYPEOF(target)) {
-    case INTSXP : case REALSXP : case LGLSXP :
-      break;
-    case STRSXP :
-      for (; r<slen; r++)     // only one SET_STRING_ELT per RHS item is needed to set generations (overhead)
-        SET_STRING_ELT(target, start+r, STRING_ELT(source, r));
-      break;
-    case VECSXP :
-      for (; r<slen; r++)
-        SET_VECTOR_ELT(target, start+r, VECTOR_ELT(source, r));
-        // TO DO: if := in future could ever change a list item's contents by reference, would need to duplicate at that point
+    case LGLSXP: case INTSXP :
+      if (slen==1) {
+        // recycle single items
+        int *td = INTEGER(target);
+        const int val = INTEGER(source)[0];
+        for (int i=0; i<len; i++) td[start+i] = val;  // no R API inside loop as INTEGER has overhead (even when it's an inline function)
+      } else {
+        memcpy(INTEGER(target)+start, INTEGER(source), slen*size);
+      }
       break;
-    default :
-      error("Unsupported type '%s'", type2char(TYPEOF(target)));
-    }
-    if (slen == 1) {
-      if (size==4) for (; r<len; r++)
-        INTEGER(target)[start+r] = INTEGER(source)[0];   // copies pointer on 32bit, sizes checked in init.c
-      else for (; r<len; r++)
-        REAL(target)[start+r] = REAL(source)[0];         // copies pointer on 64bit, sizes checked in init.c
-    } else if (slen<10) {    // 10 is just a guess for when memcpy is faster. Certainly memcpy is slower when slen==1, but that's the most common case by far so not high priority to discover the optimum here. TO DO: revisit
-      if (size==4) for (; r<len; r++)
-        INTEGER(target)[start+r] = INTEGER(source)[r%slen];
-      else for (; r<len; r++)
-        REAL(target)[start+r] = REAL(source)[r%slen];
-    } else {
-      for (r=r>0?1:0; r<(len/slen); r++) {   // if the first slen were done in the switch above, convert r=slen to r=1
-        memcpy((char *)DATAPTR(target) + (start+r*slen)*size,
-             (char *)DATAPTR(source),
-             slen * size);
+    case REALSXP :
+      if (slen==1) {
+        double *td = REAL(target);
+        const double val = REAL(source)[0];
+        for (int i=0; i<len; i++) td[start+i] = val;
+      } else {
+        memcpy(REAL(target)+start, REAL(source), slen*size);
       }
-      memcpy((char *)DATAPTR(target) + (start+r*slen)*size,
-           (char *)DATAPTR(source),
-           (len%slen) * size);
-    }
-  } else {
-    switch (TYPEOF(target)) {
-    case INTSXP : case REALSXP : case LGLSXP :
       break;
     case STRSXP :
-      for (; r<slen; r++) {
-        w = INTEGER(where)[start+r]; if (w<1) continue;  // 0 or NA
-        SET_STRING_ELT(target, w-1, STRING_ELT(source, r));
+      if (slen==1) {
+        const SEXP val = STRING_ELT(source, 0);
+        for (int i=0; i<len; i++) SET_STRING_ELT(target, start+i, val);
+      } else {
+        const SEXP *val = STRING_PTR(source);
+        for (int i=0; i<len; i++) SET_STRING_ELT(target, start+i, val[i]);
       }
       break;
     case VECSXP :
-      for (; r<slen; r++) {
-        w = INTEGER(where)[start+r]; if (w<1) continue;
-        SET_VECTOR_ELT(target, w-1, VECTOR_ELT(source, r));
+      if (slen==1) {
+        const SEXP val = VECTOR_ELT(source, 0);
+        for (int i=0; i<len; i++) SET_VECTOR_ELT(target, start+i, val);
+      } else {
+        const SEXP *val = (SEXP *)DATAPTR(source);  // TODO: revisit VECTOR_PTR
+        for (int i=0; i<len; i++) SET_VECTOR_ELT(target, start+i, val[i]);
       }
       break;
     default :
       error("Unsupported type '%s'", type2char(TYPEOF(target)));
     }
-    if (slen == 1) {
-      if (size==4) for (; r<len; r++) {
-        w = INTEGER(where)[start+r]; if (w<1) continue;
-        INTEGER(target)[ w-1 ] = INTEGER(source)[0];
-      } else for (; r<len; r++) {
-        w = INTEGER(where)[start+r]; if (w<1) continue;
-        REAL(target)[ w-1 ] = REAL(source)[0];
+  } else {
+    const int *wd = INTEGER(where)+start;
+    const int mask = slen==1 ? 0 : INT_MAX;
+    switch (TYPEOF(target)) {
+    case LGLSXP: case INTSXP : {
+      int *td = INTEGER(target);
+      const int *sd = INTEGER(source);
+      for (int i=0; i<len; i++) {
+        const int w = wd[i];
+        if (w<1) continue;  // 0 or NA
+        td[w-1] = sd[i&mask];  // i&mask is for branchless recyle when slen==1
+      }
+    } break;
+    case REALSXP : {
+      double *td = REAL(target);
+      const double *sd = REAL(source);
+      for (int i=0; i<len; i++) {
+        const int w = wd[i];
+        if (w<1) continue;
+        td[w-1] = sd[i&mask];
+      }
+    } break;
+    case STRSXP : {
+      const SEXP *sd = STRING_PTR(source);
+      for (int i=0; i<len; i++) {
+        const int w = wd[i];
+        if (w<1) continue;
+        SET_STRING_ELT(target, w-1, sd[i&mask]);
       }
-    } else {
-      if (size==4) for (; r<len; r++) {
-        w = INTEGER(where)[start+r]; if (w<1) continue;
-        INTEGER(target)[ w-1 ] = INTEGER(source)[r%slen];
-      } else for (; r<len; r++) {
-        w = INTEGER(where)[start+r]; if (w<1) continue;
-        REAL(target)[ w-1 ] = REAL(source)[r%slen];
+    } break;
+    case VECSXP : {
+      const SEXP *sd = (SEXP *)DATAPTR(source);  // TODO revisit VECTOR_PTR
+      for (int i=0; i<len; i++) {
+        const int w = wd[i];
+        if (w<1) continue;
+        SET_VECTOR_ELT(target, w-1, sd[i&mask]);
       }
+    } break;
+    default :
+      error("Unsupported type '%s'", type2char(TYPEOF(target)));
     }
-    // if slen>10 it may be worth memcpy, but we'd need to first know if 'where' was a contiguous subset
   }
-  UNPROTECT(protecti);
+  // DELETE ... UNPROTECT(protecti);
 }
 
 SEXP allocNAVector(SEXPTYPE type, R_len_t n)
diff --git a/src/dogroups.c b/src/dogroups.c
index 0f7b86ae4b..ad85fe9c96 100644
--- a/src/dogroups.c
+++ b/src/dogroups.c
@@ -23,11 +23,10 @@ void setSizes() {
 
 SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEXP xjiscols, SEXP grporder, SEXP order, SEXP starts, SEXP lens, SEXP jexp, SEXP env, SEXP lhs, SEXP newnames, SEXP on, SEXP verbose)
 {
-  R_len_t i, j, k, rownum, ngrp, nrowgroups, njval=0, ngrpcols, ansloc=0, maxn, estn=-1, thisansloc, grpn, thislen, igrp, vlen, origIlen=0, origSDnrow=0;
+  R_len_t i, j, k, rownum, ngrp, nrowgroups, njval=0, ngrpcols, ansloc=0, maxn, estn=-1, thisansloc, grpn, thislen, igrp, origIlen=0, origSDnrow=0;
   int protecti=0;
   SEXP names, names2, xknames, bynames, dtnames, ans=NULL, jval, thiscol, BY, N, I, GRP, iSD, xSD, rownames, s, RHS, listwrap, target, source, tmp;
-  Rboolean wasvector, firstalloc=FALSE, NullWarnDone=FALSE, recycleWarn=TRUE;
-  size_t size; // must be size_t, otherwise bug #5305 (integer overflow in memcpy)
+  Rboolean wasvector, firstalloc=FALSE, NullWarnDone=FALSE;
   clock_t tstart=0, tblock[10]={0}; int nblock[10]={0};
 
   if (!isInteger(order)) error("Internal error: order not integer vector"); // # nocov
@@ -132,7 +131,7 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX
     INTEGER(GRP)[0] = i+1;  // group counter exposed as .GRP
 
     for (j=0; j<length(iSD); j++) {   // either this or the next for() will run, not both
-      size = SIZEOF(VECTOR_ELT(iSD,j));
+      size_t size = SIZEOF(VECTOR_ELT(iSD,j));
       memcpy((char *)DATAPTR(VECTOR_ELT(iSD,j)),  // ok use of memcpy. Loop'd through columns not rows
              (char *)DATAPTR(VECTOR_ELT(groups,INTEGER(jiscols)[j]-1))+i*size,
              size);
@@ -142,7 +141,7 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX
     // Otherwise, igrp needs to be determined from starts, potentially taking care about the order if present.
     igrp = !isNull(jiscols) ? i : (length(grporder) ? INTEGER(grporder)[INTEGER(starts)[i]-1]-1 : INTEGER(starts)[i]-1);
     if (igrp>=0 && nrowgroups) for (j=0; j<length(BY); j++) {    // igrp can be -1 so 'if' is important, otherwise memcpy crash
-      size = SIZEOF(VECTOR_ELT(BY,j));
+      size_t size = SIZEOF(VECTOR_ELT(BY,j));
       memcpy((char *)DATAPTR(VECTOR_ELT(BY,j)),  // ok use of memcpy size 1. Loop'd through columns not rows
              (char *)DATAPTR(VECTOR_ELT(groups,INTEGER(grpcols)[j]-1))+igrp*size,
              size);
@@ -201,14 +200,14 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX
         for (j=0; j<grpn; j++) INTEGER(I)[j] = rownum+j+1;
         if (rownum>=0) {
           for (j=0; j<length(SDall); j++) {
-            size = SIZEOF(VECTOR_ELT(SDall,j));
+            size_t size = SIZEOF(VECTOR_ELT(SDall,j));
             memcpy((char *)DATAPTR(VECTOR_ELT(SDall,j)),  // direct memcpy best here, for usually large size groups. by= each row is slow and not recommended anyway, so we don't mind there's no switch here for grpn==1
                    (char *)DATAPTR(VECTOR_ELT(dt,INTEGER(dtcols)[j]-1))+rownum*size,
                    grpn*size);
             // SD is our own alloc'd memory, and the source (DT) is protected throughout, so no need for SET_* overhead
           }
           for (j=0; j<length(xSD); j++) {
-            size = SIZEOF(VECTOR_ELT(xSD,j));
+            size_t size = SIZEOF(VECTOR_ELT(xSD,j));
             memcpy((char *)DATAPTR(VECTOR_ELT(xSD,j)),  // ok use of memcpy. Loop'd through columns not rows
                    (char *)DATAPTR(VECTOR_ELT(dt,INTEGER(xjiscols)[j]-1))+rownum*size,
                    size);
@@ -219,18 +218,23 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX
         // Fairly happy with this block. No need for SET_* here. See comment above.
         for (k=0; k<grpn; k++) INTEGER(I)[k] = INTEGER(order)[ INTEGER(starts)[i]-1 + k ];
         for (j=0; j<length(SDall); j++) {
-          size = SIZEOF(VECTOR_ELT(SDall,j));
+          size_t size = SIZEOF(VECTOR_ELT(SDall,j));
           target = VECTOR_ELT(SDall,j);
           source = VECTOR_ELT(dt,INTEGER(dtcols)[j]-1);
+          int *Id = INTEGER(I);
           if (size==4) {
+            int *td = (int *)DATAPTR(target);
+            int *sd = (int *)DATAPTR(source);
             for (k=0; k<grpn; k++) {
-              rownum = INTEGER(I)[k]-1;
-              INTEGER(target)[k] = INTEGER(source)[rownum]; // on 32bit, copies pointers too
+              rownum = Id[k]-1;
+              td[k] = sd[rownum];  // on 32bit copies pointers too
             }
           } else {  // size 8
+            double *td = (double *)DATAPTR(target);
+            double *sd = (double *)DATAPTR(source);
             for (k=0; k<grpn; k++) {
-              rownum = INTEGER(I)[k]-1;
-              REAL(target)[k] = REAL(source)[rownum];       // on 64bit, copies pointers too
+              rownum = Id[k]-1;
+              td[k] = sd[rownum];  // on 64bit copies pointers too
             }
           }
         }
@@ -279,12 +283,26 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX
     }
     if (!isNull(lhs)) {
       R_len_t origncol = LENGTH(dt);
-      for (j=0; j<length(lhs); j++) {
+      // check jval first before proceeding to add columns, so that if error on the first group, the columns aren't added
+      for (int j=0; j<length(lhs); j++) {
+        target = VECTOR_ELT(dt,INTEGER(lhs)[j]-1);
+        RHS = VECTOR_ELT(jval,j%LENGTH(jval));
+        if (isNull(RHS))
+          error("RHS is NULL when grouping :=. Makes no sense to delete a column by group. Perhaps use an empty vector instead.");
+        if (TYPEOF(target)!=TYPEOF(RHS) && !isNull(target))
+          error("Type of RHS ('%s') must match LHS ('%s'). To check and coerce would impact performance too much for the fastest cases. Either change the type of the target column, or coerce the RHS of := yourself (e.g. by using 1L instead of 1)", type2char(TYPEOF(RHS)), type2char(TYPEOF(target)));
+        int vlen = length(RHS);
+        if (vlen!=1 && vlen!=grpn) {
+          SEXP colname = isNull(target) ? STRING_ELT(newnames, INTEGER(lhs)[j]-origncol-1) : STRING_ELT(dtnames,INTEGER(lhs)[j]-1);
+          error("Supplied %d items to be assigned to group %d of size %d in column '%s'. The RHS length must either be 1 (single values are ok) or match the LHS length exactly. If you wish to 'recycle' the RHS please use rep() explicitly to make this intent clear to readers of your code.",vlen,i+1,grpn,CHAR(colname));
+          // e.g. in #4990 `:=` did not issue recycling warning during grouping. Now it is error not warning.
+        }
+      }
+      for (int j=0; j<length(lhs); j++) {
         target = VECTOR_ELT(dt,INTEGER(lhs)[j]-1);
         RHS = VECTOR_ELT(jval,j%LENGTH(jval));
         if (isNull(target)) {
           // first time adding to new column
-          if (isNull(RHS)) error("RHS is NULL when grouping :=. Makes no sense to delete a column by group. Perhaps use an empty vector instead.");
           if (TRUELENGTH(dt) < INTEGER(lhs)[j]) error("Internal error: Trying to add new column by reference but tl is full; alloc.col should have run first at R level before getting to this point in dogroups"); // # nocov
           tmp = PROTECT(allocNAVector(TYPEOF(RHS), LENGTH(VECTOR_ELT(dt,0))));
           // increment length only if the allocation passes, #1676
@@ -297,17 +315,7 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX
           SET_STRING_ELT(dtnames, INTEGER(lhs)[j]-1, STRING_ELT(newnames, INTEGER(lhs)[j]-origncol-1));
           target = VECTOR_ELT(dt,INTEGER(lhs)[j]-1);
         }
-        if (TYPEOF(target)!=TYPEOF(RHS)) error("Type of RHS ('%s') must match LHS ('%s'). To check and coerce would impact performance too much for the fastest cases. Either change the type of the target column, or coerce the RHS of := yourself (e.g. by using 1L instead of 1)", type2char(TYPEOF(RHS)), type2char(TYPEOF(target)));
-        size = SIZEOF(target);
-        vlen = length(RHS);
-        if (vlen==0) continue;
-        if (grpn>0 && vlen>grpn && j<LENGTH(jval)) warning("RHS %d is length %d (greater than the size (%d) of group %d). The last %d element(s) will be discarded.", j+1, vlen, grpn, i+1, vlen-grpn);
-        // fix for #4990 - `:=` did not issue recycling warning during "by" operation.
-        if (vlen<grpn && vlen>0 && grpn%vlen != 0)
-          warning("Supplied %d items to be assigned to group %d of size %d in column '%s' (recycled leaving remainder of %d items).",vlen,i+1,grpn,CHAR(STRING_ELT(dtnames,INTEGER(lhs)[j]-1)),grpn%vlen);
-
-        memrecycle(target, order, INTEGER(starts)[i]-1, grpn, RHS);
-
+        memrecycle(target, order, INTEGER(starts)[i]-1, grpn, RHS);   // length mismatch checked above for all jval columns before starting to add any new columns
         copyMostAttrib(RHS, target);  // not names, otherwise test 778 would fail.
         /* OLD FIX: commented now. The fix below resulted in segfault on factor columns because I dint set the "levels"
            Instead of fixing that, I just removed setting class if it's factor. Not appropriate fix.
@@ -316,7 +324,6 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX
         OUTDATED: if (!isFactor(RHS)) setAttrib(target, R_ClassSymbol, getAttrib(RHS, R_ClassSymbol));
         OUTDATED: // added !isFactor(RHS) to fix #5104 (side-effect of fixing #2531)
            See also #155 and #36 */
-
       }
       UNPROTECT(1);
       continue;
@@ -415,12 +422,14 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX
         }
         switch (TYPEOF(target)) {     // rarely called so no need to optimize this switch
         case LGLSXP :
-        case INTSXP :
-          for (int r=0; r<maxn; r++) INTEGER(target)[thisansloc+r] = NA_INTEGER;
-          break;
-        case REALSXP :
-          for (int r=0; r<maxn; r++) REAL(target)[thisansloc+r] = NA_REAL;
-          break;
+        case INTSXP : {
+          int *td = INTEGER(target)+thisansloc;
+          for (int r=0; r<maxn; r++) td[r] = NA_INTEGER;
+        } break;
+        case REALSXP : {
+          double *td = REAL(target)+thisansloc;
+          for (int r=0; r<maxn; r++) td[r] = NA_REAL;
+        } break;
         case STRSXP :
           for (int r=0; r<maxn; r++) SET_STRING_ELT(target,thisansloc+r,NA_STRING);
           break;
@@ -434,9 +443,8 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX
       }
       if (TYPEOF(source) != TYPEOF(target))
         error("Column %d of result for group %d is type '%s' but expecting type '%s'. Column types must be consistent for each group.", j+1, i+1, type2char(TYPEOF(source)), type2char(TYPEOF(target)));
-      if (recycleWarn && maxn%thislen != 0) {
-        warning("Column %d of result for group %d is length %d but the longest column in this result is %d. Recycled leaving remainder of %d items. This warning is once only for the first group with this issue.",j+1,i+1,thislen,maxn,maxn%thislen);
-        recycleWarn = FALSE;
+      if (thislen!=1 && thislen!=maxn) {
+        error("Supplied %d items for column %d of group %d which has %d rows. The RHS length must either be 1 (single values are ok) or match the LHS length exactly. If you wish to 'recycle' the RHS please use rep() explicitly to make this intent clear to readers of your code.", thislen, j+1, i+1, grpn);
       }
       memrecycle(target, R_NilValue, thisansloc, maxn, source);
     }
diff --git a/src/reorder.c b/src/reorder.c
index f41dd6d195..b9b004cad2 100644
--- a/src/reorder.c
+++ b/src/reorder.c
@@ -61,21 +61,23 @@ SEXP reorder(SEXP x, SEXP order)
     const SEXP v = isNewList(x) ? VECTOR_ELT(x,i) : x;
     const size_t size = SIZEOF(v);    // size_t, otherwise #5305 (integer overflow in memcpy)
     if (size==4) {
-      const int *restrict vd = INTEGER(v);  // just DATAPTR really. Could use REAL too or any of the other types. It's the LHS's type being 4 byte that is important.
+      int *vd = (int *)DATAPTR(v);
+      const int *restrict cvd = vd;
       int *restrict tmp = (int *)TMP;
       #pragma omp parallel for num_threads(nth)
       for (int i=start; i<=end; i++) {
-        tmp[i] = vd[idx[i]-1];  // copies 4 bytes including pointers on 32bit (STRSXP and VECSXP)
+        tmp[i] = cvd[idx[i]-1];  // copies 4 bytes; including pointers on 32bit (STRSXP and VECSXP)
       }
-      memcpy(INTEGER(v)+start, tmp+start, (end-start+1)*size);
+      memcpy(vd+start, tmp+start, (end-start+1)*size);
     } else {
-      const double *restrict vd = REAL(v);  // similarly, could use INTEGER() here.  It's the LHS's type being 8 bytes that's important.
+      double *vd = (double *)DATAPTR(v);
+      const double *restrict cvd = vd;
       double *restrict tmp = (double *)TMP;
       #pragma omp parallel for num_threads(nth)
       for (int i=start; i<=end; i++) {
-        tmp[i] = vd[idx[i]-1];  // copies 8 bytes including pointers on 64bit (STRSXP and VECSXP)
+        tmp[i] = cvd[idx[i]-1];  // copies 8 bytes; including pointers on 64bit (STRSXP and VECSXP)
       }
-      memcpy(REAL(v)+start, tmp+start, (end-start+1)*size);
+      memcpy(vd+start, tmp+start, (end-start+1)*size);
     }
   }
   // It's ok to ignore write barrier, and in parallel too, only because this reorder() function accepts and checks

From 6f57f2af928516392fabacae26178d05a903ccab Mon Sep 17 00:00:00 2001
From: mattdowle <mattjdowle@gmail.com>
Date: Wed, 23 Jan 2019 21:57:55 -0800
Subject: [PATCH 4/9] Passes 'R CMD check' again on R 3.5.2. Probably not on R
 3.1.0 yet.

---
 inst/tests/tests.Rraw | 21 ++++++++-------------
 src/assign.c          | 24 +++++++++---------------
 2 files changed, 17 insertions(+), 28 deletions(-)

diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index b881644d95..84bed528fd 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -5291,21 +5291,16 @@ test(1364.24, setdiff_(X, Y[0L], by.x = 'a'),
                 c = c("1", "3", "2"), d = c(1L, 3L, 2L), e = c(1L, 5L, 7L)))
 
 # not join along with by=.EACHI, #604
-DT <- data.table(A=c(1,1,1,2,2,2,2,3,3,4,5,5))[, `:=`(B=as.integer(A), C=c("c", "e", "a", "d"), D=factor(c("c", "e", "a", "d")), E=1:12)]
+DT <- data.table(A=c(1,1,1,2,2,2,2,3,3,4,5,5))[, `:=`(B=as.integer(A), C=rep(c("c", "e", "a", "d"),3L), D=factor(rep(c("c", "e", "a", "d"),3L)), E=1:12)]
 setkey(DT, A)
-test(1365.1, suppressMessages(DT[!J(c(2,5)), sum(E), by=.EACHI]),
-            suppressMessages(DT[J(c(1,3,4)), sum(E), by=.EACHI]))
+test(1365.1, DT[!J(c(2,5)), sum(E), by=.EACHI], DT[J(c(1,3,4)), sum(E), by=.EACHI])
 setkey(DT, B)
-test(1365.2, suppressMessages(DT[!J(c(4:5)), list(.N, sum(E)), by=.EACHI]),
-            suppressMessages(DT[J(1:3), list(.N, sum(E)), by=.EACHI]))
+test(1365.2, DT[!J(c(4:5)), list(.N, sum(E)), by=.EACHI], DT[J(1:3), list(.N, sum(E)), by=.EACHI])
 setkey(DT, C)
-test(1365.3, suppressMessages(copy(DT)[!"c", f := .N, by=.EACHI]),
-            suppressMessages(copy(DT)[c("a", "d", "e"), f := .N, by=.EACHI]))
+test(1365.3, copy(DT)[!"c", f:=.N, by=.EACHI], copy(DT)[c("a","d","e"), f:=.N, by=.EACHI])
 setkey(DT, D)
-test(1365.4, suppressMessages(DT[!J(factor("c")), .N, by=.EACHI]),
-            suppressMessages(DT[J(factor(c("a", "d", "e"))), .N, by=.EACHI]))
-test(1365.5, suppressMessages(DT[!"c", lapply(.SD, sum), by=.EACHI, .SDcols=c("B", "E")]),
-            suppressMessages(DT[c("a", "d", "e"), lapply(.SD, sum), by=.EACHI, .SDcols=c("B", "E")]))
+test(1365.4, DT[!J(factor("c")), .N, by=.EACHI], DT[J(factor(c("a","d","e"))), .N, by=.EACHI])
+test(1365.5, DT[!"c", lapply(.SD, sum), by=.EACHI, .SDcols=c("B","E")], DT[c("a","d","e"), lapply(.SD, sum), by=.EACHI, .SDcols=c("B", "E")])
 
 # uniqlengths doesn't error on 0-length input
 test(1366, uniqlengths(integer(0), 0L), integer(0))
@@ -8527,7 +8522,7 @@ setindex(dt, b)
 dt[, a := as.logical(sum(a)), by = b]
 test(1632.2, names(attributes(attr(dt, 'index'))), "__b")
 dt = data.table(a = rep(TFvec, 3), b = c("x", "y", "z"))
-test(1632.3, copy(dt)[, c := !a, by=b], copy(dt)[, c := !TFvec])
+test(1632.3, copy(dt)[, c := !a, by=b], copy(dt)[, c := rep(!TFvec,3L)])
 
 # by accepts colA:colB for interactive scenarios, #1395
 dt = data.table(x=rep(1,18), y=rep(1:2, each=9), z=rep(1:3,each=6), a=rep(1:6, each=3))[, b := 6]
@@ -12344,7 +12339,7 @@ test(1919, as.ITime(c('xxx', '10:43')), structure(c(NA, 38580L), class = "ITime"
 
 # wrong bmerge result if character gets coerced to factor, i is keyed, and level order in i is different from x, #2881
 iris = data.table(iris)
-iris$grp = c('A', 'B')
+iris$grp = rep(c('A','B'), 75L)
 iris[, Species1 := factor(Species, levels=c('setosa','versicolor','virginica'), labels=c('setosa','versicolor','Virginica'))]
 iSorted = data.table(Species1 = c('setosa','Virginica'), grp='B', key=c("grp","Species1"))
 i = setkey(copy(iSorted),NULL)
diff --git a/src/assign.c b/src/assign.c
index 1eb04d5f95..3e323c99f5 100644
--- a/src/assign.c
+++ b/src/assign.c
@@ -807,21 +807,18 @@ SEXP assign(SEXP dt, SEXP rows, SEXP cols, SEXP newcolnames, SEXP values, SEXP v
   return(dt);  // needed for `*tmp*` mechanism (when := isn't used), and to return the new object after a := for compound syntax.
 }
 
-/*
-static Rboolean anyNamed(SEXP x) {
-  if (MAYBE_REFERENCED(x)) return TRUE;
+static bool anyNamed(SEXP x) {
+  if (MAYBE_REFERENCED(x)) return true;
   if (isNewList(x)) for (int i=0; i<LENGTH(x); i++)
-    if (anyNamed(VECTOR_ELT(x,i))) return TRUE;
-  return FALSE;
+    if (anyNamed(VECTOR_ELT(x,i))) return true;
+  return false;
 }
-*/
 
 void memrecycle(SEXP target, SEXP where, int start, int len, SEXP source)
 // like memcpy but recycles single-item source
 // 'where' a 1-based INTEGER vector subset of target to assign to, or NULL or integer()
 // assigns to target[start:start+len-1] or target[where[start:start+len-1]] where start is 0-based
 {
-  // DELETE ...int protecti=0;
   if (len<1) return;
   if (TYPEOF(target) != TYPEOF(source)) error("Internal error: TYPEOF(target)['%s']!=TYPEOF(source)['%s']", type2char(TYPEOF(target)),type2char(TYPEOF(source))); // # nocov
   int slen = length(source);
@@ -829,8 +826,7 @@ void memrecycle(SEXP target, SEXP where, int start, int len, SEXP source)
   // Internal error because the column has already been added to the DT, so length mismatch should have been caught before adding the column.
   // for 5647 this used to limit slen to len, but no longer
 
-  /*
-  TODELETE ...
+  int protecti=0;
   if (isNewList(source)) {
     // A list() column; i.e. target is a column of pointers to SEXPs rather than the much more common case
     // where memrecycle copies the DATAPTR data to the atomic target from the atomic source.
@@ -843,14 +839,12 @@ void memrecycle(SEXP target, SEXP where, int start, int len, SEXP source)
     // SEXP pointed to.
     // If source is already not named (because j already created a fresh unnamed vector within a list()) we don't want to
     // duplicate unnecessarily, hence checking for named rather than duplicating always.
-    // See #481 and #1270
+    // See #481, #1270 and tests 1341.* fail without this duplicate().
     if (anyNamed(source)) {
       source = PROTECT(duplicate(source));
       protecti++;
     }
   }
-  */
-  size_t size = SIZEOF(target);
   if (!length(where)) {
     switch (TYPEOF(target)) {
     case LGLSXP: case INTSXP :
@@ -860,7 +854,7 @@ void memrecycle(SEXP target, SEXP where, int start, int len, SEXP source)
         const int val = INTEGER(source)[0];
         for (int i=0; i<len; i++) td[start+i] = val;  // no R API inside loop as INTEGER has overhead (even when it's an inline function)
       } else {
-        memcpy(INTEGER(target)+start, INTEGER(source), slen*size);
+        memcpy(INTEGER(target)+start, INTEGER(source), slen*SIZEOF(target));
       }
       break;
     case REALSXP :
@@ -869,7 +863,7 @@ void memrecycle(SEXP target, SEXP where, int start, int len, SEXP source)
         const double val = REAL(source)[0];
         for (int i=0; i<len; i++) td[start+i] = val;
       } else {
-        memcpy(REAL(target)+start, REAL(source), slen*size);
+        memcpy(REAL(target)+start, REAL(source), slen*SIZEOF(target));
       }
       break;
     case STRSXP :
@@ -935,7 +929,7 @@ void memrecycle(SEXP target, SEXP where, int start, int len, SEXP source)
       error("Unsupported type '%s'", type2char(TYPEOF(target)));
     }
   }
-  // DELETE ... UNPROTECT(protecti);
+  UNPROTECT(protecti);
 }
 
 SEXP allocNAVector(SEXPTYPE type, R_len_t n)

From b72382e8ac93c4f73a4f0c7a1d4cd9e0debfd05d Mon Sep 17 00:00:00 2001
From: mattdowle <mattjdowle@gmail.com>
Date: Wed, 23 Jan 2019 22:53:00 -0800
Subject: [PATCH 5/9] news item tweak

---
 NEWS.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/NEWS.md b/NEWS.md
index b5e2c25af8..902d2d8110 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -4,7 +4,7 @@
 
 #### NEW FEATURES
 
-1. `:=` no longer recycles too-short RHS vectors to match the length on the LHS. Like base R, there was a warning when recycling left a remainder but no warning when the LHS length was an exact multiple of the RHS length. All cases are now an error unless `length(RHS)==1` or `length(RHS)==length(LHS)`. Consistent feedback was that recycling is rarely useful and more often a bug. In rare cases where you need to recycle a length>1 vector, use `rep()` explicitly. Single values are still recycled silently as before.
+1. `:=` no longer recycles too-short RHS vectors to match the LHS length. There was a warning when recycling left a remainder but no warning when the LHS length was an exact multiple of the RHS length (the same behaviour as base R). All recycling is now an error unless `length(RHS)==1` or `length(RHS)==length(LHS)`. Consistent feedback was that recycling is rarely useful and more often a bug. In rare cases where you need to recycle a length>1 vector, use `rep()` explicitly. Single values are still recycled silently as before.
 
 #### BUG FIXES
 

From 56f72904ddf3f5b6fa548831ebc923de02cf4981 Mon Sep 17 00:00:00 2001
From: mattdowle <mattjdowle@gmail.com>
Date: Thu, 24 Jan 2019 15:53:05 -0800
Subject: [PATCH 6/9] CRAN_Release.cmd only

---
 CRAN_Release.cmd | 30 ++++++++++++++----------------
 1 file changed, 14 insertions(+), 16 deletions(-)

diff --git a/CRAN_Release.cmd b/CRAN_Release.cmd
index 38c19ea24b..a1927b554e 100644
--- a/CRAN_Release.cmd
+++ b/CRAN_Release.cmd
@@ -106,15 +106,15 @@ test.data.table()
 install.packages("xml2")   # to check the 150 URLs in NEWS.md under --as-cran below
 q("no")
 R CMD build .
-R CMD check data.table_1.11.9.tar.gz --as-cran
-R CMD INSTALL data.table_1.11.9.tar.gz
+R CMD check data.table_1.12.1.tar.gz --as-cran
+R CMD INSTALL data.table_1.12.1.tar.gz
 
 # Test C locale doesn't break test suite (#2771)
 echo LC_ALL=C > ~/.Renviron
 R
 Sys.getlocale()=="C"
 q("no")
-R CMD check data.table_1.11.9.tar.gz
+R CMD check data.table_1.12.1.tar.gz
 rm ~/.Renviron
 
 R
@@ -145,7 +145,7 @@ alias R310=~/build/R-3.1.0/bin/R
 ### END ONE TIME BUILD
 
 cd ~/GitHub/data.table
-R310 CMD INSTALL ./data.table_1.11.9.tar.gz
+R310 CMD INSTALL ./data.table_1.12.1.tar.gz
 R310
 require(data.table)
 test.data.table()
@@ -157,7 +157,7 @@ test.data.table()
 vi ~/.R/Makevars
 # Make line SHLIB_OPENMP_CFLAGS= active to remove -fopenmp
 R CMD build .
-R CMD INSTALL data.table_1.11.9.tar.gz   # ensure that -fopenmp is missing and there are no warnings
+R CMD INSTALL data.table_1.12.1.tar.gz   # ensure that -fopenmp is missing and there are no warnings
 R
 require(data.table)   # observe startup message about no OpenMP detected
 test.data.table()
@@ -165,7 +165,7 @@ q("no")
 vi ~/.R/Makevars
 # revert change above
 R CMD build .
-R CMD check data.table_1.11.9.tar.gz
+R CMD check data.table_1.12.1.tar.gz
 
 #####################################################
 #  R-devel with UBSAN, ASAN and strict-barrier on too
@@ -194,7 +194,7 @@ cd R-devel-strict    # important to change directory name before building not af
 make
 alias Rdevel-strict='~/build/R-devel-strict/bin/R --vanilla'
 cd ~/GitHub/data.table
-Rdevel-strict CMD INSTALL data.table_1.11.9.tar.gz
+Rdevel-strict CMD INSTALL data.table_1.12.1.tar.gz
 # Check UBSAN and ASAN flags appear in compiler output above. Rdevel was compiled with them so should be passed through to here
 Rdevel-strict
 install.packages(c("bit64","xts","nanotime","R.utils"), repos="http://cloud.r-project.org")  # minimum packages needed to not skip any tests in test.data.table()
@@ -230,7 +230,7 @@ cd R-devel
 make
 cd ~/GitHub/data.table
 vi ~/.R/Makevars  # make the -O0 -g line active, for info on source lines with any problems
-Rdevel CMD INSTALL data.table_1.11.9.tar.gz
+Rdevel CMD INSTALL data.table_1.12.1.tar.gz
 Rdevel -d "valgrind --tool=memcheck --leak-check=full --track-origins=yes --show-leak-kinds=definite"
 # gctorture(TRUE)      # very slow, many days
 # gctorture2(step=100)
@@ -272,7 +272,7 @@ There are some things to overcome to achieve compile without USE_RINTERNALS, tho
 ## Rdevel
 ## install.packages("bit64")
 ## q("no")
-## Rdevel CMD INSTALL ~/data.table_1.11.9.tar.gz
+## Rdevel CMD INSTALL ~/data.table_1.12.1.tar.gz
 ## Rdevel
 ## .Machine$sizeof.longdouble   # check 0
 ## require(data.table)
@@ -287,7 +287,7 @@ cd ~/build/rchk/trunk
 . ../scripts/config.inc
 . ../scripts/cmpconfig.inc
 # edit ~/.R/Makevars to set CFLAGS=-O0 -g so that rchk can provide source line numbers
-echo 'install.packages("~/GitHub/data.table/data.table_1.11.9.tar.gz",repos=NULL)' | ./bin/R --slave
+echo 'install.packages("~/GitHub/data.table/data.table_1.12.1.tar.gz",repos=NULL)' | ./bin/R --slave
 ../scripts/check_package.sh data.table
 cat packages/lib/data.table/libs/*check
 # keep running and rerunning locally until all problems cease.
@@ -413,8 +413,7 @@ BiocManager::valid()
 
 avail = available.packages(repos=BiocManager::repositories())  # includes CRAN at the end from getOption("repos"). And ensure latest Bioc version is in repo path here.
 deps = tools::package_dependencies("data.table", db=avail, which="most", reverse=TRUE, recursive=FALSE)[[1]]
-exclude = c("TCGAbiolinks",  # too long (>30mins): https://github.com/BioinformaticsFMRP/TCGAbiolinks/issues/240
-            "ctsem")         # too long (>30mins) see Ttotal on CRAN checks and warnings/errors: https://cran.r-project.org/web/checks/check_results_ctsem.html
+exclude = c("TCGAbiolinks")  # too long (>30mins): https://github.com/BioinformaticsFMRP/TCGAbiolinks/issues/240
 deps = deps[-match(exclude, deps)]
 table(avail[deps,"Repository"])
 length(deps)
@@ -552,7 +551,7 @@ run = function(which=c("not.started","cran.fail","bioc.fail","both.fail","rerun.
 }
 
 # ** ensure latest version installed into revdeplib **
-system("R CMD INSTALL ~/GitHub/data.table/data.table_1.11.9.tar.gz")
+system("R CMD INSTALL ~/GitHub/data.table/data.table_1.12.1.tar.gz")
 run("rerun.all")
 
 out = function(fnam="~/fail.log") {
@@ -561,10 +560,9 @@ out = function(fnam="~/fail.log") {
   cat(paste(x,collapse=" "), "\n")
   cat(capture.output(sessionInfo()), "\n", file=fnam, sep="\n")
   cat("> BiocManager::install()\n", file=fnam, append=TRUE)
-  capture.output(BiocManager::install(), type="message", file=fnam, append=TRUE)
+  cat(capture.output(BiocManager::install(), type="message"), file=fnam, sep="\n", append=TRUE)
   cat("> BiocManager::valid()\n", file=fnam, append=TRUE)
-  capture.output(ans<-BiocManager::valid(), type="message", file=fnam, append=TRUE)
-  cat(ans, "\n", file=fnam, append=TRUE, sep="\n")
+  cat(isTRUE(BiocManager::valid()), "\n\n\n", file=fnam, append=TRUE)
   for (i in x) {
     system(paste0("ls | grep '",i,".*tar.gz' >> ",fnam))
     system(paste0("grep -H . ./",i,".Rcheck/00check.log >> ",fnam))

From 70ab25df880d55f8f59ef7d6f98630b43b6248a0 Mon Sep 17 00:00:00 2001
From: mattdowle <mattjdowle@gmail.com>
Date: Thu, 24 Jan 2019 17:29:09 -0800
Subject: [PATCH 7/9] news item: better focus on length>1 vectors

---
 NEWS.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/NEWS.md b/NEWS.md
index 902d2d8110..9daf1abd90 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -4,7 +4,7 @@
 
 #### NEW FEATURES
 
-1. `:=` no longer recycles too-short RHS vectors to match the LHS length. There was a warning when recycling left a remainder but no warning when the LHS length was an exact multiple of the RHS length (the same behaviour as base R). All recycling is now an error unless `length(RHS)==1` or `length(RHS)==length(LHS)`. Consistent feedback was that recycling is rarely useful and more often a bug. In rare cases where you need to recycle a length>1 vector, use `rep()` explicitly. Single values are still recycled silently as before.
+1. `:=` no longer recycles length>1 RHS vectors. There was a warning when recycling left a remainder but no warning when the LHS length was an exact multiple of the RHS length (the same behaviour as base R). Consistent feedback for several years has been that recycling is more often a bug. In rare cases where you need to recycle a length>1 vector, use `rep()` explicitly. Single values are still recycled silently as before. Early warning was given in [this tweet](https://twitter.com/MattDowle/status/1088544083499311104). The 758 CRAN and Bioconductor packages using data.table were tested and the maintainers of the 16 packages affected (2%) were consulted before going ahead.
 
 #### BUG FIXES
 

From c615af8d52797bd09def6f5bbbe74897126bf267 Mon Sep 17 00:00:00 2001
From: mattdowle <mattjdowle@gmail.com>
Date: Sun, 3 Feb 2019 11:32:40 -0800
Subject: [PATCH 8/9] Reinstated USE_RINTERNALS so the progress in the branch
 can be merged and released.

---
 src/data.table.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/data.table.h b/src/data.table.h
index 20c496fa27..498c6d6f88 100644
--- a/src/data.table.h
+++ b/src/data.table.h
@@ -1,4 +1,5 @@
 #include <R.h>
+#define USE_RINTERNALS
 #include <Rinternals.h>
 // #include <signal.h> // the debugging machinery + breakpoint aidee
 // raise(SIGINT);

From 92b785ad6cb6b31c9590c5f4297c8dfd622f9873 Mon Sep 17 00:00:00 2001
From: mattdowle <mattjdowle@gmail.com>
Date: Sun, 3 Feb 2019 12:00:44 -0800
Subject: [PATCH 9/9] Added PR link to news item

---
 NEWS.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/NEWS.md b/NEWS.md
index e8bda624be..3a1036e651 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -4,7 +4,7 @@
 
 #### NEW FEATURES
 
-1. `:=` no longer recycles length>1 RHS vectors. There was a warning when recycling left a remainder but no warning when the LHS length was an exact multiple of the RHS length (the same behaviour as base R). Consistent feedback for several years has been that recycling is more often a bug. In rare cases where you need to recycle a length>1 vector, use `rep()` explicitly. Single values are still recycled silently as before. Early warning was given in [this tweet](https://twitter.com/MattDowle/status/1088544083499311104). The 758 CRAN and Bioconductor packages using data.table were tested and the maintainers of the 16 packages affected (2%) were consulted before going ahead.
+1. `:=` no longer recycles length>1 RHS vectors. There was a warning when recycling left a remainder but no warning when the LHS length was an exact multiple of the RHS length (the same behaviour as base R). Consistent feedback for several years has been that recycling is more often a bug. In rare cases where you need to recycle a length>1 vector, use `rep()` explicitly. Single values are still recycled silently as before. Early warning was given in [this tweet](https://twitter.com/MattDowle/status/1088544083499311104). The 758 CRAN and Bioconductor packages using data.table were tested and the maintainers of the 16 packages affected (2%) were consulted before going ahead, [#3310](https://github.com/Rdatatable/data.table/pull/3310).
 
 #### BUG FIXES