From dc006ba51d29ad57410e44490d6f375e4ca0b3ab Mon Sep 17 00:00:00 2001
From: jangorecki <j.gorecki@wit.edu.pl>
Date: Sun, 17 Mar 2019 19:26:00 +0530
Subject: [PATCH 1/8] gforce opts for first-last, head-tail, closes #2030 and
 #3462

---
 NEWS.md               | 5 +++++
 R/data.table.R        | 7 ++++++-
 inst/tests/tests.Rraw | 9 +++++++++
 3 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/NEWS.md b/NEWS.md
index 610f678e31..d89e03f875 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -34,6 +34,11 @@
 
 11. A join's result could be incorrectly keyed when a single nomatch occurred at the very beginning while all other values matched, [#3441](https://github.com/Rdatatable/data.table/issues/3441). The incorrect key would cause incorrect results in subsequent queries. Thanks to @symbalex for reporting and @franknarf1 for pinpointing the root cause.
 
+12. `first` and `last` methods called by group inside `[.data.table` on `.SD` are now optimized to GForce. [#2030](https://github.com/Rdatatable/data.table/issues/2030). Thanks to @franknarf1 for reporting.
+
+13. `head` and `tail` methods called by group inside `[.data.table` on `.SD` now use default `n` argument. [#3462](https://github.com/Rdatatable/data.table/issues/3462).
+
+
 #### NOTES
 
 1. When upgrading to 1.12.0 some Windows users might have seen `CdllVersion not found` in some circumstances. We found a way to catch that so the [helpful message](https://twitter.com/MattDowle/status/1084528873549705217) now occurs for those upgrading from versions prior to 1.12.0 too, as well as those upgrading from 1.12.0 to a later version. See item 1 in notes section of 1.12.0 below for more background.
diff --git a/R/data.table.R b/R/data.table.R
index 2052d1c921..07993a26b8 100644
--- a/R/data.table.R
+++ b/R/data.table.R
@@ -1613,7 +1613,12 @@ chmatch2 <- function(x, table, nomatch=NA_integer_) {
         jvnames = ansvarsnew
       }
     } else if (length(as.character(jsub[[1L]])) == 1L) {  # Else expect problems with <jsub[[1L]] == >
-      if (length(jsub) == 3L && (jsub[[1L]] == "[" || jsub[[1L]] == "head" || jsub[[1L]] == "tail") && jsub[[2L]] == ".SD" && (is.numeric(jsub[[3L]]) || jsub[[3L]] == ".N") ) {
+      subopt = length(jsub) == 3L && jsub[[1L]] == "[" && (is.numeric(jsub[[3L]]) || jsub[[3L]] == ".N")
+      headopt = jsub[[1L]] == "head" || jsub[[1L]] == "tail"
+      firstopt = jsub[[1L]] == "first" || jsub[[1L]] == "last" # fix for #2030
+      if ((length(jsub) >= 2L && jsub[[2L]] == ".SD") &&
+          (subopt || headopt || firstopt)) {
+        if (headopt && length(jsub)==2L) jsub[["n"]] = 6L # head-tail n=6 when missing #3462
         # optimise .SD[1] or .SD[2L]. Not sure how to test .SD[a] as to whether a is numeric/integer or a data.table, yet.
         jsub = as.call(c(quote(list), lapply(ansvarsnew, function(x) { jsub[[2L]] = as.name(x); jsub })))
         jvnames = ansvarsnew
diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index 2993c05a22..10281a36ce 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -13626,6 +13626,15 @@ dx = data.table(id = 1L, key = "id")
 di = list(z=c(2L, 1L))
 test(1999.2, key(dx[di]), NULL)
 
+# gfirst(.SD) throws an error about not using head(.SD, n), but the latter works #2030
+DT = data.table(id = c(1L,1L,2L), v = 1:3)
+test(2000.1, DT[, first(.SD), by=id, .SDcols="v"], DT[, head(.SD, 1L), by=id])
+DT = data.table(id = c(1L,1L,2L), v = 1:3, y = 3:1)
+test(2000.2, DT[, first(.SD), by=id, .SDcols=c("v","y")], DT[, head(.SD, 1L), by=id, .SDcols=c("v","y")])
+# ghead argument "n" is missing, with no default #3462
+DT = data.table(a=c(rep(1L, 7L), rep(2L, 5L)), b=1:12, d=12:1)
+test(2001, DT[, head(.SD), a], data.table(a=c(rep(1L, 6L), rep(2L, 5L)), b=c(1:6, 8:12), d=c(12:7, 5:1)))
+
 
 ###################################
 #  Add new tests above this line  #

From f0a0a94e0b740474ecfae3a90e32cef6328a78c6 Mon Sep 17 00:00:00 2001
From: jangorecki <j.gorecki@wit.edu.pl>
Date: Sun, 17 Mar 2019 19:43:52 +0530
Subject: [PATCH 2/8] R dataptrs out of loops in gforce funs

---
 src/gsumm.c | 63 ++++++++++++++++++++++++++++++++++++++---------------
 1 file changed, 45 insertions(+), 18 deletions(-)

diff --git a/src/gsumm.c b/src/gsumm.c
index 419d0459e2..6f8dc1f584 100644
--- a/src/gsumm.c
+++ b/src/gsumm.c
@@ -955,32 +955,41 @@ SEXP glast(SEXP x) {
   SEXP ans;
   if (nrow != n) error("nrow [%d] != length(x) [%d] in gtail", nrow, n);
   switch(TYPEOF(x)) {
-  case LGLSXP:
+  case LGLSXP: {
+    const int *ix = LOGICAL(x);
     ans = PROTECT(allocVector(LGLSXP, ngrp));
+    int *ians = LOGICAL(ans);
     for (i=0; i<ngrp; i++) {
       k = ff[i]+grpsize[i]-2;
       if (isunsorted) k = oo[k]-1;
       k = (irowslen == -1) ? k : irows[k]-1;
-      LOGICAL(ans)[i] = LOGICAL(x)[k];
+      ians[i] = ix[k];
     }
+  }
     break;
-  case INTSXP:
+  case INTSXP: {
+    const int *ix = INTEGER(x);
     ans = PROTECT(allocVector(INTSXP, ngrp));
+    int *ians = INTEGER(ans);
     for (i=0; i<ngrp; i++) {
       k = ff[i]+grpsize[i]-2;
       if (isunsorted) k = oo[k]-1;
       k = (irowslen == -1) ? k : irows[k]-1;
-      INTEGER(ans)[i] = INTEGER(x)[k];
+      ians[i] = ix[k];
     }
+  }
     break;
-  case REALSXP:
+  case REALSXP: {
+    const double *dx = REAL(x);
     ans = PROTECT(allocVector(REALSXP, ngrp));
+    double *dans = REAL(ans);
     for (i=0; i<ngrp; i++) {
       k = ff[i]+grpsize[i]-2;
       if (isunsorted) k = oo[k]-1;
       k = (irowslen == -1) ? k : irows[k]-1;
-      REAL(ans)[i] = REAL(x)[k];
+      dans[i] = dx[k];
     }
+  }
     break;
   case STRSXP:
     ans = PROTECT(allocVector(STRSXP, ngrp));
@@ -1017,32 +1026,41 @@ SEXP gfirst(SEXP x) {
   SEXP ans;
   if (nrow != n) error("nrow [%d] != length(x) [%d] in ghead", nrow, n);
   switch(TYPEOF(x)) {
-  case LGLSXP:
+  case LGLSXP: {
+    int const *ix = LOGICAL(x);
     ans = PROTECT(allocVector(LGLSXP, ngrp));
+    int *ians = LOGICAL(ans);
     for (i=0; i<ngrp; i++) {
       k = ff[i]-1;
       if (isunsorted) k = oo[k]-1;
       k = (irowslen == -1) ? k : irows[k]-1;
-      LOGICAL(ans)[i] = LOGICAL(x)[k];
+      ians[i] = ix[k];
     }
+  }
     break;
-  case INTSXP:
+  case INTSXP: {
+    const int *ix = INTEGER(x);
     ans = PROTECT(allocVector(INTSXP, ngrp));
+    int *ians = INTEGER(ans);
     for (i=0; i<ngrp; i++) {
       k = ff[i]-1;
       if (isunsorted) k = oo[k]-1;
       k = (irowslen == -1) ? k : irows[k]-1;
-      INTEGER(ans)[i] = INTEGER(x)[k];
+      ians[i] = ix[k];
     }
+  }
     break;
-  case REALSXP:
+  case REALSXP: {
+    const double *dx = REAL(x);
     ans = PROTECT(allocVector(REALSXP, ngrp));
+    double *dans = REAL(ans);
     for (i=0; i<ngrp; i++) {
       k = ff[i]-1;
       if (isunsorted) k = oo[k]-1;
       k = (irowslen == -1) ? k : irows[k]-1;
-      REAL(ans)[i] = REAL(x)[k];
+      dans[i] = dx[k];
     }
+  }
     break;
   case STRSXP:
     ans = PROTECT(allocVector(STRSXP, ngrp));
@@ -1088,35 +1106,44 @@ SEXP gnthvalue(SEXP x, SEXP valArg) {
   SEXP ans;
   if (nrow != n) error("nrow [%d] != length(x) [%d] in ghead", nrow, n);
   switch(TYPEOF(x)) {
-  case LGLSXP:
+  case LGLSXP: {
+    const int *ix = LOGICAL(x);
     ans = PROTECT(allocVector(LGLSXP, ngrp));
+    int *ians = LOGICAL(ans);
     for (i=0; i<ngrp; i++) {
       if (val > grpsize[i]) { LOGICAL(ans)[i] = NA_LOGICAL; continue; }
       k = ff[i]+val-2;
       if (isunsorted) k = oo[k]-1;
       k = (irowslen == -1) ? k : irows[k]-1;
-      LOGICAL(ans)[i] = LOGICAL(x)[k];
+      ians[i] = ix[k];
     }
+  }
     break;
-  case INTSXP:
+  case INTSXP: {
+    const int *ix = LOGICAL(x);
     ans = PROTECT(allocVector(INTSXP, ngrp));
+    int *ians = LOGICAL(ans);
     for (i=0; i<ngrp; i++) {
       if (val > grpsize[i]) { INTEGER(ans)[i] = NA_INTEGER; continue; }
       k = ff[i]+val-2;
       if (isunsorted) k = oo[k]-1;
       k = (irowslen == -1) ? k : irows[k]-1;
-      INTEGER(ans)[i] = INTEGER(x)[k];
+      ians[i] = ix[k];
     }
+  }
     break;
-  case REALSXP:
+  case REALSXP: {
+    const double *dx = REAL(x);
     ans = PROTECT(allocVector(REALSXP, ngrp));
+    double *dans = REAL(ans);
     for (i=0; i<ngrp; i++) {
       if (val > grpsize[i]) { REAL(ans)[i] = NA_REAL; continue; }
       k = ff[i]+val-2;
       if (isunsorted) k = oo[k]-1;
       k = (irowslen == -1) ? k : irows[k]-1;
-      REAL(ans)[i] = REAL(x)[k];
+      dans[i] = dx[k];
     }
+  }
     break;
   case STRSXP:
     ans = PROTECT(allocVector(STRSXP, ngrp));

From 0eae0bc2e1382c14543bf3c0657c5d5f112a2f49 Mon Sep 17 00:00:00 2001
From: jangorecki <j.gorecki@wit.edu.pl>
Date: Mon, 18 Mar 2019 09:55:23 +0530
Subject: [PATCH 3/8] coverage gforce

---
 R/data.table.R        |  2 +-
 inst/tests/tests.Rraw | 26 +++++++++++++++++++++++---
 src/gsumm.c           |  4 ++--
 3 files changed, 26 insertions(+), 6 deletions(-)

diff --git a/R/data.table.R b/R/data.table.R
index 07993a26b8..a96c0efff3 100644
--- a/R/data.table.R
+++ b/R/data.table.R
@@ -1717,7 +1717,7 @@ chmatch2 <- function(x, table, nomatch=NA_integer_) {
       else
         cat("lapply optimization is on, j unchanged as '",deparse(jsub,width.cutoff=200L),"'\n",sep="")
     }
-    dotN <- function(x) if (is.name(x) && x == ".N") TRUE else FALSE # For #5760
+    dotN <- function(x) is.name(x) && x == ".N" # For #5760
     # FR #971, GForce kicks in on all subsets, no joins yet. Although joins could work with
     # nomatch=0L even now.. but not switching it on yet, will deal it separately.
     if (getOption("datatable.optimize")>=2 && !is.data.table(i) && !byjoin && length(f__) && !length(lhs)) {
diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index 10281a36ce..b5659c4519 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -13629,11 +13629,31 @@ test(1999.2, key(dx[di]), NULL)
 # gfirst(.SD) throws an error about not using head(.SD, n), but the latter works #2030
 DT = data.table(id = c(1L,1L,2L), v = 1:3)
 test(2000.1, DT[, first(.SD), by=id, .SDcols="v"], DT[, head(.SD, 1L), by=id])
-DT = data.table(id = c(1L,1L,2L), v = 1:3, y = 3:1)
-test(2000.2, DT[, first(.SD), by=id, .SDcols=c("v","y")], DT[, head(.SD, 1L), by=id, .SDcols=c("v","y")])
+DT = data.table(id = c(1L,1L,2L), v = 1:3, y = 3:1, z = c(TRUE, TRUE, FALSE), u = c("a","b","c"))
+test(2000.2, DT[, first(.SD), by=id, .SDcols=c("v","y","z","u")], DT[, head(.SD, 1L), by=id, .SDcols=c("v","y","z","u")])
+test(2000.3, DT[, last(.SD), by=id, .SDcols=c("v","y","z","u")], DT[, tail(.SD, 1L), by=id, .SDcols=c("v","y","z","u")])
 # ghead argument "n" is missing, with no default #3462
 DT = data.table(a=c(rep(1L, 7L), rep(2L, 5L)), b=1:12, d=12:1)
-test(2001, DT[, head(.SD), a], data.table(a=c(rep(1L, 6L), rep(2L, 5L)), b=c(1:6, 8:12), d=c(12:7, 5:1)))
+test(2001.1, DT[, head(.SD), a], data.table(a=c(rep(1L, 6L), rep(2L, 5L)), b=c(1:6, 8:12), d=c(12:7, 5:1)))
+# test(2001.2, DT[, head(b), a], data.table(a=c(rep(1L, 6L), rep(2L, 5L)), b=c(1:6, 8:12))) # TODO, requires rework of https://github.com/Rdatatable/data.table/blob/8a355cf653977752f8d39c23cf0ea6a7714851eb/R/data.table.R#L1728
+# gforce tests coverage
+if (test_bit64) {
+  DT = data.table(id=c(rep(1L,3), rep(2L, 3)), v=bit64::as.integer64(c(1:3, 4L, 5:6)))
+  test(2002.01, DT[2:6, sum(v), id], data.table(id=1:2, V1=bit64::as.integer64(c(5L,15L)))) # gather, case of int64 and irows
+}
+DT = data.table(id = c(1L,1L,2L), v = c(1i, 2i, 3i))
+test(2002.02, DT[, min(v), by=id], error="not supported by GForce min")
+test(2002.03, DT[, max(v), by=id], error="not supported by GForce max")
+test(2002.04, DT[, median(v), by=id], error="not supported by GForce median")
+test(2002.05, DT[, head(v, 1), by=id], error="not supported by GForce head")
+test(2002.06, DT[, tail(v, 1), by=id], error="not supported by GForce tail")
+test(2002.07, DT[, v[1], by=id], error="not supported by GForce subset")
+test(2002.08, DT[, sd(v), by=id], error="not supported by GForce sd")
+test(2002.09, DT[, var(v), by=id], error="not supported by GForce var")
+test(2002.10, DT[, prod(v), by=id], error="not supported by GForce prod")
+DT = data.table(id = c(1L,1L,2L,2L), v = c(1L, 2L, NA, NA))
+test(2002.11, DT[, median(v), id], data.table(id=1:2, V1=c(1.5, NA))) # median whole group has NAs
+
 
 
 ###################################
diff --git a/src/gsumm.c b/src/gsumm.c
index 6f8dc1f584..0960aea65a 100644
--- a/src/gsumm.c
+++ b/src/gsumm.c
@@ -496,7 +496,7 @@ SEXP gmean(SEXP x, SEXP narm)
     break;
   default:
     free(s); free(c);
-    error("Type '%s' not supported by GForce mean (gmean) na.rm=TRUE. Either add the prefix base::mean(.) or turn off GForce optimization using options(datatable.optimize=1)", type2char(TYPEOF(x)));
+    error("Type '%s' not supported by GForce mean (gmean) na.rm=TRUE. Either add the prefix base::mean(.) or turn off GForce optimization using options(datatable.optimize=1)", type2char(TYPEOF(x))); // # nocov because it already stops at gsum, remove nocov if gmean will support a type that gsum wont
   }
   ans = PROTECT(allocVector(REALSXP, ngrp));
   for (int i=0; i<ngrp; i++) {
@@ -1297,7 +1297,7 @@ SEXP gvarsd1(SEXP x, SEXP narm, Rboolean isSD)
     SETLENGTH(sub, maxgrpn);
     break;
   default:
-      if (isSD) {
+      if (!isSD) {
         error("Type '%s' not supported by GForce var (gvar). Either add the prefix stats::var(.) or turn off GForce optimization using options(datatable.optimize=1)", type2char(TYPEOF(x)));
       } else {
         error("Type '%s' not supported by GForce sd (gsd). Either add the prefix stats::sd(.) or turn off GForce optimization using options(datatable.optimize=1)", type2char(TYPEOF(x)));

From c2599d884bf2980ca8ef49948b1af6313793b00a Mon Sep 17 00:00:00 2001
From: jangorecki <j.gorecki@wit.edu.pl>
Date: Mon, 18 Mar 2019 12:38:17 +0530
Subject: [PATCH 4/8] more coverage

---
 inst/tests/tests.Rraw | 13 ++++++++-----
 src/gsumm.c           |  4 ++--
 2 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index b5659c4519..4bfe9f5d75 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -13628,14 +13628,18 @@ test(1999.2, key(dx[di]), NULL)
 
 # gfirst(.SD) throws an error about not using head(.SD, n), but the latter works #2030
 DT = data.table(id = c(1L,1L,2L), v = 1:3)
-test(2000.1, DT[, first(.SD), by=id, .SDcols="v"], DT[, head(.SD, 1L), by=id])
+test(2000.1, DT[, first(.SD), by=id, .SDcols="v"], data.table(id=1:2, v=c(1L,3L)))
+test(2000.2, DT[, first(v), by=id], data.table(id=1:2, V1=c(1L,3L)))
+test(2000.3, DT[, last(v), by=id], data.table(id=1:2, V1=c(2L,3L)))
+test(2000.4, DT[, v[1L], by=id], data.table(id=1:2, V1=c(1L,3L)))
 DT = data.table(id = c(1L,1L,2L), v = 1:3, y = 3:1, z = c(TRUE, TRUE, FALSE), u = c("a","b","c"))
-test(2000.2, DT[, first(.SD), by=id, .SDcols=c("v","y","z","u")], DT[, head(.SD, 1L), by=id, .SDcols=c("v","y","z","u")])
-test(2000.3, DT[, last(.SD), by=id, .SDcols=c("v","y","z","u")], DT[, tail(.SD, 1L), by=id, .SDcols=c("v","y","z","u")])
+test(2000.5, DT[, first(.SD), by=id, .SDcols=c("v","y","z","u")], data.table(id=1:2, v=c(1L,3L), y=c(3L,1L), z=c(TRUE,FALSE), u=c("a","c")))
+test(2000.6, DT[, last(.SD), by=id, .SDcols=c("v","y","z","u")], data.table(id=1:2, v=c(2L,3L), y=c(2L,1L), z=c(TRUE,FALSE), u=c("b","c")))
+test(2000.7, DT[, .SD[1L], by=id, .SDcols=c("v","y","z","u")], data.table(id=1:2, v=c(1L,3L), y=c(3L,1L), z=c(TRUE,FALSE), u=c("a","c")))
 # ghead argument "n" is missing, with no default #3462
 DT = data.table(a=c(rep(1L, 7L), rep(2L, 5L)), b=1:12, d=12:1)
 test(2001.1, DT[, head(.SD), a], data.table(a=c(rep(1L, 6L), rep(2L, 5L)), b=c(1:6, 8:12), d=c(12:7, 5:1)))
-# test(2001.2, DT[, head(b), a], data.table(a=c(rep(1L, 6L), rep(2L, 5L)), b=c(1:6, 8:12))) # TODO, requires rework of https://github.com/Rdatatable/data.table/blob/8a355cf653977752f8d39c23cf0ea6a7714851eb/R/data.table.R#L1728
+# test(2001.2, DT[, head(b), a], data.table(a=c(rep(1L, 6L), rep(2L, 5L)), b=c(1:6, 8:12))) # TODO, requires rework of 8a355cf/R/data.table.R#L1728
 # gforce tests coverage
 if (test_bit64) {
   DT = data.table(id=c(rep(1L,3), rep(2L, 3)), v=bit64::as.integer64(c(1:3, 4L, 5:6)))
@@ -13655,7 +13659,6 @@ DT = data.table(id = c(1L,1L,2L,2L), v = c(1L, 2L, NA, NA))
 test(2002.11, DT[, median(v), id], data.table(id=1:2, V1=c(1.5, NA))) # median whole group has NAs
 
 
-
 ###################################
 #  Add new tests above this line  #
 ###################################
diff --git a/src/gsumm.c b/src/gsumm.c
index 0960aea65a..725f08978b 100644
--- a/src/gsumm.c
+++ b/src/gsumm.c
@@ -495,8 +495,8 @@ SEXP gmean(SEXP x, SEXP narm)
     }
     break;
   default:
-    free(s); free(c);
-    error("Type '%s' not supported by GForce mean (gmean) na.rm=TRUE. Either add the prefix base::mean(.) or turn off GForce optimization using options(datatable.optimize=1)", type2char(TYPEOF(x))); // # nocov because it already stops at gsum, remove nocov if gmean will support a type that gsum wont
+    free(s); free(c); // # nocov because it already stops at gsum, remove nocov if gmean will support a type that gsum wont
+    error("Type '%s' not supported by GForce mean (gmean) na.rm=TRUE. Either add the prefix base::mean(.) or turn off GForce optimization using options(datatable.optimize=1)", type2char(TYPEOF(x))); // # nocov
   }
   ans = PROTECT(allocVector(REALSXP, ngrp));
   for (int i=0; i<ngrp; i++) {

From 40f778de431d7c4265b43c9df9746064a88e899a Mon Sep 17 00:00:00 2001
From: jangorecki <j.gorecki@wit.edu.pl>
Date: Mon, 18 Mar 2019 21:18:42 +0530
Subject: [PATCH 5/8] missing n, handle non-SD head-tail input #3462

---
 R/data.table.R        | 2 +-
 inst/tests/tests.Rraw | 4 +++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/R/data.table.R b/R/data.table.R
index a96c0efff3..8cc1e86a20 100644
--- a/R/data.table.R
+++ b/R/data.table.R
@@ -1735,7 +1735,7 @@ chmatch2 <- function(x, table, nomatch=NA_integer_) {
           # Need is.symbol() check. See #1369, #1974 or #2949 issues and explanation below by searching for one of these issues.
           cond = is.call(q) && is.symbol(q[[1]]) && (q1c <- as.character(q[[1]])) %chin% gfuns && !is.call(q[[2L]])
           # run GForce for simple f(x) calls and f(x, na.rm = TRUE)-like calls
-          ans  = cond && (length(q)==2L || identical("na",substring(names(q)[3L], 1L, 2L)))
+          ans = cond && (length(q)==2L || identical("na",substring(names(q)[3L], 1L, 2L))) && (!q1c %chin% c("head","tail")) # head-tail uses default value n=6 which as of now should not go gforce
           if (identical(ans, TRUE)) return(ans)
           # otherwise there must be three arguments, and only in two cases --
           #   1) head/tail(x, 1) or 2) x[n], n>0
diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index 4bfe9f5d75..6e7a42f965 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -13639,7 +13639,9 @@ test(2000.7, DT[, .SD[1L], by=id, .SDcols=c("v","y","z","u")], data.table(id=1:2
 # ghead argument "n" is missing, with no default #3462
 DT = data.table(a=c(rep(1L, 7L), rep(2L, 5L)), b=1:12, d=12:1)
 test(2001.1, DT[, head(.SD), a], data.table(a=c(rep(1L, 6L), rep(2L, 5L)), b=c(1:6, 8:12), d=c(12:7, 5:1)))
-# test(2001.2, DT[, head(b), a], data.table(a=c(rep(1L, 6L), rep(2L, 5L)), b=c(1:6, 8:12))) # TODO, requires rework of 8a355cf/R/data.table.R#L1728
+test(2001.2, DT[, head(b), a], data.table(a=c(rep(1L, 6L), rep(2L, 5L)), V1=c(1:6, 8:12)))
+test(2001.3, DT[, tail(.SD), a], data.table(a=c(rep(1L, 6L), rep(2L, 5L)), b=c(2:7, 8:12), d=c(11:6, 5:1)))
+test(2001.4, DT[, tail(b), a], data.table(a=c(rep(1L, 6L), rep(2L, 5L)), V1=c(2:7, 8:12)))
 # gforce tests coverage
 if (test_bit64) {
   DT = data.table(id=c(rep(1L,3), rep(2L, 3)), v=bit64::as.integer64(c(1:3, 4L, 5:6)))

From 3389fd2441aad6075cf3de598fb1e4e8f90521c6 Mon Sep 17 00:00:00 2001
From: mattdowle <mattjdowle@gmail.com>
Date: Mon, 1 Apr 2019 14:29:11 -0700
Subject: [PATCH 6/8] Added 'complex' to the error strings to test that part of
 error message construction

---
 inst/tests/tests.Rraw | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index f84b45d726..f51c6046c0 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -13947,15 +13947,15 @@ if (test_bit64) {
   test(2019.01, DT[2:6, sum(v), id], data.table(id=1:2, V1=bit64::as.integer64(c(5L,15L)))) # gather, case of int64 and irows
 }
 DT = data.table(id = c(1L,1L,2L), v = c(1i, 2i, 3i))
-test(2019.02, DT[, min(v), by=id], error="not supported by GForce min")
-test(2019.03, DT[, max(v), by=id], error="not supported by GForce max")
-test(2019.04, DT[, median(v), by=id], error="not supported by GForce median")
-test(2019.05, DT[, head(v, 1), by=id], error="not supported by GForce head")
-test(2019.06, DT[, tail(v, 1), by=id], error="not supported by GForce tail")
-test(2019.07, DT[, v[1], by=id], error="not supported by GForce subset")
-test(2019.08, DT[, sd(v), by=id], error="not supported by GForce sd")
-test(2019.09, DT[, var(v), by=id], error="not supported by GForce var")
-test(2019.10, DT[, prod(v), by=id], error="not supported by GForce prod")
+test(2019.02, DT[, min(v), by=id], error="'complex' not supported by GForce min")
+test(2019.03, DT[, max(v), by=id], error="'complex' not supported by GForce max")
+test(2019.04, DT[, median(v), by=id], error="'complex' not supported by GForce median")
+test(2019.05, DT[, head(v, 1), by=id], error="'complex' not supported by GForce head")
+test(2019.06, DT[, tail(v, 1), by=id], error="'complex' not supported by GForce tail")
+test(2019.07, DT[, v[1], by=id], error="'complex' not supported by GForce subset")
+test(2019.08, DT[, sd(v), by=id], error="'complex' not supported by GForce sd")
+test(2019.09, DT[, var(v), by=id], error="'complex' not supported by GForce var")
+test(2019.10, DT[, prod(v), by=id], error="'complex' not supported by GForce prod")
 DT = data.table(id = c(1L,1L,2L,2L), v = c(1L, 2L, NA, NA))
 test(2019.11, DT[, median(v), id], data.table(id=1:2, V1=c(1.5, NA))) # median whole group has NAs
 

From 42d0fa72615fc8c5fbd6b7dc888092eab2f43d51 Mon Sep 17 00:00:00 2001
From: mattdowle <mattjdowle@gmail.com>
Date: Mon, 1 Apr 2019 15:18:13 -0700
Subject: [PATCH 7/8] added checks on optimization messages, to act as
 documentation/reminders in the tests.Raw file too

---
 NEWS.md               |  2 +-
 inst/tests/tests.Rraw | 52 ++++++++++++++++++++++++++-----------------
 2 files changed, 33 insertions(+), 21 deletions(-)

diff --git a/NEWS.md b/NEWS.md
index a128cb717d..c73c869c32 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -8,7 +8,7 @@
 
 #### BUG FIXES
 
-1. `head` and `tail` by group were optimized but no longer require `n` to be provided (the default is n=6L), [#3462](https://github.com/Rdatatable/data.table/issues/3462).
+1. `head` and `tail` by group no longer error when `n=` is not provided (i.e. the default `n=6L` now works), [#3462](https://github.com/Rdatatable/data.table/issues/3462).
 
 #### NOTES
 
diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index f51c6046c0..5e08636260 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -13927,37 +13927,49 @@ unlink(tt)
 
 # gfirst(.SD) throws an error about not using head(.SD, n), but the latter works #2030
 DT = data.table(id = c(1L,1L,2L), v = 1:3)
-test(2017.1, DT[, first(.SD), by=id, .SDcols="v"], data.table(id=1:2, v=c(1L,3L)))
-test(2017.2, DT[, first(v), by=id], data.table(id=1:2, V1=c(1L,3L)))
-test(2017.3, DT[, last(v), by=id], data.table(id=1:2, V1=c(2L,3L)))
-test(2017.4, DT[, v[1L], by=id], data.table(id=1:2, V1=c(1L,3L)))
+test(2017.1, DT[, first(.SD), by=id, .SDcols="v", verbose=TRUE], data.table(id=1:2, v=c(1L,3L)),  output="optimized j to 'list(gfirst(v))'")
+test(2017.2, DT[, first(v), by=id,                verbose=TRUE], data.table(id=1:2, V1=c(1L,3L)), output="optimized j to 'gfirst(v)'")
+test(2017.3, DT[, last(v), by=id,                 verbose=TRUE], data.table(id=1:2, V1=c(2L,3L)), output="optimized j to 'glast(v)'")
+test(2017.4, DT[, v[1L], by=id,                   verbose=TRUE], data.table(id=1:2, V1=c(1L,3L)), output="optimized j to '`g[`(v, 1L)'")
 DT = data.table(id = c(1L,1L,2L), v = 1:3, y = 3:1, z = c(TRUE, TRUE, FALSE), u = c("a","b","c"))
-test(2017.5, DT[, first(.SD), by=id, .SDcols=c("v","y","z","u")], data.table(id=1:2, v=c(1L,3L), y=c(3L,1L), z=c(TRUE,FALSE), u=c("a","c")))
-test(2017.6, DT[, last(.SD), by=id, .SDcols=c("v","y","z","u")], data.table(id=1:2, v=c(2L,3L), y=c(2L,1L), z=c(TRUE,FALSE), u=c("b","c")))
-test(2017.7, DT[, .SD[1L], by=id, .SDcols=c("v","y","z","u")], data.table(id=1:2, v=c(1L,3L), y=c(3L,1L), z=c(TRUE,FALSE), u=c("a","c")))
+test(2017.5, DT[, first(.SD), by=id, .SDcols=c("v","y","z","u"), verbose=TRUE],
+             data.table(id=1:2, v=c(1L,3L), y=c(3L,1L), z=c(TRUE,FALSE), u=c("a","c")),
+             output="optimized j to 'list(gfirst(v), gfirst(y), gfirst(z), gfirst(u))'")
+test(2017.6, DT[, last(.SD), by=id, .SDcols=c("v","y","z","u"), verbose=TRUE],
+             data.table(id=1:2, v=c(2L,3L), y=c(2L,1L), z=c(TRUE,FALSE), u=c("b","c")),
+             output="optimized j to 'list(glast(v), glast(y), glast(z), glast(u))'")
+test(2017.7, DT[, .SD[1L], by=id, .SDcols=c("v","y","z","u"), verbose=TRUE],
+             data.table(id=1:2, v=c(1L,3L), y=c(3L,1L), z=c(TRUE,FALSE), u=c("a","c")),
+             output="optimized j to 'list(`g[`(v, 1L), `g[`(y, 1L), `g[`(z, 1L), `g[`(u, 1L))'")
 # ghead argument "n" is missing, with no default #3462
 DT = data.table(a=c(rep(1L, 7L), rep(2L, 5L)), b=1:12, d=12:1)
-test(2018.1, DT[, head(.SD), a], data.table(a=c(rep(1L, 6L), rep(2L, 5L)), b=c(1:6, 8:12), d=c(12:7, 5:1)))
-test(2018.2, DT[, head(b), a], data.table(a=c(rep(1L, 6L), rep(2L, 5L)), V1=c(1:6, 8:12)))
+test(2018.1, DT[, head(.SD), a, verbose=TRUE],
+             data.table(a=c(rep(1L, 6L), rep(2L, 5L)), b=c(1:6, 8:12), d=c(12:7, 5:1)),
+             output=c("lapply optimization changed j from 'head(.SD)' to 'list(head(b, n = 6L), head(d, n = 6L))'",
+                      "GForce is on, left j unchanged"))
+test(2018.2, DT[, head(b), a, verbose=TRUE],
+             data.table(a=c(rep(1L, 6L), rep(2L, 5L)), V1=c(1:6, 8:12)),
+             output=c("lapply optimization is on, j unchanged as 'head(b)'",
+                      "GForce is on, left j unchanged"))
 test(2018.3, DT[, tail(.SD), a], data.table(a=c(rep(1L, 6L), rep(2L, 5L)), b=c(2:7, 8:12), d=c(11:6, 5:1)))
 test(2018.4, DT[, tail(b), a], data.table(a=c(rep(1L, 6L), rep(2L, 5L)), V1=c(2:7, 8:12)))
 # gforce tests coverage
 if (test_bit64) {
   DT = data.table(id=c(rep(1L,3), rep(2L, 3)), v=bit64::as.integer64(c(1:3, 4L, 5:6)))
-  test(2019.01, DT[2:6, sum(v), id], data.table(id=1:2, V1=bit64::as.integer64(c(5L,15L)))) # gather, case of int64 and irows
+  test(2019, DT[2:6, sum(v), id], data.table(id=1:2, V1=bit64::as.integer64(c(5L,15L)))) # gather, case of int64 and irows
 }
 DT = data.table(id = c(1L,1L,2L), v = c(1i, 2i, 3i))
-test(2019.02, DT[, min(v), by=id], error="'complex' not supported by GForce min")
-test(2019.03, DT[, max(v), by=id], error="'complex' not supported by GForce max")
-test(2019.04, DT[, median(v), by=id], error="'complex' not supported by GForce median")
-test(2019.05, DT[, head(v, 1), by=id], error="'complex' not supported by GForce head")
-test(2019.06, DT[, tail(v, 1), by=id], error="'complex' not supported by GForce tail")
-test(2019.07, DT[, v[1], by=id], error="'complex' not supported by GForce subset")
-test(2019.08, DT[, sd(v), by=id], error="'complex' not supported by GForce sd")
-test(2019.09, DT[, var(v), by=id], error="'complex' not supported by GForce var")
-test(2019.10, DT[, prod(v), by=id], error="'complex' not supported by GForce prod")
+test(2020.01, DT[, min(v), by=id], error="'complex' not supported by GForce min")
+test(2020.02, DT[, max(v), by=id], error="'complex' not supported by GForce max")
+test(2020.03, DT[, median(v), by=id], error="'complex' not supported by GForce median")
+test(2020.04, DT[, head(v, 1), by=id], error="'complex' not supported by GForce head")
+test(2020.05, DT[, tail(v, 1), by=id], error="'complex' not supported by GForce tail")
+test(2020.06, DT[, v[1], by=id], error="'complex' not supported by GForce subset")
+test(2020.07, DT[, sd(v), by=id], error="'complex' not supported by GForce sd")
+test(2020.08, DT[, var(v), by=id], error="'complex' not supported by GForce var")
+test(2020.09, DT[, prod(v), by=id], error="'complex' not supported by GForce prod")
 DT = data.table(id = c(1L,1L,2L,2L), v = c(1L, 2L, NA, NA))
-test(2019.11, DT[, median(v), id], data.table(id=1:2, V1=c(1.5, NA))) # median whole group has NAs
+test(2020.10, DT[, median(v), id], data.table(id=1:2, V1=c(1.5, NA))) # median whole group has NAs
 
 
 ###################################

From 94835225a4ebc4af738dcf50db70598af286b76e Mon Sep 17 00:00:00 2001
From: mattdowle <mattjdowle@gmail.com>
Date: Mon, 1 Apr 2019 15:25:46 -0700
Subject: [PATCH 8/8] news item

---
 NEWS.md | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/NEWS.md b/NEWS.md
index c73c869c32..cd462c3663 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -4,11 +4,9 @@
 
 #### NEW FEATURES
 
-1. `first` and `last` by group are now optimized, [#2030](https://github.com/Rdatatable/data.table/issues/2030). Thanks to @franknarf1 for reporting.
-
 #### BUG FIXES
 
-1. `head` and `tail` by group no longer error when `n=` is not provided (i.e. the default `n=6L` now works), [#3462](https://github.com/Rdatatable/data.table/issues/3462).
+1. `first`, `last`, `head` and `tail` by group no longer error in some cases, [#2030](https://github.com/Rdatatable/data.table/issues/2030) [#3462](https://github.com/Rdatatable/data.table/issues/3462). Thanks to @franknarf1 for reporting.
 
 #### NOTES