From fc03664cb6edfa569f88bbd64c4b1efc226c8eee Mon Sep 17 00:00:00 2001
From: Benjamin Schwendinger <benjamin.schwendinger@tuwien.ac.at>
Date: Fri, 30 Jul 2021 20:46:09 +0200
Subject: [PATCH 01/16] added ghead/gtail support of n>1

---
 NEWS.md               |   2 +
 R/data.table.R        |  26 ++++-
 inst/tests/tests.Rraw |  30 ++++-
 src/gsumm.c           | 258 +++++++++++++++++++++++++++++++++++++++++-
 4 files changed, 307 insertions(+), 9 deletions(-)

diff --git a/NEWS.md b/NEWS.md
index 5d6a7da3ea..c31945bf4d 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -93,6 +93,8 @@
 
 13. `fifelse()` now coerces logical `NA` to other types and the `na` argument supports vectorized input, [#4277](https://github.com/Rdatatable/data.table/issues/4277) [#4286](https://github.com/Rdatatable/data.table/issues/4286) [#4287](https://github.com/Rdatatable/data.table/issues/4287). Thanks to @michaelchirico and @shrektan for reporting, and @shrektan for implementing.
 
+14. `ghead(n)` / `gtail(n)` now also accepct values for n > 1, [#5060](https://github.com/Rdatatable/data.table/issues/5060) [#523](https://github.com/Rdatatable/data.table/issues/523#issuecomment-162934391). Thanks to @jangorecki and @myoung3 for reporting, and Benjamin Schwendinger for the PR.
+
 ## BUG FIXES
 
 1. `by=.EACHI` when `i` is keyed but `on=` different columns than `i`'s key could create an invalidly keyed result, [#4603](https://github.com/Rdatatable/data.table/issues/4603) [#4911](https://github.com/Rdatatable/data.table/issues/4911). Thanks to @myoung3 and @adamaltmejd for reporting, and @ColeMiller1 for the PR. An invalid key is where a `data.table` is marked as sorted by the key columns but the data is not sorted by those columns, leading to incorrect results from subsequent queries.
diff --git a/R/data.table.R b/R/data.table.R
index c15d65f034..f95b3b9e7a 100644
--- a/R/data.table.R
+++ b/R/data.table.R
@@ -1733,13 +1733,13 @@ replace_dot_alias = function(e) {
           # is.symbol() is for #1369, #1974 and #2949
           if (!(is.call(q) && is.symbol(q[[1L]]) && is.symbol(q[[2L]]) && (q1 <- q[[1L]]) %chin% gfuns)) return(FALSE)
           if (!(q2 <- q[[2L]]) %chin% names(SDenv$.SDall) && q2 != ".I") return(FALSE)  # 875
-          if ((length(q)==2L || (!is.null(names(q)) && startsWith(names(q)[3L], "na"))) && (!q1 %chin% c("head","tail"))) return(TRUE)
+          if ((length(q)==2L || (!is.null(names(q)) && startsWith(names(q)[3L], "na")))) return(TRUE)
           #                       ^^ base::startWith errors on NULL unfortunately
           #        head-tail uses default value n=6 which as of now should not go gforce ... ^^
           # otherwise there must be three arguments, and only in two cases:
           #   1) head/tail(x, 1) or 2) x[n], n>0
           length(q)==3L && length(q3 <- q[[3L]])==1L && is.numeric(q3) &&
-            ( (q1 %chin% c("head", "tail") && q3==1L) || ((q1 == "[" || (q1 == "[[" && eval(call('is.atomic', q[[2L]]), envir=x))) && q3>0L) )
+            ( (q1 %chin% c("head", "tail")) || ((q1 == "[" || (q1 == "[[" && eval(call('is.atomic', q[[2L]]), envir=x))) && q3>0L) )
         }
         if (jsub[[1L]]=="list") {
           GForce = TRUE
@@ -1755,6 +1755,8 @@ replace_dot_alias = function(e) {
               if (length(jsub[[ii]])==3L) jsub[[ii]][[3L]] = eval(jsub[[ii]][[3L]], parent.frame())  # tests 1187.2 & 1187.4
             }
           else {
+            # adding argument to ghead/gtail if none is supplied to g-optimized head/tail
+            if (length(jsub) == 2L && jsub[[1L]] %chin% c("head", "tail")) jsub[["n"]] = 6L
             jsub[[1L]] = as.name(paste0("g", jsub[[1L]]))
             if (length(jsub)==3L) jsub[[3L]] = eval(jsub[[3L]], parent.frame())   # tests 1187.3 & 1187.5
           }
@@ -1834,6 +1836,26 @@ replace_dot_alias = function(e) {
     ans = gforce(thisEnv, jsub, o__, f__, len__, irows) # irows needed for #971.
     gi = if (length(o__)) o__[f__] else f__
     g = lapply(grpcols, function(i) groups[[i]][gi])
+
+    # adding ghead/gtail(n) support for n > 1 #5060 #523
+    q3 = 0
+    if (!is.symbol(jsub)) {
+      headTail_arg = function(q) {
+        if (length(q)==3L && length(q3 <- q[[3L]])==1L && is.numeric(q3) &&
+         (q1 <- q[[1L]]) %chin% c("ghead", "gtail") && q3!=1) q3
+        else 0
+      }
+
+      if (jsub[[1L]] == "list"){
+        q3 = max(sapply(jsub, headTail_arg))
+      } else if (length(jsub)==3L) {
+        q3 = headTail_arg(jsub)
+      }
+    }
+    if (q3 > 0) {
+      grplens = pmin.int(q3, len__)
+      g = list(rep(unlist(g), grplens))
+    }
     ans = c(g, ans)
   } else {
     ans = .Call(Cdogroups, x, xcols, groups, grpcols, jiscols, xjiscols, grporder, o__, f__, len__, jsub, SDenv, cols, newnames, !missing(on), verbose)
diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index aa654a236c..588395587f 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -8094,7 +8094,8 @@ test(1579.19, dt[, tail(.SD,1L), by=x],    dt[, utils::tail(.SD,1L), by=x])
 test(1579.20, dt[, tail(.SD,1L), keyby=x], dt[, utils::tail(.SD,1L), keyby=x])
 test(1579.21, dt[, tail(.SD,1L), keyby=x], dt[, utils::tail(.SD,1L), keyby=x])
 # GForce _doesn't_ work when n > 1
-test(1579.22, dt[ , tail(.SD, 2), by = x, verbose = TRUE], output = 'GForce FALSE')
+# removed test since GForce now works for head/tail with n > 1
+# test(1579.22, dt[ , tail(.SD, 2), by = x, verbose = TRUE], output = 'GForce FALSE')
 
 mysub <- function(x, n) x[n]
 test(1579.23, dt[, .SD[2],  by=x],    dt[, mysub(.SD,2),  by=x])
@@ -14654,11 +14655,11 @@ DT = data.table(a=c(rep(1L, 7L), rep(2L, 5L)), b=1:12, d=12:1)
 test(2018.1, DT[, head(.SD), a, verbose=TRUE],
              data.table(a=c(rep(1L, 6L), rep(2L, 5L)), b=c(1:6, 8:12), d=c(12:7, 5:1)),
              output=c("lapply optimization changed j from 'head(.SD)' to 'list(head(b, n = 6L), head(d, n = 6L))'",
-                      "GForce is on, left j unchanged"))
+                      "GForce optimized j to 'list(ghead(b, n = 6L), ghead(d, n = 6L))'"))
 test(2018.2, DT[, head(b), a, verbose=TRUE],
              data.table(a=c(rep(1L, 6L), rep(2L, 5L)), V1=c(1:6, 8:12)),
              output=c("lapply optimization is on, j unchanged as 'head(b)'",
-                      "GForce is on, left j unchanged"))
+                      "GForce optimized j to 'ghead(b, n = 6L)'"))
 test(2018.3, DT[, tail(.SD), a], data.table(a=c(rep(1L, 6L), rep(2L, 5L)), b=c(2:7, 8:12), d=c(11:6, 5:1)))
 test(2018.4, DT[, tail(b), a], data.table(a=c(rep(1L, 6L), rep(2L, 5L)), V1=c(2:7, 8:12)))
 # gforce tests coverage
@@ -17802,3 +17803,26 @@ test(2199.1, as.data.table(as.list(1:2))[, .SD,.SDcols=(-1L)],   data.table(V2=2
 test(2199.2, as.data.table(as.list(1:2))[, .SD,.SDcols=(-(1L))], data.table(V2=2L))
 test(2199.3, as.data.table(as.list(1:3))[, .SD,.SDcols=(-1L)],   data.table(V2=2L, V3=3L))
 test(2199.4, data.table(V1=-1L, V2=-2L, V3=-3L)[,.SD,.SDcols=-V2:-V1], error="not found")
+
+# gforce head/tail for n > 1
+dt = data.table(x  = sample(letters[1:5], 20, TRUE),
+                i1 = sample(-10:10, 20, TRUE),
+                i2 = sample(c(-10:10, NA), 20, TRUE),
+                d1 = as.numeric(sample(-10:10, 20, TRUE)),
+                d2 = as.numeric(sample(c(-10:10, NA), 20, TRUE)))
+test(2200.01, dt[, head(.SD,2),    by=x], dt[, utils::head(.SD,2),    by=x])
+test(2200.02, dt[, head(.SD,2),    by=x], dt[, utils::head(.SD,2),    by=x])
+test(2200.03, dt[, head(.SD,2), keyby=x], dt[, utils::head(.SD,2), keyby=x])
+test(2200.04, dt[, head(.SD,2), keyby=x], dt[, utils::head(.SD,2), keyby=x])
+test(2200.05, dt[, head(.SD,5),    by=x], dt[, utils::head(.SD,5),    by=x])
+test(2200.06, dt[, head(.SD,5),    by=x], dt[, utils::head(.SD,5),    by=x])
+test(2200.07, dt[, head(.SD,5), keyby=x], dt[, utils::head(.SD,5), keyby=x])
+test(2200.08, dt[, head(.SD,5), keyby=x], dt[, utils::head(.SD,5), keyby=x])
+test(2200.11, dt[, tail(.SD,2),    by=x], dt[, utils::tail(.SD,2),    by=x])
+test(2200.12, dt[, tail(.SD,2),    by=x], dt[, utils::tail(.SD,2),    by=x])
+test(2200.13, dt[, tail(.SD,2), keyby=x], dt[, utils::tail(.SD,2), keyby=x])
+test(2200.14, dt[, tail(.SD,2), keyby=x], dt[, utils::tail(.SD,2), keyby=x])
+test(2200.15, dt[, tail(.SD,5),    by=x], dt[, utils::tail(.SD,5),    by=x])
+test(2200.16, dt[, tail(.SD,5),    by=x], dt[, utils::tail(.SD,5),    by=x])
+test(2200.17, dt[, tail(.SD,5), keyby=x], dt[, utils::tail(.SD,5), keyby=x])
+test(2200.18, dt[, tail(.SD,5), keyby=x], dt[, utils::tail(.SD,5), keyby=x])
\ No newline at end of file
diff --git a/src/gsumm.c b/src/gsumm.c
index 651f1c3385..eacac95b14 100644
--- a/src/gsumm.c
+++ b/src/gsumm.c
@@ -1186,13 +1186,263 @@ SEXP gfirst(SEXP x) {
 }
 
 SEXP gtail(SEXP x, SEXP valArg) {
-  if (!isInteger(valArg) || LENGTH(valArg)!=1 || INTEGER(valArg)[0]!=1) error(_("Internal error, gtail is only implemented for n=1. This should have been caught before. please report to data.table issue tracker.")); // # nocov
-  return (glast(x));
+  if (!isInteger(valArg) || LENGTH(valArg)!=1) error(_("Internal error, gtail is only implemented for n>0. This should have been caught before. please report to data.table issue tracker.")); // # nocov
+  const int val=INTEGER(valArg)[0];
+  if (val == 1) return (glast(x));
+
+  const int n = (irowslen == -1) ? length(x) : irowslen;
+  if (nrow != n) error(_("nrow [%d] != length(x) [%d] in %s"), nrow, n, "gtail");
+
+  int anslen = 0;
+  for (int i=0; i<ngrp; ++i){
+    if (val > grpsize[i]) anslen += grpsize[i]; else anslen += val;
+  }
+  SEXP ans;
+  switch(TYPEOF(x)) {
+  case LGLSXP: {
+    const int *ix = LOGICAL(x);
+    ans = PROTECT(allocVector(LGLSXP, anslen));
+    int *ians = LOGICAL(ans);
+    int offset = 0;
+    for (int i=0; i<ngrp; ++i) {
+      const int thisgrpsize = grpsize[i];
+      const int rev = (val > thisgrpsize ? thisgrpsize : val);
+      for (int j=0; j<val; ++j) {
+        if (j > thisgrpsize-1) break;
+        int k = ff[i]+thisgrpsize-2-j;
+        if (isunsorted) k = oo[k]-1;
+        k = (irowslen == -1) ? k : irows[k]-1;
+        ians[offset+(rev-1-j)] = ix[k];
+      }
+      if (val > thisgrpsize) offset += thisgrpsize; else offset += val;
+    }
+  }
+    break;
+  case INTSXP: {
+    const int *ix = INTEGER(x);
+    ans = PROTECT(allocVector(INTSXP, anslen));
+    int *ians = INTEGER(ans);
+    int offset = 0;
+    for (int i=0; i<ngrp; ++i) {
+      const int thisgrpsize = grpsize[i];
+      const int rev = (val > thisgrpsize ? thisgrpsize : val);
+      for (int j=0; j<val; ++j) {
+        if (j > thisgrpsize-1) break;
+        int k = ff[i]+thisgrpsize-2-j;
+        if (isunsorted) k = oo[k]-1;
+        k = (irowslen == -1) ? k : irows[k]-1;
+        ians[offset+(rev-1-j)] = ix[k];
+      }
+      if (val > thisgrpsize) offset += thisgrpsize; else offset += val;
+    }
+  }
+    break;
+  case REALSXP: {
+    const double *dx = REAL(x);
+    ans = PROTECT(allocVector(REALSXP, anslen));
+    double *dans = REAL(ans);
+    int offset = 0;
+    for (int i=0; i<ngrp; ++i) {
+      const int thisgrpsize = grpsize[i];
+      const int rev = (val > thisgrpsize ? thisgrpsize : val);
+      for (int j=0; j<val; ++j) {
+        if (j > thisgrpsize-1) break;
+        int k = ff[i]+thisgrpsize-2-j;
+        if (isunsorted) k = oo[k]-1;
+        k = (irowslen == -1) ? k : irows[k]-1;
+        dans[offset+(rev-1-j)] = dx[k];
+      }
+      if (val > thisgrpsize) offset += thisgrpsize; else offset += val;
+    }
+  }
+    break;
+  case CPLXSXP: {
+    const Rcomplex *dx = COMPLEX(x);
+    ans = PROTECT(allocVector(CPLXSXP, anslen));
+    Rcomplex *dans = COMPLEX(ans);
+    int offset = 0;
+    for (int i=0; i<ngrp; ++i) {
+      const int thisgrpsize = grpsize[i];
+      const int rev = (val > thisgrpsize ? thisgrpsize : val);
+      for (int j=0; j<val; ++j) {
+        if (j > thisgrpsize-1) break;
+        int k = ff[i]+thisgrpsize-2-j;
+        if (isunsorted) k = oo[k]-1;
+        k = (irowslen == -1) ? k : irows[k]-1;
+        dans[offset+(rev-1-j)] = dx[k];
+      }
+      if (val > thisgrpsize) offset += thisgrpsize; else offset += val;
+    }
+  }
+    break;
+  case STRSXP: {
+    ans = PROTECT(allocVector(STRSXP, anslen));
+    int offset = 0;
+    for (int i=0; i<ngrp; ++i) {
+      const int thisgrpsize = grpsize[i];
+      const int rev = (val > thisgrpsize ? thisgrpsize : val);
+      for (int j=0; j<val; ++j) {
+        if (j > thisgrpsize-1) break;
+        int k = ff[i]+thisgrpsize-2-j;
+        if (isunsorted) k = oo[k]-1;
+        k = (irowslen == -1) ? k : irows[k]-1;
+        SET_STRING_ELT(ans, offset+(rev-1-j), STRING_ELT(x, k));
+      }
+      if (val > thisgrpsize) offset += thisgrpsize; else offset += val;
+    }
+  }
+    break;
+  case VECSXP: {
+    ans = PROTECT(allocVector(VECSXP, anslen));
+    int offset = 0;
+    for (int i=0; i<ngrp; ++i) {
+      const int thisgrpsize = grpsize[i];
+      const int rev = (val > thisgrpsize ? thisgrpsize : val);
+      for (int j=0; j<val; ++j) {
+        if (j > thisgrpsize-1) break;
+        int k = ff[i]+thisgrpsize-2-j;
+        if (isunsorted) k = oo[k]-1;
+        k = (irowslen == -1) ? k : irows[k]-1;
+        SET_VECTOR_ELT(ans, offset+(rev-1-j), VECTOR_ELT(x, k));
+      }
+      if (val > thisgrpsize) offset += thisgrpsize; else offset += val;
+    }
+  }
+    break;
+  default:
+    error(_("Type '%s' not supported by GForce gtail. Either add the prefix utils::tail(.) or turn off GForce optimization using options(datatable.optimize=1)"), type2char(TYPEOF(x)));
+  }
+  copyMostAttrib(x, ans);
+  UNPROTECT(1);
+  return(ans);
+
 }
 
 SEXP ghead(SEXP x, SEXP valArg) {
-  if (!isInteger(valArg) || LENGTH(valArg)!=1 || INTEGER(valArg)[0]!=1) error(_("Internal error, ghead is only implemented for n=1. This should have been caught before. please report to data.table issue tracker.")); // # nocov
-  return (gfirst(x));
+  if (!isInteger(valArg) || LENGTH(valArg)!=1) error(_("Internal error, ghead is only implemented for n>0. This should have been caught before. please report to data.table issue tracker.")); // # nocov
+  const int val=INTEGER(valArg)[0];
+  if (val == 1) return (gfirst(x));
+
+  const int n = (irowslen == -1) ? length(x) : irowslen;
+  if (nrow != n) error(_("nrow [%d] != length(x) [%d] in %s"), nrow, n, "ghead");
+
+  int anslen = 0;
+  for (int i=0; i<ngrp; ++i){
+    const int thisgrpsize = grpsize[i];
+    if (val > thisgrpsize) anslen += thisgrpsize; else anslen += val;
+  }
+  SEXP ans;
+  switch(TYPEOF(x)) {
+  case LGLSXP: {
+    const int *ix = LOGICAL(x);
+    ans = PROTECT(allocVector(LGLSXP, anslen));
+    int *ians = LOGICAL(ans);
+    int offset = 0;
+    for (int i=0; i<ngrp; ++i) {
+      const int thisgrpsize = grpsize[i];
+      for (int j=0; j<val; ++j) {
+        if (j > thisgrpsize-1) break;
+        int k = ff[i]+j-1;
+        if (isunsorted) k = oo[k]-1;
+        k = (irowslen == -1) ? k : irows[k]-1;
+        ians[offset+j] = ix[k];
+      }
+      if (val > thisgrpsize) offset += thisgrpsize; else offset += val;
+    }
+  }
+    break;
+  case INTSXP: {
+    const int *ix = INTEGER(x);
+    ans = PROTECT(allocVector(INTSXP, anslen));
+    int *ians = INTEGER(ans);
+    int offset = 0;
+    for (int i=0; i<ngrp; ++i) {
+      const int thisgrpsize = grpsize[i];
+      for (int j=0; j<val; ++j) {
+        if (j > thisgrpsize-1) break;
+        int k = ff[i]+j-1;
+        if (isunsorted) k = oo[k]-1;
+        k = (irowslen == -1) ? k : irows[k]-1;
+        ians[offset+j] = ix[k];
+      }
+      if (val > thisgrpsize) offset += thisgrpsize; else offset += val;
+    }
+  }
+    break;
+  case REALSXP: {
+    const double *dx = REAL(x);
+    ans = PROTECT(allocVector(REALSXP, anslen));
+    double *dans = REAL(ans);
+    int offset = 0;
+    for (int i=0; i<ngrp; ++i) {
+      const int thisgrpsize = grpsize[i];
+      for (int j=0; j<val; ++j) {
+        if (j > thisgrpsize-1) break;
+        int k = ff[i]+j-1;
+        if (isunsorted) k = oo[k]-1;
+        k = (irowslen == -1) ? k : irows[k]-1;
+        dans[offset+j] = dx[k];
+      }
+      if (val > thisgrpsize) offset += thisgrpsize; else offset += val;
+    }
+  }
+    break;
+  case CPLXSXP: {
+    const Rcomplex *dx = COMPLEX(x);
+    ans = PROTECT(allocVector(CPLXSXP, anslen));
+    Rcomplex *dans = COMPLEX(ans);
+    int offset = 0;
+    for (int i=0; i<ngrp; ++i) {
+      const int thisgrpsize = grpsize[i];
+      for (int j=0; j<val; ++j) {
+        if (j > thisgrpsize-1) break;
+        int k = ff[i]+j-1;
+        if (isunsorted) k = oo[k]-1;
+        k = (irowslen == -1) ? k : irows[k]-1;
+        dans[offset+j] = dx[k];
+      }
+      if (val > thisgrpsize) offset += thisgrpsize; else offset += val;
+    }
+  }
+    break;
+  case STRSXP: {
+    ans = PROTECT(allocVector(STRSXP, anslen));
+    int offset = 0;
+    for (int i=0; i<ngrp; ++i) {
+      const int thisgrpsize = grpsize[i];
+      for (int j=0; j<val; ++j) {
+        if (j > thisgrpsize-1) break;
+        int k = ff[i]+j-1;
+        if (isunsorted) k = oo[k]-1;
+        k = (irowslen == -1) ? k : irows[k]-1;
+        SET_STRING_ELT(ans, offset+j, STRING_ELT(x, k));
+      }
+      if (val > thisgrpsize) offset += thisgrpsize; else offset += val;
+    }
+  }
+    break;
+  case VECSXP: {
+    ans = PROTECT(allocVector(VECSXP, anslen));
+    int offset = 0;
+    for (int i=0; i<ngrp; ++i) {
+      const int thisgrpsize = grpsize[i];
+      for (int j=0; j<val; ++j) {
+        if (j > thisgrpsize-1) break;
+        int k = ff[i]+j-1;
+        if (isunsorted) k = oo[k]-1;
+        k = (irowslen == -1) ? k : irows[k]-1;
+        SET_VECTOR_ELT(ans, offset+j, VECTOR_ELT(x, k));
+      }
+      if (val > thisgrpsize) offset += thisgrpsize; else offset += val;
+    }
+  }
+    break;
+  default:
+    error(_("Type '%s' not supported by GForce ghead. Either add the prefix utils::head(.) or turn off GForce optimization using options(datatable.optimize=1)"), type2char(TYPEOF(x)));
+  }
+  copyMostAttrib(x, ans);
+  UNPROTECT(1);
+  return(ans);
 }
 
 SEXP gnthvalue(SEXP x, SEXP valArg) {

From 805849f3ece233e3e4a9b6adecd14b62b8577363 Mon Sep 17 00:00:00 2001
From: Matt Dowle <mattjdowle@gmail.com>
Date: Tue, 24 Aug 2021 23:06:52 -0600
Subject: [PATCH 02/16] macro'd to avoid repeated logic and for easier
 maintenance/extension

---
 src/gsumm.c | 149 +++++++++++++++++-----------------------------------
 1 file changed, 49 insertions(+), 100 deletions(-)

diff --git a/src/gsumm.c b/src/gsumm.c
index bee1be31b0..cd8791f625 100644
--- a/src/gsumm.c
+++ b/src/gsumm.c
@@ -904,11 +904,10 @@ static SEXP gfirstlast(SEXP x, const bool first, const int w, const bool headw)
   // w: which item (1 other than for gnthvalue when could be >1)
   // headw: select 1:w of each group when first=true, and (n-w+1):n when first=false (i.e. tail)
   const bool nosubset = irowslen == -1;
+  const bool issorted = !isunsorted; // make a const-bool for use inside loops
   const int n = nosubset ? length(x) : irowslen;
-  SEXP ans;
   if (nrow != n) error(_("nrow [%d] != length(x) [%d] in %s"), nrow, n, first?"gfirst":"glast");
-  const bool gnth = w>1 && !headw;  // const bool to avoid fetching grpsize[i] when not needed
-  if (w==1 && headw) error(_("Internal error: headw should only be true when w>1"));
+  if (w==1 && headw) error(_("Internal error: gfirstlast headw should only be true when w>1"));
   int anslen = ngrp;
   if (headw) {
     anslen = 0;
@@ -916,105 +915,55 @@ static SEXP gfirstlast(SEXP x, const bool first, const int w, const bool headw)
       anslen += MIN(w, grpsize[i]);
     }
   }
+  SEXP ans = PROTECT(allocVector(TYPEOF(x), anslen));
   int ansi = 0;
-  switch(TYPEOF(x)) {
-  case LGLSXP: {
-    const int *xd = LOGICAL(x);
-    ans = PROTECT(allocVector(LGLSXP, anslen));
-    int *ansd = LOGICAL(ans);
-    for (int i=0; i<ngrp; ++i) {
-      const int grpn = grpsize[i];
-      if (gnth && w>grpn) { ansd[ansi++]=NA_LOGICAL; continue; }
-      const int thisn = headw ? MIN(w,grpn) : 1;
-      int jstart = ff[i]-1 + (first ? (!headw)*(w-1) : (grpn-thisn));
-      const int jend = jstart+thisn;
-      for (int j=jstart; j<jend; ++j) {
-        const int k = isunsorted ? oo[j]-1 : j;
-        ansd[ansi++] = nosubset ? xd[k] : (irows[k]==NA_INTEGER ? NA_LOGICAL : xd[irows[k]-1]);
-      }
-    }
-  }
-    break;
-  case INTSXP: {
-    const int *xd = INTEGER(x);
-    ans = PROTECT(allocVector(INTSXP, anslen));
-    int *ansd = INTEGER(ans);
-    for (int i=0; i<ngrp; ++i) {
-      const int grpn = grpsize[i];
-      if (gnth && w>grpn) { ansd[ansi++]=NA_INTEGER; continue; }
-      const int thisn = headw ? MIN(w,grpn) : 1;
-      int jstart = ff[i]-1 + (first ? (!headw)*(w-1) : (grpn-thisn));
-      const int jend = jstart+thisn;
-      for (int j=jstart; j<jend; ++j) {
-        const int k = isunsorted ? oo[j]-1 : j;
-        ansd[ansi++] = nosubset ? xd[k] : (irows[k]==NA_INTEGER ? NA_INTEGER : xd[irows[k]-1]);
-      }
-    }
-  }
-    break;
-  case REALSXP: {
-    const double *xd = REAL(x);
-    ans = PROTECT(allocVector(REALSXP, anslen));
-    double *ansd = REAL(ans);
-    for (int i=0; i<ngrp; ++i) {
-      const int grpn = grpsize[i];
-      if (gnth && w>grpn) { ansd[ansi++]=NA_REAL; continue; }
-      const int thisn = headw ? MIN(w,grpn) : 1;
-      int jstart = ff[i]-1 + (first ? (!headw)*(w-1) : (grpn-thisn));
-      const int jend = jstart+thisn;
-      for (int j=jstart; j<jend; ++j) {
-        const int k = isunsorted ? oo[j]-1 : j;
-        ansd[ansi++] = nosubset ? xd[k] : (irows[k]==NA_INTEGER ? NA_REAL : xd[irows[k]-1]);
-      }
-    }
+  #define DO(CTYPE, RTYPE, RNA, ASSIGN) {                                                          \
+    const CTYPE *xd = RTYPE(x);                                                                    \
+    if (headw) {                                                                                   \
+      /* returning more than 1 per group; w>1 */                                                   \
+      for (int i=0; i<ngrp; ++i) {                                                                 \
+        const int grpn = grpsize[i];                                                               \
+        const int thisn = MIN(w, grpn);                                                            \
+        const int jstart = ff[i]-1+ (!first)*(grpn-thisn);                                         \
+        const int jend = jstart+thisn;                                                             \
+        for (int j=jstart; j<jend; ++j) {                                                          \
+          const int k = issorted ? j : oo[j]-1;                                                    \
+          /* ternary on const-bool assumed to be branch-predicted and ok inside loops */           \
+          const CTYPE val = nosubset ? xd[k] : (irows[k]==NA_INTEGER ? RNA : xd[irows[k]-1]);      \
+          ASSIGN;                                                                                  \
+        }                                                                                          \
+      }                                                                                            \
+    } else if (w==1) {                                                                             \
+      for (int i=0; i<ngrp; ++i) {                                                                 \
+        const int j = ff[i]-1 + (first ? 0 : grpsize[i]-1);                                        \
+        const int k = issorted ? j : oo[j]-1;                                                      \
+        const CTYPE val = nosubset ? xd[k] : (irows[k]==NA_INTEGER ? RNA : xd[irows[k]-1]);        \
+        ASSIGN;                                                                                    \
+      }                                                                                            \
+    } else if (w>1 && first) {                                                                     \
+      /* gnthvalue */                                                                              \
+      for (int i=0; i<ngrp; ++i) {                                                                 \
+        const int grpn = grpsize[i];                                                               \
+        if (w>grpn) { const CTYPE val=RNA; ASSIGN; continue; }                                     \
+        const int j = ff[i]-1+w-1;                                                                 \
+        const int k = issorted ? j : oo[j]-1;                                                      \
+        const CTYPE val = nosubset ? xd[k] : (irows[k]==NA_INTEGER ? RNA : xd[irows[k]-1]);        \
+        ASSIGN;                                                                                    \
+      }                                                                                            \
+    } else {                                                                                       \
+      /* w>1 && !first not supported because -i in R means everything-but-i and gnthvalue */       \
+      /* currently takes n>0 only. However, we could still support n'th from the end, somehow */   \
+      error(_("Internal error: unanticipated case in gfirstlast first=%d w=%d headw=%d"),          \
+              first, w, headw);                                                                    \
+    }                                                                                              \
   }
-    break;
-  case CPLXSXP: {
-    const Rcomplex *xd = COMPLEX(x);
-    ans = PROTECT(allocVector(CPLXSXP, anslen));
-    Rcomplex *ansd = COMPLEX(ans);
-    for (int i=0; i<ngrp; ++i) {
-      const int grpn = grpsize[i];
-      if (gnth && w>grpn) { ansd[ansi++]=NA_CPLX; continue; }
-      const int thisn = headw ? MIN(w,grpn) : 1;
-      int jstart = ff[i]-1 + (first ? (!headw)*(w-1) : (grpn-thisn));
-      const int jend = jstart+thisn;
-      for (int j=jstart; j<jend; ++j) {
-        const int k = isunsorted ? oo[j]-1 : j;
-        ansd[ansi++] = nosubset ? xd[k] : (irows[k]==NA_INTEGER ? NA_CPLX : xd[irows[k]-1]);
-      }
-    }
-  } break;
-  case STRSXP: {
-    const SEXP *xd = STRING_PTR(x);
-    ans = PROTECT(allocVector(STRSXP, anslen));
-    for (int i=0; i<ngrp; ++i) {
-      const int grpn = grpsize[i];
-      if (gnth && w>grpn) { SET_STRING_ELT(ans, ansi++, NA_STRING); continue; }
-      const int thisn = headw ? MIN(w,grpn) : 1;
-      int jstart = ff[i]-1 + (first ? (!headw)*(w-1) : (grpn-thisn));
-      const int jend = jstart+thisn;
-      for (int j=jstart; j<jend; ++j) {
-        const int k = isunsorted ? oo[j]-1 : j;
-        SET_STRING_ELT(ans, ansi++, nosubset ? xd[k] : (irows[k]==NA_INTEGER ? NA_STRING : xd[irows[k]-1]));
-      }
-    }
-  } break;
-  case VECSXP: {
-    const SEXP *xd = SEXPPTR_RO(x);
-    ans = PROTECT(allocVector(VECSXP, anslen));
-    for (int i=0; i<ngrp; ++i) {
-      const int grpn = grpsize[i];
-      if (gnth && w>grpn) { SET_VECTOR_ELT(ans, ansi++, ScalarLogical(NA_LOGICAL)); continue; }
-      const int thisn = headw ? MIN(w,grpn) : 1;
-      int jstart = ff[i]-1 + (first ? (!headw)*(w-1) : (grpn-thisn));
-      const int jend = jstart+thisn;
-      for (int j=jstart; j<jend; ++j) {
-        const int k = isunsorted ? oo[j]-1 : j;
-        SET_VECTOR_ELT(ans, ansi++, nosubset ? xd[k] : (irows[k]==NA_INTEGER ? ScalarLogical(NA_LOGICAL) : xd[irows[k]-1]));
-      }
-    }
-  } break;
+  switch(TYPEOF(x)) {
+  case LGLSXP:  { int      *ansd=LOGICAL(ans); DO(int,      LOGICAL, NA_LOGICAL, ansd[ansi++]=val) } break;
+  case INTSXP:  { int      *ansd=INTEGER(ans); DO(int,      INTEGER, NA_INTEGER, ansd[ansi++]=val) } break;
+  case REALSXP: { double   *ansd=REAL(ans);    DO(double,   REAL,    NA_REAL,    ansd[ansi++]=val) } break;
+  case CPLXSXP: { Rcomplex *ansd=COMPLEX(ans); DO(Rcomplex, COMPLEX, NA_CPLX,    ansd[ansi++]=val) } break;
+  case STRSXP:  DO(SEXP, STRING_PTR, NA_STRING,                 SET_STRING_ELT(ans,ansi++,val))      break;
+  case VECSXP:  DO(SEXP, SEXPPTR_RO, ScalarLogical(NA_LOGICAL), SET_VECTOR_ELT(ans,ansi++,val))      break;
   default:
     error(_("Type '%s' not supported by GForce head/tail/first/last/`[`. Either add the prefix utils::head(.) or turn off GForce optimization using options(datatable.optimize=1)"), type2char(TYPEOF(x)));
   }

From 19fc827e0b017769eaa29c820e70cf0c05fe7391 Mon Sep 17 00:00:00 2001
From: Matt Dowle <mattjdowle@gmail.com>
Date: Tue, 24 Aug 2021 23:15:46 -0600
Subject: [PATCH 03/16] remove identical tests

---
 inst/tests/tests.Rraw | 22 +++++++---------------
 1 file changed, 7 insertions(+), 15 deletions(-)

diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index fa5a6383c2..e7a684b30f 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -18043,19 +18043,11 @@ dt = data.table(x  = sample(letters[1:5], 20, TRUE),
                 d1 = as.numeric(sample(-10:10, 20, TRUE)),
                 d2 = as.numeric(sample(c(-10:10, NA), 20, TRUE)))
 test(2212.01, dt[, head(.SD,2),    by=x], dt[, utils::head(.SD,2),    by=x])
-test(2212.02, dt[, head(.SD,2),    by=x], dt[, utils::head(.SD,2),    by=x])
-test(2212.03, dt[, head(.SD,2), keyby=x], dt[, utils::head(.SD,2), keyby=x])
-test(2212.04, dt[, head(.SD,2), keyby=x], dt[, utils::head(.SD,2), keyby=x])
-test(2212.05, dt[, head(.SD,5),    by=x], dt[, utils::head(.SD,5),    by=x])
-test(2212.06, dt[, head(.SD,5),    by=x], dt[, utils::head(.SD,5),    by=x])
-test(2212.07, dt[, head(.SD,5), keyby=x], dt[, utils::head(.SD,5), keyby=x])
-test(2212.08, dt[, head(.SD,5), keyby=x], dt[, utils::head(.SD,5), keyby=x])
-test(2212.11, dt[, tail(.SD,2),    by=x], dt[, utils::tail(.SD,2),    by=x])
-test(2212.12, dt[, tail(.SD,2),    by=x], dt[, utils::tail(.SD,2),    by=x])
-test(2212.13, dt[, tail(.SD,2), keyby=x], dt[, utils::tail(.SD,2), keyby=x])
-test(2212.14, dt[, tail(.SD,2), keyby=x], dt[, utils::tail(.SD,2), keyby=x])
-test(2212.15, dt[, tail(.SD,5),    by=x], dt[, utils::tail(.SD,5),    by=x])
-test(2212.16, dt[, tail(.SD,5),    by=x], dt[, utils::tail(.SD,5),    by=x])
-test(2212.17, dt[, tail(.SD,5), keyby=x], dt[, utils::tail(.SD,5), keyby=x])
-test(2212.18, dt[, tail(.SD,5), keyby=x], dt[, utils::tail(.SD,5), keyby=x])
+test(2212.02, dt[, head(.SD,2), keyby=x], dt[, utils::head(.SD,2), keyby=x])
+test(2212.03, dt[, head(.SD,5),    by=x], dt[, utils::head(.SD,5),    by=x])
+test(2212.04, dt[, head(.SD,5), keyby=x], dt[, utils::head(.SD,5), keyby=x])
+test(2212.05, dt[, tail(.SD,2),    by=x], dt[, utils::tail(.SD,2),    by=x])
+test(2212.06, dt[, tail(.SD,2), keyby=x], dt[, utils::tail(.SD,2), keyby=x])
+test(2212.07, dt[, tail(.SD,5),    by=x], dt[, utils::tail(.SD,5),    by=x])
+test(2212.08, dt[, tail(.SD,5), keyby=x], dt[, utils::tail(.SD,5), keyby=x])
 

From 35f9f4a4aba1df73fab6745ee002f1bd293abcca Mon Sep 17 00:00:00 2001
From: Matt Dowle <mattjdowle@gmail.com>
Date: Tue, 24 Aug 2021 23:17:01 -0600
Subject: [PATCH 04/16] dt renamed DT in tests

---
 inst/tests/tests.Rraw | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index e7a684b30f..8a5c28e596 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -18037,17 +18037,17 @@ for (col in c("a","b","c")) {
 }
 
 # gforce head/tail for n>1, #5060
-dt = data.table(x  = sample(letters[1:5], 20, TRUE),
+DT = data.table(x  = sample(letters[1:5], 20, TRUE),
                 i1 = sample(-10:10, 20, TRUE),
                 i2 = sample(c(-10:10, NA), 20, TRUE),
                 d1 = as.numeric(sample(-10:10, 20, TRUE)),
                 d2 = as.numeric(sample(c(-10:10, NA), 20, TRUE)))
-test(2212.01, dt[, head(.SD,2),    by=x], dt[, utils::head(.SD,2),    by=x])
-test(2212.02, dt[, head(.SD,2), keyby=x], dt[, utils::head(.SD,2), keyby=x])
-test(2212.03, dt[, head(.SD,5),    by=x], dt[, utils::head(.SD,5),    by=x])
-test(2212.04, dt[, head(.SD,5), keyby=x], dt[, utils::head(.SD,5), keyby=x])
-test(2212.05, dt[, tail(.SD,2),    by=x], dt[, utils::tail(.SD,2),    by=x])
-test(2212.06, dt[, tail(.SD,2), keyby=x], dt[, utils::tail(.SD,2), keyby=x])
-test(2212.07, dt[, tail(.SD,5),    by=x], dt[, utils::tail(.SD,5),    by=x])
-test(2212.08, dt[, tail(.SD,5), keyby=x], dt[, utils::tail(.SD,5), keyby=x])
+test(2212.01, DT[, head(.SD,2),    by=x], DT[, utils::head(.SD,2),    by=x])
+test(2212.02, DT[, head(.SD,2), keyby=x], DT[, utils::head(.SD,2), keyby=x])
+test(2212.03, DT[, head(.SD,5),    by=x], DT[, utils::head(.SD,5),    by=x])
+test(2212.04, DT[, head(.SD,5), keyby=x], DT[, utils::head(.SD,5), keyby=x])
+test(2212.05, DT[, tail(.SD,2),    by=x], DT[, utils::tail(.SD,2),    by=x])
+test(2212.06, DT[, tail(.SD,2), keyby=x], DT[, utils::tail(.SD,2), keyby=x])
+test(2212.07, DT[, tail(.SD,5),    by=x], DT[, utils::tail(.SD,5),    by=x])
+test(2212.08, DT[, tail(.SD,5), keyby=x], DT[, utils::tail(.SD,5), keyby=x])
 

From 4fa5fe7edb631d0d30f67873ebd570f5d96c2302 Mon Sep 17 00:00:00 2001
From: Matt Dowle <mattjdowle@gmail.com>
Date: Tue, 24 Aug 2021 23:39:22 -0600
Subject: [PATCH 05/16] more NA in tests and added integer64

---
 inst/tests/tests.Rraw | 29 ++++++++++++++++-------------
 1 file changed, 16 insertions(+), 13 deletions(-)

diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index 8a5c28e596..cbb04763f5 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -18037,17 +18037,20 @@ for (col in c("a","b","c")) {
 }
 
 # gforce head/tail for n>1, #5060
-DT = data.table(x  = sample(letters[1:5], 20, TRUE),
-                i1 = sample(-10:10, 20, TRUE),
-                i2 = sample(c(-10:10, NA), 20, TRUE),
-                d1 = as.numeric(sample(-10:10, 20, TRUE)),
-                d2 = as.numeric(sample(c(-10:10, NA), 20, TRUE)))
-test(2212.01, DT[, head(.SD,2),    by=x], DT[, utils::head(.SD,2),    by=x])
-test(2212.02, DT[, head(.SD,2), keyby=x], DT[, utils::head(.SD,2), keyby=x])
-test(2212.03, DT[, head(.SD,5),    by=x], DT[, utils::head(.SD,5),    by=x])
-test(2212.04, DT[, head(.SD,5), keyby=x], DT[, utils::head(.SD,5), keyby=x])
-test(2212.05, DT[, tail(.SD,2),    by=x], DT[, utils::tail(.SD,2),    by=x])
-test(2212.06, DT[, tail(.SD,2), keyby=x], DT[, utils::tail(.SD,2), keyby=x])
-test(2212.07, DT[, tail(.SD,5),    by=x], DT[, utils::tail(.SD,5),    by=x])
-test(2212.08, DT[, tail(.SD,5), keyby=x], DT[, utils::tail(.SD,5), keyby=x])
+set.seed(99)
+DT = data.table(x = sample(letters[1:5], 20, TRUE),
+                i = sample(c(-2L,0L,3L,NA), 20, TRUE),
+                d = sample(c(1.2,-3.4,5.6,NA), 20, TRUE))
+if (test_bit64) DT[, i64:=sample(as.integer64(c(-2200000000,+2400000000,NA)), 20, TRUE)]
+old = options(datatable.optimize=2L)
+test(2212.01, DT[, .N, by=x]$N, INT(4,6,5,2,3))   # the smallest group is 2, so n=5 tests n constrained to grpsize
+test(2212.02, DT[, head(.SD,2),    by=x, verbose=TRUE], DT[, utils::head(.SD,2),    by=x], output="optimized.*ghead")
+test(2212.03, DT[, head(.SD,2), keyby=x, verbose=TRUE], DT[, utils::head(.SD,2), keyby=x], output="optimized.*ghead")
+test(2212.04, DT[, head(.SD,5),    by=x, verbose=TRUE], DT[, utils::head(.SD,5),    by=x], output="optimized.*ghead")
+test(2212.05, DT[, head(.SD,5), keyby=x, verbose=TRUE], DT[, utils::head(.SD,5), keyby=x], output="optimized.*ghead")
+test(2212.06, DT[, tail(.SD,2),    by=x, verbose=TRUE], DT[, utils::tail(.SD,2),    by=x], output="optimized.*gtail")
+test(2212.07, DT[, tail(.SD,2), keyby=x, verbose=TRUE], DT[, utils::tail(.SD,2), keyby=x], output="optimized.*gtail")
+test(2212.08, DT[, tail(.SD,5),    by=x, verbose=TRUE], DT[, utils::tail(.SD,5),    by=x], output="optimized.*gtail")
+test(2212.09, DT[, tail(.SD,5), keyby=x, verbose=TRUE], DT[, utils::tail(.SD,5), keyby=x], output="optimized.*gtail")
+options(old)
 

From 64f019240f7071bd56548381b244d1b00dca6f6e Mon Sep 17 00:00:00 2001
From: Matt Dowle <mattjdowle@gmail.com>
Date: Tue, 24 Aug 2021 23:46:39 -0600
Subject: [PATCH 06/16] added char and vec cols to test

---
 inst/tests/tests.Rraw | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index cbb04763f5..9da737cf5e 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -18040,7 +18040,9 @@ for (col in c("a","b","c")) {
 set.seed(99)
 DT = data.table(x = sample(letters[1:5], 20, TRUE),
                 i = sample(c(-2L,0L,3L,NA), 20, TRUE),
-                d = sample(c(1.2,-3.4,5.6,NA), 20, TRUE))
+                d = sample(c(1.2,-3.4,5.6,NA), 20, TRUE),
+                s = sample(c("foo","bar",NA), 20, TRUE),
+                l = sample(list(1:3, mean, letters[4:5], NULL), 20, replace=TRUE))
 if (test_bit64) DT[, i64:=sample(as.integer64(c(-2200000000,+2400000000,NA)), 20, TRUE)]
 old = options(datatable.optimize=2L)
 test(2212.01, DT[, .N, by=x]$N, INT(4,6,5,2,3))   # the smallest group is 2, so n=5 tests n constrained to grpsize

From b6cd87f9ddf979e1ea7c9eaa512668908410db4b Mon Sep 17 00:00:00 2001
From: Matt Dowle <mattjdowle@gmail.com>
Date: Wed, 25 Aug 2021 00:00:45 -0600
Subject: [PATCH 07/16] more dup tests elsewhere

---
 inst/tests/tests.Rraw | 15 ++++-----------
 1 file changed, 4 insertions(+), 11 deletions(-)

diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index 9da737cf5e..b0b39120a4 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -8121,17 +8121,10 @@ test(1579.21, dt[, tail(.SD,1L), keyby=x], dt[, utils::tail(.SD,1L), keyby=x])
 # test(1579.22, dt[ , tail(.SD, 2), by = x, verbose = TRUE], output = 'GForce FALSE')
 
 mysub <- function(x, n) x[n]
-test(1579.23, dt[, .SD[2],  by=x],    dt[, mysub(.SD,2),  by=x])
-test(1579.24, dt[, .SD[2],  by=x],    dt[, mysub(.SD,2),  by=x])
-test(1579.25, dt[, .SD[2],  keyby=x], dt[, mysub(.SD,2),  keyby=x])
-test(1579.26, dt[, .SD[2],  keyby=x], dt[, mysub(.SD,2),  keyby=x])
-test(1579.27, dt[, .SD[2L], by=x],    dt[, mysub(.SD,2L), by=x])
-test(1579.28, dt[, .SD[2L], by=x],    dt[, mysub(.SD,2L), by=x])
-test(1579.29, dt[, .SD[2L], keyby=x], dt[, mysub(.SD,2L), keyby=x])
-test(1579.30, dt[, .SD[2L], keyby=x], dt[, mysub(.SD,2L), keyby=x])
-
-ans = capture.output(dt[, .SD[2], by=x, verbose=TRUE])
-test(1579.31, any(grepl("GForce optimized", ans)), TRUE)
+test(1579.23, dt[, .SD[2],  by=x, verbose=TRUE], dt[, mysub(.SD,2),  by=x], output="GForce optimized.*g[[]")
+test(1579.24, dt[, .SD[2],  keyby=x], dt[, mysub(.SD,2),  keyby=x])
+test(1579.25, dt[, .SD[2L], by=x],    dt[, mysub(.SD,2L), by=x])
+test(1579.26, dt[, .SD[2L], keyby=x], dt[, mysub(.SD,2L), keyby=x])
 
 options(datatable.optimize = Inf)
 

From 4d718825a4e139d787c4cbc2c4f588910b9b0350 Mon Sep 17 00:00:00 2001
From: Matt Dowle <mattjdowle@gmail.com>
Date: Wed, 25 Aug 2021 00:04:29 -0600
Subject: [PATCH 08/16] new test shows gnth on int64 is wrong NA when n>grpsize

---
 inst/tests/tests.Rraw | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index b0b39120a4..520b9fdcd9 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -8125,6 +8125,8 @@ test(1579.23, dt[, .SD[2],  by=x, verbose=TRUE], dt[, mysub(.SD,2),  by=x], outp
 test(1579.24, dt[, .SD[2],  keyby=x], dt[, mysub(.SD,2),  keyby=x])
 test(1579.25, dt[, .SD[2L], by=x],    dt[, mysub(.SD,2L), by=x])
 test(1579.26, dt[, .SD[2L], keyby=x], dt[, mysub(.SD,2L), keyby=x])
+test(1579.27, dt[, .SD[15], by=x],    dt[, mysub(.SD,2L), by=x])   # 15 tests > grpsize and that NA is correct including for integer64
+test(1579.28, dt[, .SD[15L], keyby=x],dt[, mysub(.SD,2L), keyby=x])
 
 options(datatable.optimize = Inf)
 

From 47cb68a0496f25892401ebe45e6553abce5d4db5 Mon Sep 17 00:00:00 2001
From: Matt Dowle <mattjdowle@gmail.com>
Date: Wed, 25 Aug 2021 00:19:11 -0600
Subject: [PATCH 09/16] now tests 1579.27 & 1579.28 do show the wrong NA for
 int64 (y still had 2 not 15 in prev commit)

---
 inst/tests/tests.Rraw | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index 520b9fdcd9..3b16efa04e 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -8125,8 +8125,8 @@ test(1579.23, dt[, .SD[2],  by=x, verbose=TRUE], dt[, mysub(.SD,2),  by=x], outp
 test(1579.24, dt[, .SD[2],  keyby=x], dt[, mysub(.SD,2),  keyby=x])
 test(1579.25, dt[, .SD[2L], by=x],    dt[, mysub(.SD,2L), by=x])
 test(1579.26, dt[, .SD[2L], keyby=x], dt[, mysub(.SD,2L), keyby=x])
-test(1579.27, dt[, .SD[15], by=x],    dt[, mysub(.SD,2L), by=x])   # 15 tests > grpsize and that NA is correct including for integer64
-test(1579.28, dt[, .SD[15L], keyby=x],dt[, mysub(.SD,2L), keyby=x])
+test(1579.27, dt[, .SD[15], by=x],    dt[, mysub(.SD,15), by=x])   # 15 tests > grpsize and that NA is correct including for integer64
+test(1579.28, dt[, .SD[15], keyby=x], dt[, mysub(.SD,15), keyby=x])
 
 options(datatable.optimize = Inf)
 

From 7009d1dc74fba92caec72aaf686e21a39f51644e Mon Sep 17 00:00:00 2001
From: Matt Dowle <mattjdowle@gmail.com>
Date: Wed, 25 Aug 2021 00:37:40 -0600
Subject: [PATCH 10/16] added int64 branch (easier now macro'd) so new tests
 1579.27 & 1579.28 now pass

---
 src/gsumm.c | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/src/gsumm.c b/src/gsumm.c
index cd8791f625..7470f9f527 100644
--- a/src/gsumm.c
+++ b/src/gsumm.c
@@ -918,7 +918,7 @@ static SEXP gfirstlast(SEXP x, const bool first, const int w, const bool headw)
   SEXP ans = PROTECT(allocVector(TYPEOF(x), anslen));
   int ansi = 0;
   #define DO(CTYPE, RTYPE, RNA, ASSIGN) {                                                          \
-    const CTYPE *xd = RTYPE(x);                                                                    \
+    const CTYPE *xd = (const CTYPE *)RTYPE(x);                                                     \
     if (headw) {                                                                                   \
       /* returning more than 1 per group; w>1 */                                                   \
       for (int i=0; i<ngrp; ++i) {                                                                 \
@@ -958,12 +958,14 @@ static SEXP gfirstlast(SEXP x, const bool first, const int w, const bool headw)
     }                                                                                              \
   }
   switch(TYPEOF(x)) {
-  case LGLSXP:  { int      *ansd=LOGICAL(ans); DO(int,      LOGICAL, NA_LOGICAL, ansd[ansi++]=val) } break;
-  case INTSXP:  { int      *ansd=INTEGER(ans); DO(int,      INTEGER, NA_INTEGER, ansd[ansi++]=val) } break;
-  case REALSXP: { double   *ansd=REAL(ans);    DO(double,   REAL,    NA_REAL,    ansd[ansi++]=val) } break;
-  case CPLXSXP: { Rcomplex *ansd=COMPLEX(ans); DO(Rcomplex, COMPLEX, NA_CPLX,    ansd[ansi++]=val) } break;
-  case STRSXP:  DO(SEXP, STRING_PTR, NA_STRING,                 SET_STRING_ELT(ans,ansi++,val))      break;
-  case VECSXP:  DO(SEXP, SEXPPTR_RO, ScalarLogical(NA_LOGICAL), SET_VECTOR_ELT(ans,ansi++,val))      break;
+  case LGLSXP:  { int      *ansd=LOGICAL(ans); DO(int,      LOGICAL, NA_LOGICAL,   ansd[ansi++]=val) } break;
+  case INTSXP:  { int      *ansd=INTEGER(ans); DO(int,      INTEGER, NA_INTEGER,   ansd[ansi++]=val) } break;
+  case REALSXP: if (INHERITS(x, char_integer64)) {
+           int64_t *ansd=(int64_t *)REAL(ans); DO(int64_t,  REAL,    NA_INTEGER64, ansd[ansi++]=val) }
+           else { double      *ansd=REAL(ans); DO(double,   REAL,    NA_REAL,      ansd[ansi++]=val) } break;
+  case CPLXSXP: { Rcomplex *ansd=COMPLEX(ans); DO(Rcomplex, COMPLEX, NA_CPLX,      ansd[ansi++]=val) } break;
+  case STRSXP:  DO(SEXP, STRING_PTR, NA_STRING,                 SET_STRING_ELT(ans,ansi++,val))        break;
+  case VECSXP:  DO(SEXP, SEXPPTR_RO, ScalarLogical(NA_LOGICAL), SET_VECTOR_ELT(ans,ansi++,val))        break;
   default:
     error(_("Type '%s' not supported by GForce head/tail/first/last/`[`. Either add the prefix utils::head(.) or turn off GForce optimization using options(datatable.optimize=1)"), type2char(TYPEOF(x)));
   }

From ba272b79a147e12594493d91367b2a153e75c42a Mon Sep 17 00:00:00 2001
From: Matt Dowle <mattjdowle@gmail.com>
Date: Wed, 25 Aug 2021 00:59:46 -0600
Subject: [PATCH 11/16] grouping by 2 columns fails with invalid 'times'
 argument; extra test

---
 inst/tests/tests.Rraw | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index 3b16efa04e..70cacaa5fe 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -18034,6 +18034,7 @@ for (col in c("a","b","c")) {
 # gforce head/tail for n>1, #5060
 set.seed(99)
 DT = data.table(x = sample(letters[1:5], 20, TRUE),
+                y = rep.int(1:2, 10),  # to test 2 grouping columns get rep'd properly
                 i = sample(c(-2L,0L,3L,NA), 20, TRUE),
                 d = sample(c(1.2,-3.4,5.6,NA), 20, TRUE),
                 s = sample(c("foo","bar",NA), 20, TRUE),
@@ -18049,5 +18050,6 @@ test(2212.06, DT[, tail(.SD,2),    by=x, verbose=TRUE], DT[, utils::tail(.SD,2),
 test(2212.07, DT[, tail(.SD,2), keyby=x, verbose=TRUE], DT[, utils::tail(.SD,2), keyby=x], output="optimized.*gtail")
 test(2212.08, DT[, tail(.SD,5),    by=x, verbose=TRUE], DT[, utils::tail(.SD,5),    by=x], output="optimized.*gtail")
 test(2212.09, DT[, tail(.SD,5), keyby=x, verbose=TRUE], DT[, utils::tail(.SD,5), keyby=x], output="optimized.*gtail")
+test(2212.10, DT[, tail(.SD,2), by=.(x,y), verbose=TRUE], DT[, utils::tail(.SD,2), keyby=x], output="optimized.*gtail")
 options(old)
 

From 3926224c14b88f6d534f8bb5fbd0450a0a2987cd Mon Sep 17 00:00:00 2001
From: Matt Dowle <mattjdowle@gmail.com>
Date: Wed, 25 Aug 2021 01:05:30 -0600
Subject: [PATCH 12/16] rep on 2 group columns fixed

---
 R/data.table.R | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/R/data.table.R b/R/data.table.R
index 66f81afc71..26c01def71 100644
--- a/R/data.table.R
+++ b/R/data.table.R
@@ -1852,7 +1852,6 @@ replace_dot_alias = function(e) {
          (q1 <- q[[1L]]) %chin% c("ghead", "gtail") && q3!=1) q3
         else 0
       }
-
       if (jsub[[1L]] == "list"){
         q3 = max(sapply(jsub, headTail_arg))
       } else if (length(jsub)==3L) {
@@ -1861,7 +1860,7 @@ replace_dot_alias = function(e) {
     }
     if (q3 > 0) {
       grplens = pmin.int(q3, len__)
-      g = list(rep(unlist(g), grplens))
+      g = lapply(g, rep.int, times=grplens)
     }
     ans = c(g, ans)
   } else {

From aff4a4756878988047b0f703e90691e2391d5569 Mon Sep 17 00:00:00 2001
From: Matt Dowle <mattjdowle@gmail.com>
Date: Wed, 25 Aug 2021 01:06:46 -0600
Subject: [PATCH 13/16] trailing whitespace

---
 NEWS.md        | 2 +-
 R/data.table.R | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/NEWS.md b/NEWS.md
index 8448fefac5..a17ecfd067 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -132,7 +132,7 @@
     # 1:     1
     # 2:     3
     ```
-    
+
 24. `DT[, head(.SD,n), by=grp]` (and `tail`) is now optimized when `n>1`, [#5060](https://github.com/Rdatatable/data.table/issues/5060) [#523](https://github.com/Rdatatable/data.table/issues/523#issuecomment-162934391). Thanks to Jan Gorecki and Michael Young for requesting, and Benjamin Schwendinger for the PR.
 
 ## BUG FIXES
diff --git a/R/data.table.R b/R/data.table.R
index 26c01def71..4dfa9c276a 100644
--- a/R/data.table.R
+++ b/R/data.table.R
@@ -809,8 +809,8 @@ replace_dot_alias = function(e) {
           # when the 'by' expression includes get/mget/eval, all.vars cannot be trusted to infer all used columns, #4981
           allbyvars = NULL
         else
-          allbyvars = intersect(all.vars(bysub), names_x)  
-        
+          allbyvars = intersect(all.vars(bysub), names_x)
+
         orderedirows = .Call(CisOrderedSubset, irows, nrow(x))  # TRUE when irows is NULL (i.e. no i clause). Similar but better than is.sorted(f__)
         bysameorder = byindex = FALSE
         if (!bysub %iscall% ":" && ##Fix #4285

From b13c3859b13ae4c1c5d5afdccc3de57f1b61404f Mon Sep 17 00:00:00 2001
From: Matt Dowle <mattjdowle@gmail.com>
Date: Wed, 25 Aug 2021 01:10:30 -0600
Subject: [PATCH 14/16] fix test

---
 inst/tests/tests.Rraw | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index 70cacaa5fe..997326958c 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -18050,6 +18050,6 @@ test(2212.06, DT[, tail(.SD,2),    by=x, verbose=TRUE], DT[, utils::tail(.SD,2),
 test(2212.07, DT[, tail(.SD,2), keyby=x, verbose=TRUE], DT[, utils::tail(.SD,2), keyby=x], output="optimized.*gtail")
 test(2212.08, DT[, tail(.SD,5),    by=x, verbose=TRUE], DT[, utils::tail(.SD,5),    by=x], output="optimized.*gtail")
 test(2212.09, DT[, tail(.SD,5), keyby=x, verbose=TRUE], DT[, utils::tail(.SD,5), keyby=x], output="optimized.*gtail")
-test(2212.10, DT[, tail(.SD,2), by=.(x,y), verbose=TRUE], DT[, utils::tail(.SD,2), keyby=x], output="optimized.*gtail")
+test(2212.10, DT[, tail(.SD,2), by=.(x,y), verbose=TRUE], DT[, utils::tail(.SD,2), by=.(x,y)], output="optimized.*gtail")
 options(old)
 

From cd018e7565031437aca4357b3b36ed84f30b6d86 Mon Sep 17 00:00:00 2001
From: Matt Dowle <mattjdowle@gmail.com>
Date: Wed, 25 Aug 2021 01:35:17 -0600
Subject: [PATCH 15/16] move tests up

---
 inst/tests/tests.Rraw | 49 ++++++++++++++++++++-----------------------
 1 file changed, 23 insertions(+), 26 deletions(-)

diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index 997326958c..32b16e471f 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -8116,18 +8116,37 @@ test(1579.18, dt[, tail(.SD,1L), by=x],    dt[, utils::tail(.SD,1L), by=x])
 test(1579.19, dt[, tail(.SD,1L), by=x],    dt[, utils::tail(.SD,1L), by=x])
 test(1579.20, dt[, tail(.SD,1L), keyby=x], dt[, utils::tail(.SD,1L), keyby=x])
 test(1579.21, dt[, tail(.SD,1L), keyby=x], dt[, utils::tail(.SD,1L), keyby=x])
-# GForce _doesn't_ work when n > 1
-# removed test since GForce now works for head/tail with n > 1
-# test(1579.22, dt[ , tail(.SD, 2), by = x, verbose = TRUE], output = 'GForce FALSE')
+# 1579.22 tested gtail with n>1; now 1579.4+ below
 
 mysub <- function(x, n) x[n]
 test(1579.23, dt[, .SD[2],  by=x, verbose=TRUE], dt[, mysub(.SD,2),  by=x], output="GForce optimized.*g[[]")
 test(1579.24, dt[, .SD[2],  keyby=x], dt[, mysub(.SD,2),  keyby=x])
 test(1579.25, dt[, .SD[2L], by=x],    dt[, mysub(.SD,2L), by=x])
 test(1579.26, dt[, .SD[2L], keyby=x], dt[, mysub(.SD,2L), keyby=x])
-test(1579.27, dt[, .SD[15], by=x],    dt[, mysub(.SD,15), by=x])   # 15 tests > grpsize and that NA is correct including for integer64
+test(1579.27, dt[, .SD[15], by=x],    dt[, mysub(.SD,15), by=x])   # tests 15 > grpsize and that NA is correct including for integer64
 test(1579.28, dt[, .SD[15], keyby=x], dt[, mysub(.SD,15), keyby=x])
 
+# gforce head/tail for n>1, #5060
+set.seed(99)
+DT = data.table(x = sample(letters[1:5], 20, TRUE),
+                y = rep.int(1:2, 10),  # to test 2 grouping columns get rep'd properly
+                i = sample(c(-2L,0L,3L,NA), 20, TRUE),
+                d = sample(c(1.2,-3.4,5.6,NA), 20, TRUE),
+                s = sample(c("foo","bar",NA), 20, TRUE),
+                l = sample(list(1:3, mean, letters[4:5], NULL), 20, replace=TRUE))
+if (test_bit64) DT[, i64:=sample(as.integer64(c(-2200000000,+2400000000,NA)), 20, TRUE)]
+options(datatable.optimize=2L)
+test(1579.401, DT[, .N, by=x]$N, INT(4,6,5,2,3))   # the smallest group is 2, so n=5 tests n constrained to grpsize
+test(1579.402, DT[, head(.SD,2),    by=x, verbose=TRUE], DT[, utils::head(.SD,2),    by=x], output="optimized.*ghead")
+test(1579.403, DT[, head(.SD,2), keyby=x, verbose=TRUE], DT[, utils::head(.SD,2), keyby=x], output="optimized.*ghead")
+test(1579.404, DT[, head(.SD,5),    by=x, verbose=TRUE], DT[, utils::head(.SD,5),    by=x], output="optimized.*ghead")
+test(1579.405, DT[, head(.SD,5), keyby=x, verbose=TRUE], DT[, utils::head(.SD,5), keyby=x], output="optimized.*ghead")
+test(1579.406, DT[, tail(.SD,2),    by=x, verbose=TRUE], DT[, utils::tail(.SD,2),    by=x], output="optimized.*gtail")
+test(1579.407, DT[, tail(.SD,2), keyby=x, verbose=TRUE], DT[, utils::tail(.SD,2), keyby=x], output="optimized.*gtail")
+test(1579.408, DT[, tail(.SD,5),    by=x, verbose=TRUE], DT[, utils::tail(.SD,5),    by=x], output="optimized.*gtail")
+test(1579.409, DT[, tail(.SD,5), keyby=x, verbose=TRUE], DT[, utils::tail(.SD,5), keyby=x], output="optimized.*gtail")
+test(1579.410, DT[, tail(.SD,2), by=.(x,y), verbose=TRUE], DT[, utils::tail(.SD,2), by=.(x,y)], output="optimized.*gtail")
+
 options(datatable.optimize = Inf)
 
 # test for #1419, rleid doesn't remove names attribute
@@ -18031,25 +18050,3 @@ for (col in c("a","b","c")) {
   }
 }
 
-# gforce head/tail for n>1, #5060
-set.seed(99)
-DT = data.table(x = sample(letters[1:5], 20, TRUE),
-                y = rep.int(1:2, 10),  # to test 2 grouping columns get rep'd properly
-                i = sample(c(-2L,0L,3L,NA), 20, TRUE),
-                d = sample(c(1.2,-3.4,5.6,NA), 20, TRUE),
-                s = sample(c("foo","bar",NA), 20, TRUE),
-                l = sample(list(1:3, mean, letters[4:5], NULL), 20, replace=TRUE))
-if (test_bit64) DT[, i64:=sample(as.integer64(c(-2200000000,+2400000000,NA)), 20, TRUE)]
-old = options(datatable.optimize=2L)
-test(2212.01, DT[, .N, by=x]$N, INT(4,6,5,2,3))   # the smallest group is 2, so n=5 tests n constrained to grpsize
-test(2212.02, DT[, head(.SD,2),    by=x, verbose=TRUE], DT[, utils::head(.SD,2),    by=x], output="optimized.*ghead")
-test(2212.03, DT[, head(.SD,2), keyby=x, verbose=TRUE], DT[, utils::head(.SD,2), keyby=x], output="optimized.*ghead")
-test(2212.04, DT[, head(.SD,5),    by=x, verbose=TRUE], DT[, utils::head(.SD,5),    by=x], output="optimized.*ghead")
-test(2212.05, DT[, head(.SD,5), keyby=x, verbose=TRUE], DT[, utils::head(.SD,5), keyby=x], output="optimized.*ghead")
-test(2212.06, DT[, tail(.SD,2),    by=x, verbose=TRUE], DT[, utils::tail(.SD,2),    by=x], output="optimized.*gtail")
-test(2212.07, DT[, tail(.SD,2), keyby=x, verbose=TRUE], DT[, utils::tail(.SD,2), keyby=x], output="optimized.*gtail")
-test(2212.08, DT[, tail(.SD,5),    by=x, verbose=TRUE], DT[, utils::tail(.SD,5),    by=x], output="optimized.*gtail")
-test(2212.09, DT[, tail(.SD,5), keyby=x, verbose=TRUE], DT[, utils::tail(.SD,5), keyby=x], output="optimized.*gtail")
-test(2212.10, DT[, tail(.SD,2), by=.(x,y), verbose=TRUE], DT[, utils::tail(.SD,2), by=.(x,y)], output="optimized.*gtail")
-options(old)
-

From fde19a9c643bbdacb91549e6ac641ec28bc3cdf3 Mon Sep 17 00:00:00 2001
From: Matt Dowle <mattjdowle@gmail.com>
Date: Wed, 25 Aug 2021 01:55:03 -0600
Subject: [PATCH 16/16] news item tweak

---
 NEWS.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/NEWS.md b/NEWS.md
index a17ecfd067..4c9647cdfb 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -133,7 +133,7 @@
     # 2:     3
     ```
 
-24. `DT[, head(.SD,n), by=grp]` (and `tail`) is now optimized when `n>1`, [#5060](https://github.com/Rdatatable/data.table/issues/5060) [#523](https://github.com/Rdatatable/data.table/issues/523#issuecomment-162934391). Thanks to Jan Gorecki and Michael Young for requesting, and Benjamin Schwendinger for the PR.
+24. `DT[, head(.SD,n), by=grp]` and `tail` are now optimized when `n>1`, [#5060](https://github.com/Rdatatable/data.table/issues/5060) [#523](https://github.com/Rdatatable/data.table/issues/523#issuecomment-162934391). `n==1` was already optimized. Thanks to Jan Gorecki and Michael Young for requesting, and Benjamin Schwendinger for the PR.
 
 ## BUG FIXES