From 150c898e5e0d03e645cdb2829085c532aef3619e Mon Sep 17 00:00:00 2001
From: Benjamin Schwendinger <benjamin.schwendinger@tuwien.ac.at>
Date: Sun, 22 Aug 2021 18:28:33 +0200
Subject: [PATCH 01/12] init fix

---
 inst/tests/tests.Rraw | 38 +++++++++++++++++++++++++++++++++++++
 src/gsumm.c           | 44 +++++++++++++++++++++++++++----------------
 2 files changed, 66 insertions(+), 16 deletions(-)

diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index 6626e4f0af..a7ff7d944f 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -17979,3 +17979,41 @@ test(2209.3, fread('A|B,C\n1|+,4\n2|-,5\n3|-,6\n', dec=','), data.table(A=1:3, '
 test(2209.4, fread('A|B|C\n1|0,4|a\n2|0,5|b\n', dec=','), data.table(A=1:2, B=c(0.4,0.5), C=c("a","b")))  # ok before
 test(2209.5, fread('A|B,C\n1|0,4\n2|0,5\n', dec=','), data.table(A=1:2, "B,C"=c(0.4,0.5)))
 
+# edge case bug in GForce #1994
+DT = data.table(a=1L, b=2, c="a")[0] # empty table
+# for integers
+test(2210.01, DT[rep(NA_integer_, 10), sum(a, na.rm=FALSE), by=list(vv=1:10)],
+             DT[rep(NA_integer_, 10), base::sum(a, na.rm=FALSE), by=list(vv=1:10)])
+test(2210.02, DT[rep(NA_integer_, 10), sum(a, na.rm=TRUE), by=list(vv=1:10)],
+             DT[rep(NA_integer_, 10), base::sum(a, na.rm=TRUE), by=list(vv=1:10)])
+test(2210.03, DT[rep(NA_integer_, 10), mean(a, na.rm=FALSE), by=list(vv=1:10)],
+             DT[rep(NA_integer_, 10), base::mean(a, na.rm=FALSE), by=list(vv=1:10)])
+test(2210.04, DT[rep(NA_integer_, 10), mean(a, na.rm=TRUE), by=list(vv=1:10)],
+             DT[rep(NA_integer_, 10), base::mean(a, na.rm=TRUE), by=list(vv=1:10)])
+test(2210.05, DT[rep(NA_integer_, 10), var(a, na.rm=FALSE), by=list(vv=1:10)],
+             DT[rep(NA_integer_, 10), stats::var(a, na.rm=FALSE), by=list(vv=1:10)])
+test(2210.06, DT[rep(NA_integer_, 10), var(a, na.rm=TRUE), by=list(vv=1:10)],
+             DT[rep(NA_integer_, 10), stats::var(a, na.rm=TRUE), by=list(vv=1:10)])
+test(2210.07, DT[rep(NA_integer_, 10), sd(a, na.rm=FALSE), by=list(vv=1:10)],
+             DT[rep(NA_integer_, 10), stats::sd(a, na.rm=FALSE), by=list(vv=1:10)])
+test(2210.08, DT[rep(NA_integer_, 10), sd(a, na.rm=TRUE), by=list(vv=1:10)],
+             DT[rep(NA_integer_, 10), stats::sd(a, na.rm=TRUE), by=list(vv=1:10)])
+test(2210.09, DT[rep(NA_integer_, 10), median(a, na.rm=FALSE), by=list(vv=1:10)],
+             DT[rep(NA_integer_, 10), stats::median(a, na.rm=FALSE), by=list(vv=1:10)])
+test(2210.10, DT[rep(NA_integer_, 10), median(a, na.rm=TRUE), by=list(vv=1:10)],
+             DT[rep(NA_integer_, 10), stats::median(a, na.rm=TRUE), by=list(vv=1:10)])
+test(2210.11, DT[rep(NA_integer_, 10), prod(a, na.rm=FALSE), by=list(vv=1:10)],
+             DT[rep(NA_integer_, 10), base::prod(a, na.rm=FALSE), by=list(vv=1:10)])
+test(2210.12, DT[rep(NA_integer_, 10), prod(a, na.rm=TRUE), by=list(vv=1:10)],
+             DT[rep(NA_integer_, 10), base::prod(a, na.rm=TRUE), by=list(vv=1:10)])
+test(2210.13, DT[rep(NA_integer_, 10), min(a, na.rm=FALSE), by=list(vv=1:10)],
+             DT[rep(NA_integer_, 10), base::min(a, na.rm=FALSE), by=list(vv=1:10)])
+test(2210.14, DT[rep(NA_integer_, 10), min(a, na.rm=TRUE), by=list(vv=1:10)],
+             DT[rep(NA_integer_, 10), base::min(a, na.rm=TRUE), by=list(vv=1:10)])
+test(2210.15, DT[rep(NA_integer_, 10), max(a, na.rm=FALSE), by=list(vv=1:10)],
+             DT[rep(NA_integer_, 10), base::max(a, na.rm=FALSE), by=list(vv=1:10)])
+test(2210.16, DT[rep(NA_integer_, 10), max(a, na.rm=TRUE), by=list(vv=1:10)],
+             DT[rep(NA_integer_, 10), base::max(a, na.rm=TRUE), by=list(vv=1:10)])
+# for numeric
+
+# for character
diff --git a/src/gsumm.c b/src/gsumm.c
index 0fe05d1299..e94fca1316 100644
--- a/src/gsumm.c
+++ b/src/gsumm.c
@@ -235,7 +235,7 @@ void *gather(SEXP x, bool *anyNA)
       } else {
         const int *my_x = irows + b*batchSize;
         for (int i=0; i<howMany; i++) {
-          int elem = thisx[ my_x[i]-1 ];
+          int elem = my_x[i] == NA_INTEGER ? NA_INTEGER : thisx[ my_x[i]-1 ];
           my_gx[ my_tmpcounts[my_high[i]]++ ] = elem;
           if (elem==NA_INTEGER) my_anyNA = true;
         }
@@ -264,7 +264,7 @@ void *gather(SEXP x, bool *anyNA)
         } else {
           const int *my_x = irows + b*batchSize;
           for (int i=0; i<howMany; i++) {
-            double elem = thisx[ my_x[i]-1 ];
+            double elem = my_x[i] == NA_INTEGER ? NA_REAL : thisx[ my_x[i]-1 ];
             my_gx[ my_tmpcounts[my_high[i]]++ ] = elem;
             if (ISNAN(elem)) my_anyNA = true;
           }
@@ -291,7 +291,7 @@ void *gather(SEXP x, bool *anyNA)
         } else {
           const int *my_x = irows + b*batchSize;
           for (int i=0; i<howMany; i++) {
-            int64_t elem = thisx[ my_x[i]-1 ];
+            int64_t elem = my_x[i] == NA_INTEGER ? NA_INTEGER64 : thisx[ my_x[i]-1 ];
             my_gx[ my_tmpcounts[my_high[i]]++ ] = elem;
             if (elem==INT64_MIN) my_anyNA = true;
           }
@@ -322,7 +322,7 @@ void *gather(SEXP x, bool *anyNA)
       } else {
         const int *my_x = irows + b*batchSize;
         for (int i=0; i<howMany; i++) {
-          Rcomplex elem = thisx[ my_x[i]-1 ];
+          Rcomplex elem = my_x[i] == NA_INTEGER ? NA_CPLX : thisx[ my_x[i]-1 ];
           my_gx[ my_tmpcounts[my_high[i]]++ ] = elem;
           if (ISNAN(elem.r) && ISNAN(elem.i)) my_anyNA = true;
         }
@@ -867,7 +867,8 @@ SEXP gmedian(SEXP x, SEXP narmArg) {
         int k = ff[i]+j-1;
         if (isunsorted) k = oo[k]-1;
         k = (irowslen == -1) ? k : irows[k]-1;
-        if (isInt64 ? xi64[k]==NA_INTEGER64 : ISNAN(xd[k])) nacount++;
+        if (k+1 == NA_INTEGER) nacount++;
+        else if (isInt64 ? xi64[k]==NA_INTEGER64 : ISNAN(xd[k])) nacount++;
         else subd[j-nacount] = xd[k];
       }
       thisgrpsize -= nacount;  // all-NA is returned as NA_REAL via n==0 case inside *quickselect
@@ -884,7 +885,8 @@ SEXP gmedian(SEXP x, SEXP narmArg) {
         int k = ff[i]+j-1;
         if (isunsorted) k = oo[k]-1;
         k = (irowslen == -1) ? k : irows[k]-1;
-        if (xi[k]==NA_INTEGER) nacount++;
+        if (k+1 == NA_INTEGER) nacount++;
+        else if (xi[k]==NA_INTEGER) nacount++;
         else subi[j-nacount] = xi[k];
       }
       ansd[i] = (nacount && !narm) ? NA_REAL : iquickselect(subi, thisgrpsize-nacount);
@@ -1082,7 +1084,7 @@ SEXP gnthvalue(SEXP x, SEXP valArg) {
       int k = ff[i]+val-2;
       if (isunsorted) k = oo[k]-1;
       k = (irowslen == -1) ? k : irows[k]-1;
-      ians[i] = ix[k];
+      ians[i] = (k+1 == NA_INTEGER) ? NA_LOGICAL : ix[k];
     }
   }
     break;
@@ -1095,7 +1097,7 @@ SEXP gnthvalue(SEXP x, SEXP valArg) {
       int k = ff[i]+val-2;
       if (isunsorted) k = oo[k]-1;
       k = (irowslen == -1) ? k : irows[k]-1;
-      ians[i] = ix[k];
+      ians[i] = (k+1 == NA_INTEGER) ? NA_INTEGER : ix[k];
     }
   }
     break;
@@ -1108,7 +1110,7 @@ SEXP gnthvalue(SEXP x, SEXP valArg) {
       int k = ff[i]+val-2;
       if (isunsorted) k = oo[k]-1;
       k = (irowslen == -1) ? k : irows[k]-1;
-      dans[i] = dx[k];
+      dans[i] = (k+1 == NA_INTEGER) ? NA_REAL : dx[k];
     }
   }
     break;
@@ -1121,7 +1123,7 @@ SEXP gnthvalue(SEXP x, SEXP valArg) {
       int k = ff[i]+val-2;
       if (isunsorted) k = oo[k]-1;
       k = (irowslen == -1) ? k : irows[k]-1;
-      dans[i] = dx[k];
+      dans[i] = (k+1 == NA_INTEGER) ? NA_CPLX : dx[k];
     }
   } break;
   case STRSXP:
@@ -1131,7 +1133,7 @@ SEXP gnthvalue(SEXP x, SEXP valArg) {
       int k = ff[i]+val-2;
       if (isunsorted) k = oo[k]-1;
       k = (irowslen == -1) ? k : irows[k]-1;
-      SET_STRING_ELT(ans, i, STRING_ELT(x, k));
+      SET_STRING_ELT(ans, i, (k+1 == NA_INTEGER) ? NA_STRING : STRING_ELT(x, k));
     }
     break;
   case VECSXP:
@@ -1141,7 +1143,7 @@ SEXP gnthvalue(SEXP x, SEXP valArg) {
       int k = ff[i]+val-2;
       if (isunsorted) k = oo[k]-1;
       k = (irowslen == -1) ? k : irows[k]-1;
-      SET_VECTOR_ELT(ans, i, VECTOR_ELT(x, k));
+      SET_VECTOR_ELT(ans, i, (k+1 == NA_INTEGER) ? ScalarLogical(NA_LOGICAL) : VECTOR_ELT(x, k));
     }
     break;
   default:
@@ -1315,9 +1317,13 @@ SEXP gprod(SEXP x, SEXP narm)
     for (int i=0; i<n; ++i) {
       const int thisgrp = grp[i];
       const int ix = (irowslen == -1) ? i : irows[i]-1;
-      if(INTEGER(x)[ix] == NA_INTEGER) {
-        if (!LOGICAL(narm)[0]) s[thisgrp] = NA_REAL;  // Let NA_REAL propogate from here. R_NaReal is IEEE.
-        continue;
+      if (!LOGICAL(narm)[0]) {
+        // Let NA_REAL propogate from here. R_NaReal is IEEE.
+        if (ix+1 == NA_INTEGER) { s[thisgrp] = NA_REAL; continue; }
+        if (INTEGER(x)[ix] == NA_INTEGER) { s[thisgrp] = NA_REAL; continue; }
+      } else {
+        if (ix+1 == NA_INTEGER) continue;
+        if (INTEGER(x)[ix] == NA_INTEGER) continue;
       }
       s[thisgrp] *= INTEGER(x)[ix];  // no under/overflow here, s is long double (like base)
     }
@@ -1331,7 +1337,13 @@ SEXP gprod(SEXP x, SEXP narm)
     for (int i=0; i<n; ++i) {
       const int thisgrp = grp[i];
       const int ix = (irowslen == -1) ? i : irows[i]-1;
-      if(ISNAN(REAL(x)[ix]) && LOGICAL(narm)[0]) continue;  // else let NA_REAL propogate from here
+      if (!LOGICAL(narm)[0]) {
+        if (ix+1 == NA_INTEGER) { s[thisgrp] = NA_REAL; continue; }
+        if (ISNAN(REAL(x)[ix])) { s[thisgrp] = NA_REAL; continue; }
+      } else {
+        if (ix+1 == NA_INTEGER) continue;
+        if (ISNAN(REAL(x)[ix])) continue; // else let NA_REAL propogate from here
+      }
       s[thisgrp] *= REAL(x)[ix];  // done in long double, like base
     }
     for (int i=0; i<ngrp; ++i) {

From 772337a15cad395e92d46a429e4709adcca8ad0f Mon Sep 17 00:00:00 2001
From: Benjamin Schwendinger <benjamin.schwendinger@tuwien.ac.at>
Date: Sun, 22 Aug 2021 20:52:31 +0200
Subject: [PATCH 02/12] fixed gforce edge case

---
 NEWS.md               |   2 +
 inst/tests/tests.Rraw | 101 +++++++++++++++++++++++++++++++++++-------
 src/gsumm.c           |  52 +++++++++++++---------
 3 files changed, 119 insertions(+), 36 deletions(-)

diff --git a/NEWS.md b/NEWS.md
index f6696b71e7..c073618be2 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -269,6 +269,8 @@
     # 1: 2017-10-02 09:55:00
     ```
 
+39. `GForce` optimization could run into a segfault when using NA values as argument for subsetting I, [#1994](https://github.com/Rdatatable/data.table/issues/1994). This happened due to out-of-bound conditions, since NA_INTEGER is internally represented by INT_MIN - 1L. Thanks to Arun Srinivasan for reporting, and Benjamin Schwendinger for the PR.
+
 ## NOTES
 
 1. New feature 29 in v1.12.4 (Oct 2019) introduced zero-copy coercion. Our thinking is that requiring you to get the type right in the case of `0` (type double) vs `0L` (type integer) is too inconvenient for you the user. So such coercions happen in `data.table` automatically without warning. Thanks to zero-copy coercion there is no speed penalty, even when calling `set()` many times in a loop, so there's no speed penalty to warn you about either. However, we believe that assigning a character value such as `"2"` into an integer column is more likely to be a user mistake that you would like to be warned about. The type difference (character vs integer) may be the only clue that you have selected the wrong column, or typed the wrong variable to be assigned to that column. For this reason we view character to numeric-like coercion differently and will warn about it. If it is correct, then the warning is intended to nudge you to wrap the RHS with `as.<type>()` so that it is clear to readers of your code that a coercion from character to that type is intended. For example :
diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index a7ff7d944f..dafacf780b 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -17980,7 +17980,7 @@ test(2209.4, fread('A|B|C\n1|0,4|a\n2|0,5|b\n', dec=','), data.table(A=1:2, B=c(
 test(2209.5, fread('A|B,C\n1|0,4\n2|0,5\n', dec=','), data.table(A=1:2, "B,C"=c(0.4,0.5)))
 
 # edge case bug in GForce #1994
-DT = data.table(a=1L, b=2, c="a")[0] # empty table
+DT = data.table(a=1L, b=2, c="a")
 # for integers
 test(2210.01, DT[rep(NA_integer_, 10), sum(a, na.rm=FALSE), by=list(vv=1:10)],
              DT[rep(NA_integer_, 10), base::sum(a, na.rm=FALSE), by=list(vv=1:10)])
@@ -17990,30 +17990,99 @@ test(2210.03, DT[rep(NA_integer_, 10), mean(a, na.rm=FALSE), by=list(vv=1:10)],
              DT[rep(NA_integer_, 10), base::mean(a, na.rm=FALSE), by=list(vv=1:10)])
 test(2210.04, DT[rep(NA_integer_, 10), mean(a, na.rm=TRUE), by=list(vv=1:10)],
              DT[rep(NA_integer_, 10), base::mean(a, na.rm=TRUE), by=list(vv=1:10)])
-test(2210.05, DT[rep(NA_integer_, 10), var(a, na.rm=FALSE), by=list(vv=1:10)],
-             DT[rep(NA_integer_, 10), stats::var(a, na.rm=FALSE), by=list(vv=1:10)])
-test(2210.06, DT[rep(NA_integer_, 10), var(a, na.rm=TRUE), by=list(vv=1:10)],
-             DT[rep(NA_integer_, 10), stats::var(a, na.rm=TRUE), by=list(vv=1:10)])
-test(2210.07, DT[rep(NA_integer_, 10), sd(a, na.rm=FALSE), by=list(vv=1:10)],
-             DT[rep(NA_integer_, 10), stats::sd(a, na.rm=FALSE), by=list(vv=1:10)])
-test(2210.08, DT[rep(NA_integer_, 10), sd(a, na.rm=TRUE), by=list(vv=1:10)],
-             DT[rep(NA_integer_, 10), stats::sd(a, na.rm=TRUE), by=list(vv=1:10)])
+test(2210.05, DT[rep(NA_integer_, 10), var(a, na.rm=FALSE), by=list(vv=rep(1:5, each=2L))],
+             DT[rep(NA_integer_, 10), stats::var(a, na.rm=FALSE), by=list(vv=rep(1:5, each=2L))])
+test(2210.06, DT[rep(NA_integer_, 10), var(a, na.rm=TRUE), by=list(vv=rep(1:5, each=2L))],
+             DT[rep(NA_integer_, 10), stats::var(a, na.rm=TRUE), by=list(vv=rep(1:5, each=2L))])
+test(2210.07, DT[rep(NA_integer_, 10), sd(a, na.rm=FALSE), by=list(vv=rep(1:5, each=2L))],
+             DT[rep(NA_integer_, 10), stats::sd(a, na.rm=FALSE), by=list(vv=rep(1:5, each=2L))])
+test(2210.08, DT[rep(NA_integer_, 10), sd(a, na.rm=TRUE), by=list(vv=rep(1:5, each=2L))],
+             DT[rep(NA_integer_, 10), stats::sd(a, na.rm=TRUE), by=list(vv=rep(1:5, each=2L))])
 test(2210.09, DT[rep(NA_integer_, 10), median(a, na.rm=FALSE), by=list(vv=1:10)],
-             DT[rep(NA_integer_, 10), stats::median(a, na.rm=FALSE), by=list(vv=1:10)])
+             data.table(vv=1:10, V1=NA_real_))
 test(2210.10, DT[rep(NA_integer_, 10), median(a, na.rm=TRUE), by=list(vv=1:10)],
-             DT[rep(NA_integer_, 10), stats::median(a, na.rm=TRUE), by=list(vv=1:10)])
+             data.table(vv=1:10, V1=NA_real_))
 test(2210.11, DT[rep(NA_integer_, 10), prod(a, na.rm=FALSE), by=list(vv=1:10)],
              DT[rep(NA_integer_, 10), base::prod(a, na.rm=FALSE), by=list(vv=1:10)])
 test(2210.12, DT[rep(NA_integer_, 10), prod(a, na.rm=TRUE), by=list(vv=1:10)],
              DT[rep(NA_integer_, 10), base::prod(a, na.rm=TRUE), by=list(vv=1:10)])
 test(2210.13, DT[rep(NA_integer_, 10), min(a, na.rm=FALSE), by=list(vv=1:10)],
-             DT[rep(NA_integer_, 10), base::min(a, na.rm=FALSE), by=list(vv=1:10)])
+             data.table(vv=1:10, V1=NA_integer_))
 test(2210.14, DT[rep(NA_integer_, 10), min(a, na.rm=TRUE), by=list(vv=1:10)],
-             DT[rep(NA_integer_, 10), base::min(a, na.rm=TRUE), by=list(vv=1:10)])
+             data.table(vv=1:10, V1=NA_integer_))
 test(2210.15, DT[rep(NA_integer_, 10), max(a, na.rm=FALSE), by=list(vv=1:10)],
-             DT[rep(NA_integer_, 10), base::max(a, na.rm=FALSE), by=list(vv=1:10)])
+             data.table(vv=1:10, V1=NA_integer_))
 test(2210.16, DT[rep(NA_integer_, 10), max(a, na.rm=TRUE), by=list(vv=1:10)],
-             DT[rep(NA_integer_, 10), base::max(a, na.rm=TRUE), by=list(vv=1:10)])
+             data.table(vv=1:10, V1=NA_integer_))
+test(2210.17, DT[rep(NA_integer_, 10), `[`(a,2), by=list(vv=1:10)],
+             DT[rep(NA_integer_, 10), base::`[`(a,2), by=list(vv=1:10)])
+test(2210.18, DT[rep(NA_integer_, 10), first(a, 1L), by=list(vv=1:10)],
+             DT[rep(NA_integer_, 10), utils::head(a, 1L), by=list(vv=1:10)])
+test(2210.19, DT[rep(NA_integer_, 10), last(a, 1L), by=list(vv=1:10)],
+             DT[rep(NA_integer_, 10), utils::tail(a, 1L), by=list(vv=1:10)])
+test(2210.20, DT[rep(NA_integer_, 10), head(a, 1L), by=list(vv=1:10)],
+             DT[rep(NA_integer_, 10), utils::head(a, 1L), by=list(vv=1:10)])
+test(2210.21, DT[rep(NA_integer_, 10), tail(a, 1L), by=list(vv=1:10)],
+             DT[rep(NA_integer_, 10), utils::tail(a, 1L), by=list(vv=1:10)])
 # for numeric
-
+test(2210.31, DT[rep(NA_integer_, 10), sum(b, na.rm=FALSE), by=list(vv=1:10)],
+             DT[rep(NA_integer_, 10), base::sum(b, na.rm=FALSE), by=list(vv=1:10)])
+test(2210.32, DT[rep(NA_integer_, 10), sum(b, na.rm=TRUE), by=list(vv=1:10)],
+             DT[rep(NA_integer_, 10), base::sum(b, na.rm=TRUE), by=list(vv=1:10)])
+test(2210.33, DT[rep(NA_integer_, 10), mean(b, na.rm=FALSE), by=list(vv=1:10)],
+             DT[rep(NA_integer_, 10), base::mean(b, na.rm=FALSE), by=list(vv=1:10)])
+test(2210.34, DT[rep(NA_integer_, 10), mean(b, na.rm=TRUE), by=list(vv=1:10)],
+             DT[rep(NA_integer_, 10), base::mean(b, na.rm=TRUE), by=list(vv=1:10)])
+test(2210.35, DT[rep(NA_integer_, 10), var(b, na.rm=FALSE), by=list(vv=rep(1:5, each=2L))],
+             DT[rep(NA_integer_, 10), stats::var(b, na.rm=FALSE), by=list(vv=rep(1:5, each=2L))])
+test(2210.36, DT[rep(NA_integer_, 10), var(b, na.rm=TRUE), by=list(vv=rep(1:5, each=2L))],
+             DT[rep(NA_integer_, 10), stats::var(b, na.rm=TRUE), by=list(vv=rep(1:5, each=2L))])
+test(2210.37, DT[rep(NA_integer_, 10), sd(b, na.rm=FALSE), by=list(vv=rep(1:5, each=2L))],
+             DT[rep(NA_integer_, 10), stats::sd(b, na.rm=FALSE), by=list(vv=rep(1:5, each=2L))])
+test(2210.38, DT[rep(NA_integer_, 10), sd(b, na.rm=TRUE), by=list(vv=rep(1:5, each=2L))],
+             DT[rep(NA_integer_, 10), stats::sd(b, na.rm=TRUE), by=list(vv=rep(1:5, each=2L))])
+test(2210.39, DT[rep(NA_integer_, 10), median(b, na.rm=FALSE), by=list(vv=1:10)],
+             data.table(vv=1:10, V1=NA_real_))
+test(2210.40, DT[rep(NA_integer_, 10), median(b, na.rm=TRUE), by=list(vv=1:10)],
+             data.table(vv=1:10, V1=NA_real_))
+test(2210.41, DT[rep(NA_integer_, 10), prod(b, na.rm=FALSE), by=list(vv=1:10)],
+             DT[rep(NA_integer_, 10), base::prod(b, na.rm=FALSE), by=list(vv=1:10)])
+test(2210.42, DT[rep(NA_integer_, 10), prod(b, na.rm=TRUE), by=list(vv=1:10)],
+             DT[rep(NA_integer_, 10), base::prod(b, na.rm=TRUE), by=list(vv=1:10)])
+test(2210.43, DT[rep(NA_integer_, 10), min(b, na.rm=FALSE), by=list(vv=1:10)],
+             data.table(vv=1:10, V1=NA_real_))
+test(2210.44, DT[rep(NA_integer_, 10), min(b, na.rm=TRUE), by=list(vv=1:10)],
+             data.table(vv=1:10, V1=NA_real_))
+test(2210.45, DT[rep(NA_integer_, 10), max(b, na.rm=FALSE), by=list(vv=1:10)],
+             data.table(vv=1:10, V1=NA_real_))
+test(2210.46, DT[rep(NA_integer_, 10), max(b, na.rm=TRUE), by=list(vv=1:10)],
+             data.table(vv=1:10, V1=NA_real_))
+test(2210.47, DT[rep(NA_integer_, 10), `[`(b,2), by=list(vv=1:10)],
+             DT[rep(NA_integer_, 10), base::`[`(b,2), by=list(vv=1:10)])
+test(2210.48, DT[rep(NA_integer_, 10), first(b, 1L), by=list(vv=1:10)],
+             DT[rep(NA_integer_, 10), utils::head(b, 1L), by=list(vv=1:10)])
+test(2210.49, DT[rep(NA_integer_, 10), last(b, 1L), by=list(vv=1:10)],
+             DT[rep(NA_integer_, 10), utils::tail(b, 1L), by=list(vv=1:10)])
+test(2210.50, DT[rep(NA_integer_, 10), head(b, 1L), by=list(vv=1:10)],
+             DT[rep(NA_integer_, 10), utils::head(b, 1L), by=list(vv=1:10)])
+test(2210.51, DT[rep(NA_integer_, 10), tail(b, 1L), by=list(vv=1:10)],
+             DT[rep(NA_integer_, 10), utils::tail(b, 1L), by=list(vv=1:10)])
 # for character
+test(2210.61, DT[rep(NA_integer_, 10), min(c, na.rm=TRUE), by=list(vv=1:10)],
+             data.table(vv=1:10, V1=NA_character_))
+test(2210.62, DT[rep(NA_integer_, 10), min(c, na.rm=FALSE), by=list(vv=1:10)],
+             data.table(vv=1:10, V1=NA_character_))
+test(2210.63, DT[rep(NA_integer_, 10), max(c, na.rm=TRUE), by=list(vv=1:10)],
+             data.table(vv=1:10, V1=NA_character_))             
+test(2210.64, DT[rep(NA_integer_, 10), max(c, na.rm=FALSE), by=list(vv=1:10)],
+             data.table(vv=1:10, V1=NA_character_))
+test(2210.65, DT[rep(NA_integer_, 10), `[`(c,2), by=list(vv=1:10)],
+             DT[rep(NA_integer_, 10), base::`[`(c,2), by=list(vv=1:10)])
+test(2210.66, DT[rep(NA_integer_, 10), first(c, 1L), by=list(vv=1:10)],
+             DT[rep(NA_integer_, 10), utils::head(c, 1L), by=list(vv=1:10)])
+test(2210.67, DT[rep(NA_integer_, 10), last(c, 1L), by=list(vv=1:10)],
+             DT[rep(NA_integer_, 10), utils::tail(c, 1L), by=list(vv=1:10)])
+test(2210.68, DT[rep(NA_integer_, 10), head(c, 1L), by=list(vv=1:10)],
+             DT[rep(NA_integer_, 10), utils::head(c, 1L), by=list(vv=1:10)])
+test(2210.69, DT[rep(NA_integer_, 10), tail(c, 1L), by=list(vv=1:10)],
+             DT[rep(NA_integer_, 10), utils::tail(c, 1L), by=list(vv=1:10)])
diff --git a/src/gsumm.c b/src/gsumm.c
index e94fca1316..be19772cc3 100644
--- a/src/gsumm.c
+++ b/src/gsumm.c
@@ -735,13 +735,15 @@ static SEXP gminmax(SEXP x, SEXP narm, const bool min)
         const int thisgrp = grp[i];
         if (ansd[thisgrp]==NA_INTEGER) continue;  // once an NA has been observed in this group, it doesn't matter what the remaining values in this group are
         const int ix = (irowslen == -1) ? i : irows[i]-1;
-        if (xd[ix]==NA_INTEGER || (xd[ix]<ansd[thisgrp])==min)   
+        if (ix+1==NA_INTEGER) ansd[thisgrp] = NA_INTEGER;
+        else if (xd[ix]==NA_INTEGER || (xd[ix]<ansd[thisgrp])==min)   
           ansd[thisgrp] = xd[ix];  // always true on the first value in the group (other than if the first value is INT_MAX or INT_MIN-1 which is fine too)
       }
     } else {
       for (int i=0; i<ngrp; ++i) ansd[i] = NA_INTEGER;  // in the all-NA case we now return NA for type consistency 
       for (int i=0; i<n; ++i) {
         const int ix = (irowslen == -1) ? i : irows[i]-1;
+        if (ix+1==NA_INTEGER) continue;
         if (xd[ix]==NA_INTEGER) continue;
         const int thisgrp = grp[i];
         if (ansd[thisgrp]==NA_INTEGER || (xd[ix]<ansd[thisgrp])==min)
@@ -760,13 +762,15 @@ static SEXP gminmax(SEXP x, SEXP narm, const bool min)
         const int thisgrp = grp[i];
         if (ansd[thisgrp]==NA_STRING) continue;
         const int ix = (irowslen == -1) ? i : irows[i]-1;
-        if (xd[ix]==NA_STRING || (strcmp(CHAR(xd[ix]), CHAR(ansd[thisgrp]))<0)==min)
+        if (ix+1==NA_INTEGER) SET_STRING_ELT(ans, thisgrp, NA_STRING);
+        else if (xd[ix]==NA_STRING || (strcmp(CHAR(xd[ix]), CHAR(ansd[thisgrp]))<0)==min)
           SET_STRING_ELT(ans, thisgrp, xd[ix]);
       }
     } else {
       for (int i=0; i<ngrp; ++i) SET_STRING_ELT(ans, i, NA_STRING); // all missing returns NA consistent with base
       for (int i=0; i<n; ++i) {
         const int ix = (irowslen == -1) ? i : irows[i]-1;
+        if (ix+1==NA_INTEGER) continue;
         if (xd[ix]==NA_STRING) continue;
         const int thisgrp = grp[i];
         if (ansd[thisgrp]==NA_STRING || (strcmp(CHAR(xd[ix]), CHAR(ansd[thisgrp]))<0)==min)
@@ -786,13 +790,15 @@ static SEXP gminmax(SEXP x, SEXP narm, const bool min)
           const int thisgrp = grp[i];
           if (ansd[thisgrp]==NA_INTEGER64) continue;
           const int ix = (irowslen == -1) ? i : irows[i]-1;
-          if (xd[ix]==NA_INTEGER64 || (xd[ix]<ansd[thisgrp])==min)
+          if (ix+1==NA_INTEGER) ansd[thisgrp] = NA_INTEGER64;
+          else if (xd[ix]==NA_INTEGER64 || (xd[ix]<ansd[thisgrp])==min)
             ansd[thisgrp] = xd[ix];
         }
       } else {
         for (int i=0; i<ngrp; ++i) ansd[i] = NA_INTEGER64;
         for (int i=0; i<n; ++i) {
           const int ix = (irowslen == -1) ? i : irows[i]-1;
+          if (ix+1==NA_INTEGER) continue;
           if (xd[ix]==NA_INTEGER64) continue;
           const int thisgrp = grp[i];  
           if (ansd[thisgrp]==NA_INTEGER64 || (xd[ix]<ansd[thisgrp])==min)
@@ -809,13 +815,15 @@ static SEXP gminmax(SEXP x, SEXP narm, const bool min)
           const int thisgrp = grp[i];
           if (ISNAN(ansd[thisgrp])) continue;
           const int ix = (irowslen == -1) ? i : irows[i]-1;
-          if (ISNAN(xd[ix]) || (xd[ix]<ansd[thisgrp])==min)
+          if (ix+1==NA_INTEGER) ansd[thisgrp] = NA_REAL;
+          else if (ISNAN(xd[ix]) || (xd[ix]<ansd[thisgrp])==min)
             ansd[thisgrp] = xd[ix];
         }
       } else {
         for (int i=0; i<ngrp; ++i) ansd[i] = NA_REAL;
         for (int i=0; i<n; ++i) {
           const int ix = (irowslen == -1) ? i : irows[i]-1;
+          if (ix+1==NA_INTEGER) continue;
           if (ISNAN(xd[ix])) continue;
           const int thisgrp = grp[i];  
           if (ISNAN(ansd[thisgrp]) || (xd[ix]<ansd[thisgrp])==min)
@@ -867,7 +875,7 @@ SEXP gmedian(SEXP x, SEXP narmArg) {
         int k = ff[i]+j-1;
         if (isunsorted) k = oo[k]-1;
         k = (irowslen == -1) ? k : irows[k]-1;
-        if (k+1 == NA_INTEGER) nacount++;
+        if (k+1==NA_INTEGER) nacount++;
         else if (isInt64 ? xi64[k]==NA_INTEGER64 : ISNAN(xd[k])) nacount++;
         else subd[j-nacount] = xd[k];
       }
@@ -885,7 +893,7 @@ SEXP gmedian(SEXP x, SEXP narmArg) {
         int k = ff[i]+j-1;
         if (isunsorted) k = oo[k]-1;
         k = (irowslen == -1) ? k : irows[k]-1;
-        if (k+1 == NA_INTEGER) nacount++;
+        if (k+1==NA_INTEGER) nacount++;
         else if (xi[k]==NA_INTEGER) nacount++;
         else subi[j-nacount] = xi[k];
       }
@@ -914,7 +922,7 @@ SEXP glast(SEXP x) {
       int k = ff[i]+grpsize[i]-2;
       if (isunsorted) k = oo[k]-1;
       k = (irowslen == -1) ? k : irows[k]-1;
-      ians[i] = ix[k];
+      ians[i] = (k+1==NA_INTEGER) ? NA_LOGICAL : ix[k];
     }
   }
     break;
@@ -926,7 +934,7 @@ SEXP glast(SEXP x) {
       int k = ff[i]+grpsize[i]-2;
       if (isunsorted) k = oo[k]-1;
       k = (irowslen == -1) ? k : irows[k]-1;
-      ians[i] = ix[k];
+      ians[i] = (k+1==NA_INTEGER) ? NA_INTEGER : ix[k];
     }
   }
     break;
@@ -938,7 +946,7 @@ SEXP glast(SEXP x) {
       int k = ff[i]+grpsize[i]-2;
       if (isunsorted) k = oo[k]-1;
       k = (irowslen == -1) ? k : irows[k]-1;
-      dans[i] = dx[k];
+      dans[i] = (k+1==NA_INTEGER) ? NA_REAL : dx[k];
     }
   }
     break;
@@ -950,7 +958,7 @@ SEXP glast(SEXP x) {
       int k = ff[i]+grpsize[i]-2;
       if (isunsorted) k = oo[k]-1;
       k = (irowslen == -1) ? k : irows[k]-1;
-      dans[i] = dx[k];
+      dans[i] = (k+1==NA_INTEGER) ? NA_CPLX : dx[k];
     }
   } break;
   case STRSXP:
@@ -959,7 +967,7 @@ SEXP glast(SEXP x) {
       int k = ff[i]+grpsize[i]-2;
       if (isunsorted) k = oo[k]-1;
       k = (irowslen == -1) ? k : irows[k]-1;
-      SET_STRING_ELT(ans, i, STRING_ELT(x, k));
+      SET_STRING_ELT(ans, i, (k+1==NA_INTEGER) ? NA_STRING : STRING_ELT(x, k));
     }
     break;
   case VECSXP:
@@ -968,7 +976,7 @@ SEXP glast(SEXP x) {
       int k = ff[i]+grpsize[i]-2;
       if (isunsorted) k = oo[k]-1;
       k = (irowslen == -1) ? k : irows[k]-1;
-      SET_VECTOR_ELT(ans, i, VECTOR_ELT(x, k));
+      SET_VECTOR_ELT(ans, i, (k+1 == NA_INTEGER) ? ScalarLogical(NA_LOGICAL) : VECTOR_ELT(x, k));
     }
     break;
   default:
@@ -992,7 +1000,7 @@ SEXP gfirst(SEXP x) {
       int k = ff[i]-1;
       if (isunsorted) k = oo[k]-1;
       k = (irowslen == -1) ? k : irows[k]-1;
-      ians[i] = ix[k];
+      ians[i] = (k+1==NA_INTEGER) ? NA_LOGICAL : ix[k];
     }
   }
     break;
@@ -1004,7 +1012,7 @@ SEXP gfirst(SEXP x) {
       int k = ff[i]-1;
       if (isunsorted) k = oo[k]-1;
       k = (irowslen == -1) ? k : irows[k]-1;
-      ians[i] = ix[k];
+      ians[i] = (k+1==NA_INTEGER) ? NA_INTEGER : ix[k];
     }
   }
     break;
@@ -1016,7 +1024,7 @@ SEXP gfirst(SEXP x) {
       int k = ff[i]-1;
       if (isunsorted) k = oo[k]-1;
       k = (irowslen == -1) ? k : irows[k]-1;
-      dans[i] = dx[k];
+      dans[i] = (k+1==NA_INTEGER) ? NA_REAL : dx[k];
     }
   }
     break;
@@ -1028,7 +1036,7 @@ SEXP gfirst(SEXP x) {
       int k = ff[i]-1;
       if (isunsorted) k = oo[k]-1;
       k = (irowslen == -1) ? k : irows[k]-1;
-      dans[i] = dx[k];
+      dans[i] = (k+1==NA_INTEGER) ? NA_CPLX : dx[k];
     }
   } break;
   case STRSXP:
@@ -1037,7 +1045,7 @@ SEXP gfirst(SEXP x) {
       int k = ff[i]-1;
       if (isunsorted) k = oo[k]-1;
       k = (irowslen == -1) ? k : irows[k]-1;
-      SET_STRING_ELT(ans, i, STRING_ELT(x, k));
+      SET_STRING_ELT(ans, i, (k+1==NA_INTEGER) ? NA_STRING : STRING_ELT(x, k));
     }
     break;
   case VECSXP:
@@ -1046,7 +1054,7 @@ SEXP gfirst(SEXP x) {
       int k = ff[i]-1;
       if (isunsorted) k = oo[k]-1;
       k = (irowslen == -1) ? k : irows[k]-1;
-      SET_VECTOR_ELT(ans, i, VECTOR_ELT(x, k));
+      SET_VECTOR_ELT(ans, i, (k+1 == NA_INTEGER) ? ScalarLogical(NA_LOGICAL) : VECTOR_ELT(x, k));
     }
     break;
   default:
@@ -1178,6 +1186,7 @@ SEXP gvarsd1(SEXP x, SEXP narm, Rboolean isSD)
             int ix = ff[i]+j-1;
             if (isunsorted) ix = oo[ix]-1;
             ix = (irowslen == -1) ? ix : irows[ix]-1;
+            if (ix+1==NA_INTEGER) { ans_na=true; break; }
             if (INTEGER(x)[ix] == NA_INTEGER) { ans_na=true; break; }
             INTEGER(sub)[j] = INTEGER(x)[ix];
             m += INTEGER(sub)[j]; // sum
@@ -1203,6 +1212,7 @@ SEXP gvarsd1(SEXP x, SEXP narm, Rboolean isSD)
             int ix = ff[i]+j-1;
             if (isunsorted) ix = oo[ix]-1;
             ix = (irowslen == -1) ? ix : irows[ix]-1;
+            if (ix+1==NA_INTEGER) continue;
             if (INTEGER(x)[ix] == NA_INTEGER) continue;
             INTEGER(sub)[thisgrpsize] = INTEGER(x)[ix];
             m += INTEGER(sub)[thisgrpsize]; // sum
@@ -1235,6 +1245,7 @@ SEXP gvarsd1(SEXP x, SEXP narm, Rboolean isSD)
             int ix = ff[i]+j-1;
             if (isunsorted) ix = oo[ix]-1;
             ix = (irowslen == -1) ? ix : irows[ix]-1;
+            if (ix+1==NA_INTEGER) { ans_na=true; break; }
             if (ISNAN(REAL(x)[ix])) { ans_na=true; break; }
             REAL(sub)[j] = REAL(x)[ix];
             m += REAL(sub)[j]; // sum
@@ -1260,6 +1271,7 @@ SEXP gvarsd1(SEXP x, SEXP narm, Rboolean isSD)
             int ix = ff[i]+j-1;
             if (isunsorted) ix = oo[ix]-1;
             ix = (irowslen == -1) ? ix : irows[ix]-1;
+            if (ix+1==NA_INTEGER) continue;
             if (ISNAN(REAL(x)[ix])) continue;
             REAL(sub)[thisgrpsize] = REAL(x)[ix];
             m += REAL(sub)[thisgrpsize]; // sum
@@ -1338,10 +1350,10 @@ SEXP gprod(SEXP x, SEXP narm)
       const int thisgrp = grp[i];
       const int ix = (irowslen == -1) ? i : irows[i]-1;
       if (!LOGICAL(narm)[0]) {
-        if (ix+1 == NA_INTEGER) { s[thisgrp] = NA_REAL; continue; }
+        if (ix+1==NA_INTEGER) { s[thisgrp] = NA_REAL; continue; }
         if (ISNAN(REAL(x)[ix])) { s[thisgrp] = NA_REAL; continue; }
       } else {
-        if (ix+1 == NA_INTEGER) continue;
+        if (ix+1==NA_INTEGER) continue;
         if (ISNAN(REAL(x)[ix])) continue; // else let NA_REAL propogate from here
       }
       s[thisgrp] *= REAL(x)[ix];  // done in long double, like base

From 97b6b5fdf9f6501698d1e7596f28802bbf1932f5 Mon Sep 17 00:00:00 2001
From: Benjamin Schwendinger <benjamin.schwendinger@tuwien.ac.at>
Date: Sun, 22 Aug 2021 21:19:26 +0200
Subject: [PATCH 03/12] fixed spacing

---
 src/gsumm.c | 32 ++++++++++++++++----------------
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/src/gsumm.c b/src/gsumm.c
index be19772cc3..a100d56edd 100644
--- a/src/gsumm.c
+++ b/src/gsumm.c
@@ -235,7 +235,7 @@ void *gather(SEXP x, bool *anyNA)
       } else {
         const int *my_x = irows + b*batchSize;
         for (int i=0; i<howMany; i++) {
-          int elem = my_x[i] == NA_INTEGER ? NA_INTEGER : thisx[ my_x[i]-1 ];
+          int elem = my_x[i]==NA_INTEGER ? NA_INTEGER : thisx[ my_x[i]-1 ];
           my_gx[ my_tmpcounts[my_high[i]]++ ] = elem;
           if (elem==NA_INTEGER) my_anyNA = true;
         }
@@ -264,7 +264,7 @@ void *gather(SEXP x, bool *anyNA)
         } else {
           const int *my_x = irows + b*batchSize;
           for (int i=0; i<howMany; i++) {
-            double elem = my_x[i] == NA_INTEGER ? NA_REAL : thisx[ my_x[i]-1 ];
+            double elem = my_x[i]==NA_INTEGER ? NA_REAL : thisx[ my_x[i]-1 ];
             my_gx[ my_tmpcounts[my_high[i]]++ ] = elem;
             if (ISNAN(elem)) my_anyNA = true;
           }
@@ -291,7 +291,7 @@ void *gather(SEXP x, bool *anyNA)
         } else {
           const int *my_x = irows + b*batchSize;
           for (int i=0; i<howMany; i++) {
-            int64_t elem = my_x[i] == NA_INTEGER ? NA_INTEGER64 : thisx[ my_x[i]-1 ];
+            int64_t elem = my_x[i]==NA_INTEGER ? NA_INTEGER64 : thisx[ my_x[i]-1 ];
             my_gx[ my_tmpcounts[my_high[i]]++ ] = elem;
             if (elem==INT64_MIN) my_anyNA = true;
           }
@@ -322,7 +322,7 @@ void *gather(SEXP x, bool *anyNA)
       } else {
         const int *my_x = irows + b*batchSize;
         for (int i=0; i<howMany; i++) {
-          Rcomplex elem = my_x[i] == NA_INTEGER ? NA_CPLX : thisx[ my_x[i]-1 ];
+          Rcomplex elem = my_x[i]==NA_INTEGER ? NA_CPLX : thisx[ my_x[i]-1 ];
           my_gx[ my_tmpcounts[my_high[i]]++ ] = elem;
           if (ISNAN(elem.r) && ISNAN(elem.i)) my_anyNA = true;
         }
@@ -976,7 +976,7 @@ SEXP glast(SEXP x) {
       int k = ff[i]+grpsize[i]-2;
       if (isunsorted) k = oo[k]-1;
       k = (irowslen == -1) ? k : irows[k]-1;
-      SET_VECTOR_ELT(ans, i, (k+1 == NA_INTEGER) ? ScalarLogical(NA_LOGICAL) : VECTOR_ELT(x, k));
+      SET_VECTOR_ELT(ans, i, (k+1==NA_INTEGER) ? ScalarLogical(NA_LOGICAL) : VECTOR_ELT(x, k));
     }
     break;
   default:
@@ -1054,7 +1054,7 @@ SEXP gfirst(SEXP x) {
       int k = ff[i]-1;
       if (isunsorted) k = oo[k]-1;
       k = (irowslen == -1) ? k : irows[k]-1;
-      SET_VECTOR_ELT(ans, i, (k+1 == NA_INTEGER) ? ScalarLogical(NA_LOGICAL) : VECTOR_ELT(x, k));
+      SET_VECTOR_ELT(ans, i, (k+1==NA_INTEGER) ? ScalarLogical(NA_LOGICAL) : VECTOR_ELT(x, k));
     }
     break;
   default:
@@ -1092,7 +1092,7 @@ SEXP gnthvalue(SEXP x, SEXP valArg) {
       int k = ff[i]+val-2;
       if (isunsorted) k = oo[k]-1;
       k = (irowslen == -1) ? k : irows[k]-1;
-      ians[i] = (k+1 == NA_INTEGER) ? NA_LOGICAL : ix[k];
+      ians[i] = (k+1==NA_INTEGER) ? NA_LOGICAL : ix[k];
     }
   }
     break;
@@ -1105,7 +1105,7 @@ SEXP gnthvalue(SEXP x, SEXP valArg) {
       int k = ff[i]+val-2;
       if (isunsorted) k = oo[k]-1;
       k = (irowslen == -1) ? k : irows[k]-1;
-      ians[i] = (k+1 == NA_INTEGER) ? NA_INTEGER : ix[k];
+      ians[i] = (k+1==NA_INTEGER) ? NA_INTEGER : ix[k];
     }
   }
     break;
@@ -1118,7 +1118,7 @@ SEXP gnthvalue(SEXP x, SEXP valArg) {
       int k = ff[i]+val-2;
       if (isunsorted) k = oo[k]-1;
       k = (irowslen == -1) ? k : irows[k]-1;
-      dans[i] = (k+1 == NA_INTEGER) ? NA_REAL : dx[k];
+      dans[i] = (k+1==NA_INTEGER) ? NA_REAL : dx[k];
     }
   }
     break;
@@ -1131,7 +1131,7 @@ SEXP gnthvalue(SEXP x, SEXP valArg) {
       int k = ff[i]+val-2;
       if (isunsorted) k = oo[k]-1;
       k = (irowslen == -1) ? k : irows[k]-1;
-      dans[i] = (k+1 == NA_INTEGER) ? NA_CPLX : dx[k];
+      dans[i] = (k+1==NA_INTEGER) ? NA_CPLX : dx[k];
     }
   } break;
   case STRSXP:
@@ -1141,7 +1141,7 @@ SEXP gnthvalue(SEXP x, SEXP valArg) {
       int k = ff[i]+val-2;
       if (isunsorted) k = oo[k]-1;
       k = (irowslen == -1) ? k : irows[k]-1;
-      SET_STRING_ELT(ans, i, (k+1 == NA_INTEGER) ? NA_STRING : STRING_ELT(x, k));
+      SET_STRING_ELT(ans, i, (k+1==NA_INTEGER) ? NA_STRING : STRING_ELT(x, k));
     }
     break;
   case VECSXP:
@@ -1151,7 +1151,7 @@ SEXP gnthvalue(SEXP x, SEXP valArg) {
       int k = ff[i]+val-2;
       if (isunsorted) k = oo[k]-1;
       k = (irowslen == -1) ? k : irows[k]-1;
-      SET_VECTOR_ELT(ans, i, (k+1 == NA_INTEGER) ? ScalarLogical(NA_LOGICAL) : VECTOR_ELT(x, k));
+      SET_VECTOR_ELT(ans, i, (k+1==NA_INTEGER) ? ScalarLogical(NA_LOGICAL) : VECTOR_ELT(x, k));
     }
     break;
   default:
@@ -1331,11 +1331,11 @@ SEXP gprod(SEXP x, SEXP narm)
       const int ix = (irowslen == -1) ? i : irows[i]-1;
       if (!LOGICAL(narm)[0]) {
         // Let NA_REAL propogate from here. R_NaReal is IEEE.
-        if (ix+1 == NA_INTEGER) { s[thisgrp] = NA_REAL; continue; }
-        if (INTEGER(x)[ix] == NA_INTEGER) { s[thisgrp] = NA_REAL; continue; }
+        if (ix+1==NA_INTEGER) { s[thisgrp] = NA_REAL; continue; }
+        if (INTEGER(x)[ix]==NA_INTEGER) { s[thisgrp] = NA_REAL; continue; }
       } else {
-        if (ix+1 == NA_INTEGER) continue;
-        if (INTEGER(x)[ix] == NA_INTEGER) continue;
+        if (ix+1==NA_INTEGER) continue;
+        if (INTEGER(x)[ix]==NA_INTEGER) continue;
       }
       s[thisgrp] *= INTEGER(x)[ix];  // no under/overflow here, s is long double (like base)
     }

From a4259e4bb9ed336bce3024821114b5527468b07d Mon Sep 17 00:00:00 2001
From: Matt Dowle <mattjdowle@gmail.com>
Date: Mon, 23 Aug 2021 00:45:19 -0600
Subject: [PATCH 04/12] news item tweak

---
 NEWS.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/NEWS.md b/NEWS.md
index 3576ba575f..912c44fbee 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -287,7 +287,7 @@
     # 1: 2017-10-02 09:55:00
     ```
 
-39. `GForce` optimization could run into a segfault when using NA values as argument for subsetting I, [#1994](https://github.com/Rdatatable/data.table/issues/1994). This happened due to out-of-bound conditions, since NA_INTEGER is internally represented by INT_MIN - 1L. Thanks to Arun Srinivasan for reporting, and Benjamin Schwendinger for the PR.
+39. `DT[i, sum(b), by=grp]` (and other optimized-by-group aggregates: `mean`, `var`, `sd`, `median`, `prod`, `min`, `max`, `first`, `last`, `head` and `tail`) could segfault if `i` contains row numbers and one or more are NA, [#1994](https://github.com/Rdatatable/data.table/issues/1994). Thanks to Arun Srinivasan for reporting, and Benjamin Schwendinger for the PR.
 
 ## NOTES
 

From 456e492a6f7d5012bc24024302b59b2448548cf8 Mon Sep 17 00:00:00 2001
From: Matt Dowle <mattjdowle@gmail.com>
Date: Mon, 23 Aug 2021 11:55:59 -0600
Subject: [PATCH 05/12] gminmax: avoid relying on over/underflow and reduce
 logic

---
 src/gsumm.c | 69 ++++++++++++++++++++++++-----------------------------
 1 file changed, 31 insertions(+), 38 deletions(-)

diff --git a/src/gsumm.c b/src/gsumm.c
index a100d56edd..6ab82f6e9d 100644
--- a/src/gsumm.c
+++ b/src/gsumm.c
@@ -723,6 +723,7 @@ static SEXP gminmax(SEXP x, SEXP narm, const bool min)
   SEXP ans;
   if (nrow != n) error(_("nrow [%d] != length(x) [%d] in %s"), nrow, n, "gmin");  
   // GForce guarantees each group has at least one value; i.e. we don't need to consider length-0 per group here
+  const bool nosubset = irowslen==-1;
   switch(TYPEOF(x)) {
   case LGLSXP: case INTSXP: {
     ans = PROTECT(allocVector(INTSXP, ngrp));
@@ -734,20 +735,18 @@ static SEXP gminmax(SEXP x, SEXP narm, const bool min)
       for (int i=0; i<n; ++i) {
         const int thisgrp = grp[i];
         if (ansd[thisgrp]==NA_INTEGER) continue;  // once an NA has been observed in this group, it doesn't matter what the remaining values in this group are
-        const int ix = (irowslen == -1) ? i : irows[i]-1;
-        if (ix+1==NA_INTEGER) ansd[thisgrp] = NA_INTEGER;
-        else if (xd[ix]==NA_INTEGER || (xd[ix]<ansd[thisgrp])==min)   
-          ansd[thisgrp] = xd[ix];  // always true on the first value in the group (other than if the first value is INT_MAX or INT_MIN-1 which is fine too)
+        const int elem = nosubset ? xd[i] : (irows[i]==NA_INTEGER ? NA_INTEGER : xd[irows[i]-1]);
+        if (elem==NA_INTEGER || (elem<ansd[thisgrp])==min)   
+          ansd[thisgrp] = elem;  // always true on the first value in the group (other than if the first value is INT_MAX or INT_MIN-1 which is fine too)
       }
     } else {
       for (int i=0; i<ngrp; ++i) ansd[i] = NA_INTEGER;  // in the all-NA case we now return NA for type consistency 
       for (int i=0; i<n; ++i) {
-        const int ix = (irowslen == -1) ? i : irows[i]-1;
-        if (ix+1==NA_INTEGER) continue;
-        if (xd[ix]==NA_INTEGER) continue;
+        const int elem = nosubset ? xd[i] : (irows[i]==NA_INTEGER ? NA_INTEGER : xd[irows[i]-1]);
+        if (elem==NA_INTEGER) continue;
         const int thisgrp = grp[i];
-        if (ansd[thisgrp]==NA_INTEGER || (xd[ix]<ansd[thisgrp])==min)
-          ansd[thisgrp] = xd[ix];
+        if (ansd[thisgrp]==NA_INTEGER || (elem<ansd[thisgrp])==min)
+          ansd[thisgrp] = elem;
       }
     }}
     break;
@@ -761,20 +760,18 @@ static SEXP gminmax(SEXP x, SEXP narm, const bool min)
       for (int i=0; i<n; ++i) {
         const int thisgrp = grp[i];
         if (ansd[thisgrp]==NA_STRING) continue;
-        const int ix = (irowslen == -1) ? i : irows[i]-1;
-        if (ix+1==NA_INTEGER) SET_STRING_ELT(ans, thisgrp, NA_STRING);
-        else if (xd[ix]==NA_STRING || (strcmp(CHAR(xd[ix]), CHAR(ansd[thisgrp]))<0)==min)
-          SET_STRING_ELT(ans, thisgrp, xd[ix]);
+        const SEXP elem = nosubset ? xd[i] : (irows[i]==NA_INTEGER ? NA_STRING : xd[irows[i]-1]);
+        if (elem==NA_STRING || (strcmp(CHAR(elem), CHAR(ansd[thisgrp]))<0)==min)
+          SET_STRING_ELT(ans, thisgrp, elem);
       }
     } else {
       for (int i=0; i<ngrp; ++i) SET_STRING_ELT(ans, i, NA_STRING); // all missing returns NA consistent with base
       for (int i=0; i<n; ++i) {
-        const int ix = (irowslen == -1) ? i : irows[i]-1;
-        if (ix+1==NA_INTEGER) continue;
-        if (xd[ix]==NA_STRING) continue;
+        const SEXP elem = nosubset ? xd[i] : (irows[i]==NA_INTEGER ? NA_STRING : xd[irows[i]-1]);
+        if (elem==NA_STRING) continue;
         const int thisgrp = grp[i];
-        if (ansd[thisgrp]==NA_STRING || (strcmp(CHAR(xd[ix]), CHAR(ansd[thisgrp]))<0)==min)
-          SET_STRING_ELT(ans, thisgrp, xd[ix]);
+        if (ansd[thisgrp]==NA_STRING || (strcmp(CHAR(elem), CHAR(ansd[thisgrp]))<0)==min)
+          SET_STRING_ELT(ans, thisgrp, elem);
       }
     }}
     break;
@@ -789,20 +786,18 @@ static SEXP gminmax(SEXP x, SEXP narm, const bool min)
         for (int i=0; i<n; ++i) {
           const int thisgrp = grp[i];
           if (ansd[thisgrp]==NA_INTEGER64) continue;
-          const int ix = (irowslen == -1) ? i : irows[i]-1;
-          if (ix+1==NA_INTEGER) ansd[thisgrp] = NA_INTEGER64;
-          else if (xd[ix]==NA_INTEGER64 || (xd[ix]<ansd[thisgrp])==min)
-            ansd[thisgrp] = xd[ix];
+          const int64_t elem = nosubset ? xd[i] : (irows[i]==NA_INTEGER ? NA_INTEGER64 : xd[irows[i]-1]);
+          if (elem==NA_INTEGER64 || (elem<ansd[thisgrp])==min)   
+            ansd[thisgrp] = elem;
         }
       } else {
         for (int i=0; i<ngrp; ++i) ansd[i] = NA_INTEGER64;
         for (int i=0; i<n; ++i) {
-          const int ix = (irowslen == -1) ? i : irows[i]-1;
-          if (ix+1==NA_INTEGER) continue;
-          if (xd[ix]==NA_INTEGER64) continue;
-          const int thisgrp = grp[i];  
-          if (ansd[thisgrp]==NA_INTEGER64 || (xd[ix]<ansd[thisgrp])==min)
-            ansd[thisgrp] = xd[ix];
+          const int64_t elem = nosubset ? xd[i] : (irows[i]==NA_INTEGER ? NA_INTEGER64 : xd[irows[i]-1]);
+          if (elem==NA_INTEGER64) continue;
+          const int thisgrp = grp[i];
+          if (ansd[thisgrp]==NA_INTEGER64 || (elem<ansd[thisgrp])==min)
+            ansd[thisgrp] = elem;
         }
       }
     } else {
@@ -814,20 +809,18 @@ static SEXP gminmax(SEXP x, SEXP narm, const bool min)
         for (int i=0; i<n; ++i) {
           const int thisgrp = grp[i];
           if (ISNAN(ansd[thisgrp])) continue;
-          const int ix = (irowslen == -1) ? i : irows[i]-1;
-          if (ix+1==NA_INTEGER) ansd[thisgrp] = NA_REAL;
-          else if (ISNAN(xd[ix]) || (xd[ix]<ansd[thisgrp])==min)
-            ansd[thisgrp] = xd[ix];
+          const double elem = nosubset ? xd[i] : (irows[i]==NA_INTEGER ? NA_REAL : xd[irows[i]-1]);
+          if (ISNAN(elem) || (elem<ansd[thisgrp])==min)   
+            ansd[thisgrp] = elem;
         }
       } else {
         for (int i=0; i<ngrp; ++i) ansd[i] = NA_REAL;
         for (int i=0; i<n; ++i) {
-          const int ix = (irowslen == -1) ? i : irows[i]-1;
-          if (ix+1==NA_INTEGER) continue;
-          if (ISNAN(xd[ix])) continue;
-          const int thisgrp = grp[i];  
-          if (ISNAN(ansd[thisgrp]) || (xd[ix]<ansd[thisgrp])==min)
-            ansd[thisgrp] = xd[ix];
+          const double elem = nosubset ? xd[i] : (irows[i]==NA_INTEGER ? NA_REAL : xd[irows[i]-1]);
+          if (ISNAN(elem)) continue;
+          const int thisgrp = grp[i];
+          if (ISNAN(ansd[thisgrp]) || (elem<ansd[thisgrp])==min)
+            ansd[thisgrp] = elem;
         }
       }
     }}

From 183456b2746c3344c4695c89fe56607e686e2a89 Mon Sep 17 00:00:00 2001
From: Matt Dowle <mattjdowle@gmail.com>
Date: Mon, 23 Aug 2021 12:04:14 -0600
Subject: [PATCH 06/12] trailing whitespace

---
 src/gsumm.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/gsumm.c b/src/gsumm.c
index 6ab82f6e9d..bf7c4ad37f 100644
--- a/src/gsumm.c
+++ b/src/gsumm.c
@@ -721,7 +721,7 @@ static SEXP gminmax(SEXP x, SEXP narm, const bool min)
   const int n = (irowslen == -1) ? length(x) : irowslen;
   //clock_t start = clock();
   SEXP ans;
-  if (nrow != n) error(_("nrow [%d] != length(x) [%d] in %s"), nrow, n, "gmin");  
+  if (nrow != n) error(_("nrow [%d] != length(x) [%d] in %s"), nrow, n, "gmin");
   // GForce guarantees each group has at least one value; i.e. we don't need to consider length-0 per group here
   const bool nosubset = irowslen==-1;
   switch(TYPEOF(x)) {
@@ -736,11 +736,11 @@ static SEXP gminmax(SEXP x, SEXP narm, const bool min)
         const int thisgrp = grp[i];
         if (ansd[thisgrp]==NA_INTEGER) continue;  // once an NA has been observed in this group, it doesn't matter what the remaining values in this group are
         const int elem = nosubset ? xd[i] : (irows[i]==NA_INTEGER ? NA_INTEGER : xd[irows[i]-1]);
-        if (elem==NA_INTEGER || (elem<ansd[thisgrp])==min)   
+        if (elem==NA_INTEGER || (elem<ansd[thisgrp])==min)
           ansd[thisgrp] = elem;  // always true on the first value in the group (other than if the first value is INT_MAX or INT_MIN-1 which is fine too)
       }
     } else {
-      for (int i=0; i<ngrp; ++i) ansd[i] = NA_INTEGER;  // in the all-NA case we now return NA for type consistency 
+      for (int i=0; i<ngrp; ++i) ansd[i] = NA_INTEGER;  // in the all-NA case we now return NA for type consistency
       for (int i=0; i<n; ++i) {
         const int elem = nosubset ? xd[i] : (irows[i]==NA_INTEGER ? NA_INTEGER : xd[irows[i]-1]);
         if (elem==NA_INTEGER) continue;
@@ -756,7 +756,7 @@ static SEXP gminmax(SEXP x, SEXP narm, const bool min)
     const SEXP *xd = STRING_PTR(x);
     if (!LOGICAL(narm)[0]) {
       const SEXP init = min ? char_maxString : R_BlankString;  // char_maxString == "\xFF\xFF..." in init.c
-      for (int i=0; i<ngrp; ++i) SET_STRING_ELT(ans, i, init); 
+      for (int i=0; i<ngrp; ++i) SET_STRING_ELT(ans, i, init);
       for (int i=0; i<n; ++i) {
         const int thisgrp = grp[i];
         if (ansd[thisgrp]==NA_STRING) continue;
@@ -787,7 +787,7 @@ static SEXP gminmax(SEXP x, SEXP narm, const bool min)
           const int thisgrp = grp[i];
           if (ansd[thisgrp]==NA_INTEGER64) continue;
           const int64_t elem = nosubset ? xd[i] : (irows[i]==NA_INTEGER ? NA_INTEGER64 : xd[irows[i]-1]);
-          if (elem==NA_INTEGER64 || (elem<ansd[thisgrp])==min)   
+          if (elem==NA_INTEGER64 || (elem<ansd[thisgrp])==min)
             ansd[thisgrp] = elem;
         }
       } else {
@@ -810,7 +810,7 @@ static SEXP gminmax(SEXP x, SEXP narm, const bool min)
           const int thisgrp = grp[i];
           if (ISNAN(ansd[thisgrp])) continue;
           const double elem = nosubset ? xd[i] : (irows[i]==NA_INTEGER ? NA_REAL : xd[irows[i]-1]);
-          if (ISNAN(elem) || (elem<ansd[thisgrp])==min)   
+          if (ISNAN(elem) || (elem<ansd[thisgrp])==min)
             ansd[thisgrp] = elem;
         }
       } else {

From b8c0873337822680dc0627a29948d8ace13d8417 Mon Sep 17 00:00:00 2001
From: Matt Dowle <mattjdowle@gmail.com>
Date: Mon, 23 Aug 2021 13:48:50 -0600
Subject: [PATCH 07/12] same for gmedian

---
 src/gsumm.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/src/gsumm.c b/src/gsumm.c
index bf7c4ad37f..881c4933ef 100644
--- a/src/gsumm.c
+++ b/src/gsumm.c
@@ -857,6 +857,7 @@ SEXP gmedian(SEXP x, SEXP narmArg) {
   if (nrow != n) error(_("nrow [%d] != length(x) [%d] in %s"), nrow, n, "gmedian");
   SEXP ans = PROTECT(allocVector(REALSXP, ngrp));
   double *ansd = REAL(ans);
+  const bool nosubset = irowslen==-1;
   switch(TYPEOF(x)) {
   case REALSXP: {
     double *subd = REAL(PROTECT(allocVector(REALSXP, maxgrpn))); // allocate once upfront and reuse
@@ -867,9 +868,8 @@ SEXP gmedian(SEXP x, SEXP narmArg) {
       for (int j=0; j<thisgrpsize; ++j) {
         int k = ff[i]+j-1;
         if (isunsorted) k = oo[k]-1;
-        k = (irowslen == -1) ? k : irows[k]-1;
-        if (k+1==NA_INTEGER) nacount++;
-        else if (isInt64 ? xi64[k]==NA_INTEGER64 : ISNAN(xd[k])) nacount++;
+        k = nosubset ? k : (irows[k]==NA_INTEGER ? NA_INTEGER : irows[k]-1);
+        if (k==NA_INTEGER || (isInt64 ? xi64[k]==NA_INTEGER64 : ISNAN(xd[k]))) nacount++;
         else subd[j-nacount] = xd[k];
       }
       thisgrpsize -= nacount;  // all-NA is returned as NA_REAL via n==0 case inside *quickselect
@@ -885,9 +885,7 @@ SEXP gmedian(SEXP x, SEXP narmArg) {
       for (int j=0; j<thisgrpsize; ++j) {
         int k = ff[i]+j-1;
         if (isunsorted) k = oo[k]-1;
-        k = (irowslen == -1) ? k : irows[k]-1;
-        if (k+1==NA_INTEGER) nacount++;
-        else if (xi[k]==NA_INTEGER) nacount++;
+        if (nosubset ? xi[k]==NA_INTEGER : (irows[k]==NA_INTEGER || (k=irows[k]-1,xi[k]==NA_INTEGER))) nacount++; 
         else subi[j-nacount] = xi[k];
       }
       ansd[i] = (nacount && !narm) ? NA_REAL : iquickselect(subi, thisgrpsize-nacount);

From be019fcb1607bea24b17615b02abc4fa05c2833b Mon Sep 17 00:00:00 2001
From: Matt Dowle <mattjdowle@gmail.com>
Date: Mon, 23 Aug 2021 14:30:29 -0600
Subject: [PATCH 08/12] same for glast, and folded gfirst into gfirstlast

---
 inst/tests/tests.Rraw |   4 +-
 src/gsumm.c           | 136 ++++++++++--------------------------------
 2 files changed, 34 insertions(+), 106 deletions(-)

diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index 68af8a16ca..357fd99cbb 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -14711,8 +14711,8 @@ DT = data.table(id = c(1L,1L,2L), v = as.raw(0:2))
 test(2020.01, DT[, min(v), by=id], error="'raw' not supported by GForce min/max")
 test(2020.02, DT[, max(v), by=id], error="'raw' not supported by GForce min/max")
 test(2020.03, DT[, median(v), by=id], error="'raw' not supported by GForce median")
-test(2020.04, DT[, head(v, 1), by=id], error="'raw' not supported by GForce head")
-test(2020.05, DT[, tail(v, 1), by=id], error="'raw' not supported by GForce tail")
+test(2020.04, DT[, head(v, 1), by=id], error="'raw' not supported by GForce head/tail/first/last")
+test(2020.05, DT[, tail(v, 1), by=id], error="'raw' not supported by GForce head/tail/first/last")
 test(2020.06, DT[, v[1], by=id], error="'raw' not supported by GForce subset")
 test(2020.07, DT[, sd(v), by=id], error="'raw' not supported by GForce sd")
 test(2020.08, DT[, var(v), by=id], error="'raw' not supported by GForce var")
diff --git a/src/gsumm.c b/src/gsumm.c
index 881c4933ef..74d28e1015 100644
--- a/src/gsumm.c
+++ b/src/gsumm.c
@@ -885,7 +885,7 @@ SEXP gmedian(SEXP x, SEXP narmArg) {
       for (int j=0; j<thisgrpsize; ++j) {
         int k = ff[i]+j-1;
         if (isunsorted) k = oo[k]-1;
-        if (nosubset ? xi[k]==NA_INTEGER : (irows[k]==NA_INTEGER || (k=irows[k]-1,xi[k]==NA_INTEGER))) nacount++; 
+        if (nosubset ? xi[k]==NA_INTEGER : (irows[k]==NA_INTEGER || (k=irows[k]-1,xi[k]==NA_INTEGER))) nacount++;
         else subi[j-nacount] = xi[k];
       }
       ansd[i] = (nacount && !narm) ? NA_REAL : iquickselect(subi, thisgrpsize-nacount);
@@ -900,20 +900,21 @@ SEXP gmedian(SEXP x, SEXP narmArg) {
   return ans;
 }
 
-SEXP glast(SEXP x) {
-  const int n = (irowslen == -1) ? length(x) : irowslen;
+static SEXP gfirstlast(SEXP x, const bool first) {
+  const bool nosubset = irowslen == -1;
+  const int n = nosubset ? length(x) : irowslen;
   SEXP ans;
-  if (nrow != n) error(_("nrow [%d] != length(x) [%d] in %s"), nrow, n, "gtail");
+  if (nrow != n) error(_("nrow [%d] != length(x) [%d] in %s"), nrow, n, first?"gfirst":"glast");
+
   switch(TYPEOF(x)) {
   case LGLSXP: {
     const int *ix = LOGICAL(x);
     ans = PROTECT(allocVector(LGLSXP, ngrp));
     int *ians = LOGICAL(ans);
     for (int i=0; i<ngrp; ++i) {
-      int k = ff[i]+grpsize[i]-2;
+      int k = first ? ff[i]-1 : ff[i]+grpsize[i]-2;
       if (isunsorted) k = oo[k]-1;
-      k = (irowslen == -1) ? k : irows[k]-1;
-      ians[i] = (k+1==NA_INTEGER) ? NA_LOGICAL : ix[k];
+      ians[i] = nosubset ? ix[k] : (irows[k]==NA_INTEGER ? NA_LOGICAL : ix[irows[k]-1]);
     }
   }
     break;
@@ -922,10 +923,9 @@ SEXP glast(SEXP x) {
     ans = PROTECT(allocVector(INTSXP, ngrp));
     int *ians = INTEGER(ans);
     for (int i=0; i<ngrp; ++i) {
-      int k = ff[i]+grpsize[i]-2;
+      int k = first ? ff[i]-1 : ff[i]+grpsize[i]-2;
       if (isunsorted) k = oo[k]-1;
-      k = (irowslen == -1) ? k : irows[k]-1;
-      ians[i] = (k+1==NA_INTEGER) ? NA_INTEGER : ix[k];
+      ians[i] = nosubset ? ix[k] : (irows[k]==NA_INTEGER ? NA_INTEGER : ix[irows[k]-1]);
     }
   }
     break;
@@ -934,10 +934,9 @@ SEXP glast(SEXP x) {
     ans = PROTECT(allocVector(REALSXP, ngrp));
     double *dans = REAL(ans);
     for (int i=0; i<ngrp; ++i) {
-      int k = ff[i]+grpsize[i]-2;
+      int k = first ? ff[i]-1 : ff[i]+grpsize[i]-2;
       if (isunsorted) k = oo[k]-1;
-      k = (irowslen == -1) ? k : irows[k]-1;
-      dans[i] = (k+1==NA_INTEGER) ? NA_REAL : dx[k];
+      dans[i] = nosubset ? dx[k] : (irows[k]==NA_INTEGER ? NA_REAL : dx[irows[k]-1]);
     }
   }
     break;
@@ -946,124 +945,53 @@ SEXP glast(SEXP x) {
     ans = PROTECT(allocVector(CPLXSXP, ngrp));
     Rcomplex *dans = COMPLEX(ans);
     for (int i=0; i<ngrp; ++i) {
-      int k = ff[i]+grpsize[i]-2;
+      int k = first ? ff[i]-1 : ff[i]+grpsize[i]-2;
       if (isunsorted) k = oo[k]-1;
-      k = (irowslen == -1) ? k : irows[k]-1;
-      dans[i] = (k+1==NA_INTEGER) ? NA_CPLX : dx[k];
+      dans[i] = nosubset ? dx[k] : (irows[k]==NA_INTEGER ? NA_CPLX : dx[irows[k]-1]);
     }
   } break;
-  case STRSXP:
+  case STRSXP: {
+    const SEXP *sx = STRING_PTR(x);
     ans = PROTECT(allocVector(STRSXP, ngrp));
     for (int i=0; i<ngrp; ++i) {
-      int k = ff[i]+grpsize[i]-2;
+      int k = first ? ff[i]-1 : ff[i]+grpsize[i]-2;
       if (isunsorted) k = oo[k]-1;
-      k = (irowslen == -1) ? k : irows[k]-1;
-      SET_STRING_ELT(ans, i, (k+1==NA_INTEGER) ? NA_STRING : STRING_ELT(x, k));
+      SET_STRING_ELT(ans, i, nosubset ? sx[k] : (irows[k]==NA_INTEGER ? NA_STRING : sx[irows[k]-1]));
     }
-    break;
-  case VECSXP:
+  } break;
+  case VECSXP: {
+    const SEXP *vx = SEXPPTR_RO(x);
     ans = PROTECT(allocVector(VECSXP, ngrp));
     for (int i=0; i<ngrp; ++i) {
-      int k = ff[i]+grpsize[i]-2;
+      int k = first ? ff[i]-1 : ff[i]+grpsize[i]-2;
       if (isunsorted) k = oo[k]-1;
-      k = (irowslen == -1) ? k : irows[k]-1;
-      SET_VECTOR_ELT(ans, i, (k+1==NA_INTEGER) ? ScalarLogical(NA_LOGICAL) : VECTOR_ELT(x, k));
+      SET_VECTOR_ELT(ans, i, nosubset ? vx[k] : (irows[k]==NA_INTEGER ? ScalarLogical(NA_LOGICAL) : vx[irows[k]-1]));
     }
-    break;
+  } break;
   default:
-    error(_("Type '%s' not supported by GForce tail (gtail). Either add the prefix utils::tail(.) or turn off GForce optimization using options(datatable.optimize=1)"), type2char(TYPEOF(x)));
+    error(_("Type '%s' not supported by GForce head/tail/first/last. Either add the prefix utils::head(.) or turn off GForce optimization using options(datatable.optimize=1)"), type2char(TYPEOF(x)));
   }
   copyMostAttrib(x, ans);
   UNPROTECT(1);
   return(ans);
 }
 
+SEXP glast(SEXP x) {
+  return gfirstlast(x, false);
+}
+
 SEXP gfirst(SEXP x) {
-  const int n = (irowslen == -1) ? length(x) : irowslen;
-  SEXP ans;
-  if (nrow != n) error(_("nrow [%d] != length(x) [%d] in %s"), nrow, n, "ghead");
-  switch(TYPEOF(x)) {
-  case LGLSXP: {
-    int const *ix = LOGICAL(x);
-    ans = PROTECT(allocVector(LGLSXP, ngrp));
-    int *ians = LOGICAL(ans);
-    for (int i=0; i<ngrp; ++i) {
-      int k = ff[i]-1;
-      if (isunsorted) k = oo[k]-1;
-      k = (irowslen == -1) ? k : irows[k]-1;
-      ians[i] = (k+1==NA_INTEGER) ? NA_LOGICAL : ix[k];
-    }
-  }
-    break;
-  case INTSXP: {
-    const int *ix = INTEGER(x);
-    ans = PROTECT(allocVector(INTSXP, ngrp));
-    int *ians = INTEGER(ans);
-    for (int i=0; i<ngrp; ++i) {
-      int k = ff[i]-1;
-      if (isunsorted) k = oo[k]-1;
-      k = (irowslen == -1) ? k : irows[k]-1;
-      ians[i] = (k+1==NA_INTEGER) ? NA_INTEGER : ix[k];
-    }
-  }
-    break;
-  case REALSXP: {
-    const double *dx = REAL(x);
-    ans = PROTECT(allocVector(REALSXP, ngrp));
-    double *dans = REAL(ans);
-    for (int i=0; i<ngrp; ++i) {
-      int k = ff[i]-1;
-      if (isunsorted) k = oo[k]-1;
-      k = (irowslen == -1) ? k : irows[k]-1;
-      dans[i] = (k+1==NA_INTEGER) ? NA_REAL : dx[k];
-    }
-  }
-    break;
-  case CPLXSXP: {
-    const Rcomplex *dx = COMPLEX(x);
-    ans = PROTECT(allocVector(CPLXSXP, ngrp));
-    Rcomplex *dans = COMPLEX(ans);
-    for (int i=0; i<ngrp; ++i) {
-      int k = ff[i]-1;
-      if (isunsorted) k = oo[k]-1;
-      k = (irowslen == -1) ? k : irows[k]-1;
-      dans[i] = (k+1==NA_INTEGER) ? NA_CPLX : dx[k];
-    }
-  } break;
-  case STRSXP:
-    ans = PROTECT(allocVector(STRSXP, ngrp));
-    for (int i=0; i<ngrp; ++i) {
-      int k = ff[i]-1;
-      if (isunsorted) k = oo[k]-1;
-      k = (irowslen == -1) ? k : irows[k]-1;
-      SET_STRING_ELT(ans, i, (k+1==NA_INTEGER) ? NA_STRING : STRING_ELT(x, k));
-    }
-    break;
-  case VECSXP:
-    ans = PROTECT(allocVector(VECSXP, ngrp));
-    for (int i=0; i<ngrp; ++i) {
-      int k = ff[i]-1;
-      if (isunsorted) k = oo[k]-1;
-      k = (irowslen == -1) ? k : irows[k]-1;
-      SET_VECTOR_ELT(ans, i, (k+1==NA_INTEGER) ? ScalarLogical(NA_LOGICAL) : VECTOR_ELT(x, k));
-    }
-    break;
-  default:
-    error(_("Type '%s' not supported by GForce head (ghead). Either add the prefix utils::head(.) or turn off GForce optimization using options(datatable.optimize=1)"), type2char(TYPEOF(x)));
-  }
-  copyMostAttrib(x, ans);
-  UNPROTECT(1);
-  return(ans);
+  return gfirstlast(x, true);
 }
 
 SEXP gtail(SEXP x, SEXP valArg) {
   if (!isInteger(valArg) || LENGTH(valArg)!=1 || INTEGER(valArg)[0]!=1) error(_("Internal error, gtail is only implemented for n=1. This should have been caught before. please report to data.table issue tracker.")); // # nocov
-  return (glast(x));
+  return glast(x);
 }
 
 SEXP ghead(SEXP x, SEXP valArg) {
   if (!isInteger(valArg) || LENGTH(valArg)!=1 || INTEGER(valArg)[0]!=1) error(_("Internal error, ghead is only implemented for n=1. This should have been caught before. please report to data.table issue tracker.")); // # nocov
-  return (gfirst(x));
+  return gfirst(x);
 }
 
 SEXP gnthvalue(SEXP x, SEXP valArg) {

From 798a2b37cb27a3c8f1f3efe134fd617d26320481 Mon Sep 17 00:00:00 2001
From: Matt Dowle <mattjdowle@gmail.com>
Date: Mon, 23 Aug 2021 15:14:04 -0600
Subject: [PATCH 09/12] gnthvalue folded into gfirstlast

---
 inst/tests/tests.Rraw |   6 +--
 src/gsumm.c           | 117 ++++++++----------------------------------
 2 files changed, 24 insertions(+), 99 deletions(-)

diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index 357fd99cbb..f05d6b2e0e 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -14711,9 +14711,9 @@ DT = data.table(id = c(1L,1L,2L), v = as.raw(0:2))
 test(2020.01, DT[, min(v), by=id], error="'raw' not supported by GForce min/max")
 test(2020.02, DT[, max(v), by=id], error="'raw' not supported by GForce min/max")
 test(2020.03, DT[, median(v), by=id], error="'raw' not supported by GForce median")
-test(2020.04, DT[, head(v, 1), by=id], error="'raw' not supported by GForce head/tail/first/last")
-test(2020.05, DT[, tail(v, 1), by=id], error="'raw' not supported by GForce head/tail/first/last")
-test(2020.06, DT[, v[1], by=id], error="'raw' not supported by GForce subset")
+test(2020.04, DT[, head(v, 1), by=id], error="'raw' not supported by GForce head/tail/first/last/`[`")
+test(2020.05, DT[, tail(v, 1), by=id], error="'raw' not supported by GForce head/tail/first/last/`[`")
+test(2020.06, DT[, v[1], by=id], error="'raw' not supported by GForce head/tail/first/last/`[`")
 test(2020.07, DT[, sd(v), by=id], error="'raw' not supported by GForce sd")
 test(2020.08, DT[, var(v), by=id], error="'raw' not supported by GForce var")
 test(2020.09, DT[, prod(v), by=id], error="'raw' not supported by GForce prod")
diff --git a/src/gsumm.c b/src/gsumm.c
index 74d28e1015..5832735ee4 100644
--- a/src/gsumm.c
+++ b/src/gsumm.c
@@ -900,19 +900,21 @@ SEXP gmedian(SEXP x, SEXP narmArg) {
   return ans;
 }
 
-static SEXP gfirstlast(SEXP x, const bool first) {
+static SEXP gfirstlast(SEXP x, const bool first, const int w) {
+  // w: which item (1 other than for gnthvalue when could be >1)
   const bool nosubset = irowslen == -1;
   const int n = nosubset ? length(x) : irowslen;
   SEXP ans;
   if (nrow != n) error(_("nrow [%d] != length(x) [%d] in %s"), nrow, n, first?"gfirst":"glast");
-
+  const bool gnth = w>1;  // const bool to avoid fetching grpsize[i] when not needed
   switch(TYPEOF(x)) {
   case LGLSXP: {
     const int *ix = LOGICAL(x);
     ans = PROTECT(allocVector(LGLSXP, ngrp));
     int *ians = LOGICAL(ans);
     for (int i=0; i<ngrp; ++i) {
-      int k = first ? ff[i]-1 : ff[i]+grpsize[i]-2;
+      if (gnth && w>grpsize[i]) { ians[i]=NA_LOGICAL; continue; }
+      int k = first ? ff[i]+w-2 : ff[i]+grpsize[i]-w-1;
       if (isunsorted) k = oo[k]-1;
       ians[i] = nosubset ? ix[k] : (irows[k]==NA_INTEGER ? NA_LOGICAL : ix[irows[k]-1]);
     }
@@ -923,7 +925,8 @@ static SEXP gfirstlast(SEXP x, const bool first) {
     ans = PROTECT(allocVector(INTSXP, ngrp));
     int *ians = INTEGER(ans);
     for (int i=0; i<ngrp; ++i) {
-      int k = first ? ff[i]-1 : ff[i]+grpsize[i]-2;
+      if (gnth && w>grpsize[i]) { ians[i]=NA_INTEGER; continue; }
+      int k = first ? ff[i]+w-2 : ff[i]+grpsize[i]-w-1;
       if (isunsorted) k = oo[k]-1;
       ians[i] = nosubset ? ix[k] : (irows[k]==NA_INTEGER ? NA_INTEGER : ix[irows[k]-1]);
     }
@@ -934,7 +937,8 @@ static SEXP gfirstlast(SEXP x, const bool first) {
     ans = PROTECT(allocVector(REALSXP, ngrp));
     double *dans = REAL(ans);
     for (int i=0; i<ngrp; ++i) {
-      int k = first ? ff[i]-1 : ff[i]+grpsize[i]-2;
+      if (gnth && w>grpsize[i]) { dans[i]=NA_REAL; continue; }
+      int k = first ? ff[i]+w-2 : ff[i]+grpsize[i]-w-1;
       if (isunsorted) k = oo[k]-1;
       dans[i] = nosubset ? dx[k] : (irows[k]==NA_INTEGER ? NA_REAL : dx[irows[k]-1]);
     }
@@ -945,7 +949,8 @@ static SEXP gfirstlast(SEXP x, const bool first) {
     ans = PROTECT(allocVector(CPLXSXP, ngrp));
     Rcomplex *dans = COMPLEX(ans);
     for (int i=0; i<ngrp; ++i) {
-      int k = first ? ff[i]-1 : ff[i]+grpsize[i]-2;
+      if (gnth && w>grpsize[i]) { dans[i]=NA_CPLX; continue; }
+      int k = first ? ff[i]+w-2 : ff[i]+grpsize[i]-w-1;
       if (isunsorted) k = oo[k]-1;
       dans[i] = nosubset ? dx[k] : (irows[k]==NA_INTEGER ? NA_CPLX : dx[irows[k]-1]);
     }
@@ -954,7 +959,8 @@ static SEXP gfirstlast(SEXP x, const bool first) {
     const SEXP *sx = STRING_PTR(x);
     ans = PROTECT(allocVector(STRSXP, ngrp));
     for (int i=0; i<ngrp; ++i) {
-      int k = first ? ff[i]-1 : ff[i]+grpsize[i]-2;
+      if (gnth && w>grpsize[i]) { SET_STRING_ELT(ans, i, NA_STRING); continue; }
+      int k = first ? ff[i]+w-2 : ff[i]+grpsize[i]-w-1;
       if (isunsorted) k = oo[k]-1;
       SET_STRING_ELT(ans, i, nosubset ? sx[k] : (irows[k]==NA_INTEGER ? NA_STRING : sx[irows[k]-1]));
     }
@@ -963,13 +969,14 @@ static SEXP gfirstlast(SEXP x, const bool first) {
     const SEXP *vx = SEXPPTR_RO(x);
     ans = PROTECT(allocVector(VECSXP, ngrp));
     for (int i=0; i<ngrp; ++i) {
-      int k = first ? ff[i]-1 : ff[i]+grpsize[i]-2;
+      if (gnth && w>grpsize[i]) { SET_VECTOR_ELT(ans, i, ScalarLogical(NA_LOGICAL)); continue; }
+      int k = first ? ff[i]+w-2 : ff[i]+grpsize[i]-w-1;
       if (isunsorted) k = oo[k]-1;
       SET_VECTOR_ELT(ans, i, nosubset ? vx[k] : (irows[k]==NA_INTEGER ? ScalarLogical(NA_LOGICAL) : vx[irows[k]-1]));
     }
   } break;
   default:
-    error(_("Type '%s' not supported by GForce head/tail/first/last. Either add the prefix utils::head(.) or turn off GForce optimization using options(datatable.optimize=1)"), type2char(TYPEOF(x)));
+    error(_("Type '%s' not supported by GForce head/tail/first/last/`[`. Either add the prefix utils::head(.) or turn off GForce optimization using options(datatable.optimize=1)"), type2char(TYPEOF(x)));
   }
   copyMostAttrib(x, ans);
   UNPROTECT(1);
@@ -977,108 +984,26 @@ static SEXP gfirstlast(SEXP x, const bool first) {
 }
 
 SEXP glast(SEXP x) {
-  return gfirstlast(x, false);
+  return gfirstlast(x, false, 1);
 }
 
 SEXP gfirst(SEXP x) {
-  return gfirstlast(x, true);
+  return gfirstlast(x, true, 1);
 }
 
 SEXP gtail(SEXP x, SEXP valArg) {
   if (!isInteger(valArg) || LENGTH(valArg)!=1 || INTEGER(valArg)[0]!=1) error(_("Internal error, gtail is only implemented for n=1. This should have been caught before. please report to data.table issue tracker.")); // # nocov
-  return glast(x);
+  return gfirstlast(x, false, 1);
 }
 
 SEXP ghead(SEXP x, SEXP valArg) {
   if (!isInteger(valArg) || LENGTH(valArg)!=1 || INTEGER(valArg)[0]!=1) error(_("Internal error, ghead is only implemented for n=1. This should have been caught before. please report to data.table issue tracker.")); // # nocov
-  return gfirst(x);
+  return gfirstlast(x, true, 1);
 }
 
 SEXP gnthvalue(SEXP x, SEXP valArg) {
-
   if (!isInteger(valArg) || LENGTH(valArg)!=1 || INTEGER(valArg)[0]<=0) error(_("Internal error, `g[` (gnthvalue) is only implemented single value subsets with positive index, e.g., .SD[2]. This should have been caught before. please report to data.table issue tracker.")); // # nocov
-  const int val=INTEGER(valArg)[0];
-  const int n = (irowslen == -1) ? length(x) : irowslen;
-  SEXP ans;
-  if (nrow != n) error(_("nrow [%d] != length(x) [%d] in %s"), nrow, n, "ghead");
-  switch(TYPEOF(x)) {
-  case LGLSXP: {
-    const int *ix = LOGICAL(x);
-    ans = PROTECT(allocVector(LGLSXP, ngrp));
-    int *ians = LOGICAL(ans);
-    for (int i=0; i<ngrp; ++i) {
-      if (val > grpsize[i]) { LOGICAL(ans)[i] = NA_LOGICAL; continue; }
-      int k = ff[i]+val-2;
-      if (isunsorted) k = oo[k]-1;
-      k = (irowslen == -1) ? k : irows[k]-1;
-      ians[i] = (k+1==NA_INTEGER) ? NA_LOGICAL : ix[k];
-    }
-  }
-    break;
-  case INTSXP: {
-    const int *ix = INTEGER(x);
-    ans = PROTECT(allocVector(INTSXP, ngrp));
-    int *ians = INTEGER(ans);
-    for (int i=0; i<ngrp; ++i) {
-      if (val > grpsize[i]) { INTEGER(ans)[i] = NA_INTEGER; continue; }
-      int k = ff[i]+val-2;
-      if (isunsorted) k = oo[k]-1;
-      k = (irowslen == -1) ? k : irows[k]-1;
-      ians[i] = (k+1==NA_INTEGER) ? NA_INTEGER : ix[k];
-    }
-  }
-    break;
-  case REALSXP: {
-    const double *dx = REAL(x);
-    ans = PROTECT(allocVector(REALSXP, ngrp));
-    double *dans = REAL(ans);
-    for (int i=0; i<ngrp; ++i) {
-      if (val > grpsize[i]) { REAL(ans)[i] = NA_REAL; continue; }
-      int k = ff[i]+val-2;
-      if (isunsorted) k = oo[k]-1;
-      k = (irowslen == -1) ? k : irows[k]-1;
-      dans[i] = (k+1==NA_INTEGER) ? NA_REAL : dx[k];
-    }
-  }
-    break;
-  case CPLXSXP: {
-    const Rcomplex *dx = COMPLEX(x);
-    ans = PROTECT(allocVector(CPLXSXP, ngrp));
-    Rcomplex *dans = COMPLEX(ans);
-    for (int i=0; i<ngrp; ++i) {
-      if (val > grpsize[i]) { dans[i].r = NA_REAL; dans[i].i = NA_REAL; continue; }
-      int k = ff[i]+val-2;
-      if (isunsorted) k = oo[k]-1;
-      k = (irowslen == -1) ? k : irows[k]-1;
-      dans[i] = (k+1==NA_INTEGER) ? NA_CPLX : dx[k];
-    }
-  } break;
-  case STRSXP:
-    ans = PROTECT(allocVector(STRSXP, ngrp));
-    for (int i=0; i<ngrp; ++i) {
-      if (val > grpsize[i]) { SET_STRING_ELT(ans, i, NA_STRING); continue; }
-      int k = ff[i]+val-2;
-      if (isunsorted) k = oo[k]-1;
-      k = (irowslen == -1) ? k : irows[k]-1;
-      SET_STRING_ELT(ans, i, (k+1==NA_INTEGER) ? NA_STRING : STRING_ELT(x, k));
-    }
-    break;
-  case VECSXP:
-    ans = PROTECT(allocVector(VECSXP, ngrp));
-    for (int i=0; i<ngrp; ++i) {
-      if (val > grpsize[i]) { SET_VECTOR_ELT(ans, i, R_NilValue); continue; }
-      int k = ff[i]+val-2;
-      if (isunsorted) k = oo[k]-1;
-      k = (irowslen == -1) ? k : irows[k]-1;
-      SET_VECTOR_ELT(ans, i, (k+1==NA_INTEGER) ? ScalarLogical(NA_LOGICAL) : VECTOR_ELT(x, k));
-    }
-    break;
-  default:
-    error(_("Type '%s' not supported by GForce subset `[` (gnthvalue). Either add the prefix utils::head(.) or turn off GForce optimization using options(datatable.optimize=1)"), type2char(TYPEOF(x)));
-  }
-  copyMostAttrib(x, ans);
-  UNPROTECT(1);
-  return(ans);
+  return gfirstlast(x, true, INTEGER(valArg)[0]);
 }
 
 // TODO: gwhich.min, gwhich.max

From 87878d01ae7579029be045dbae487449b40f737c Mon Sep 17 00:00:00 2001
From: Matt Dowle <mattjdowle@gmail.com>
Date: Mon, 23 Aug 2021 17:05:41 -0600
Subject: [PATCH 10/12] gvarsd1: reduced narm repeated logic, R API outside
 loops, avoided overflow in NA-in-irows fix

---
 src/gsumm.c | 180 ++++++++++++++++++----------------------------------
 1 file changed, 62 insertions(+), 118 deletions(-)

diff --git a/src/gsumm.c b/src/gsumm.c
index 5832735ee4..1864bb530c 100644
--- a/src/gsumm.c
+++ b/src/gsumm.c
@@ -1008,143 +1008,87 @@ SEXP gnthvalue(SEXP x, SEXP valArg) {
 
 // TODO: gwhich.min, gwhich.max
 // implemented this similar to gmedian to balance well between speed and memory usage. There's one extra allocation on maximum groups and that's it.. and that helps speed things up extremely since we don't have to collect x's values for each group for each step (mean, residuals, mean again and then variance).
-SEXP gvarsd1(SEXP x, SEXP narm, Rboolean isSD)
+static SEXP gvarsd1(SEXP x, SEXP narmArg, bool isSD)
 {
-  if (!isLogical(narm) || LENGTH(narm)!=1 || LOGICAL(narm)[0]==NA_LOGICAL) error(_("na.rm must be TRUE or FALSE"));
+  if (!isLogical(narmArg) || LENGTH(narmArg)!=1 || LOGICAL(narmArg)[0]==NA_LOGICAL) error(_("na.rm must be TRUE or FALSE"));
   if (!isVectorAtomic(x)) error(_("GForce var/sd can only be applied to columns, not .SD or similar. For the full covariance matrix of all items in a list such as .SD, either add the prefix stats::var(.SD) (or stats::sd(.SD)) or turn off GForce optimization using options(datatable.optimize=1). Alternatively, if you only need the diagonal elements, 'DT[,lapply(.SD,var),by=,.SDcols=]' is the optimized way to do this."));
   if (inherits(x, "factor")) error(_("var/sd is not meaningful for factors."));
   const int n = (irowslen == -1) ? length(x) : irowslen;
   if (nrow != n) error(_("nrow [%d] != length(x) [%d] in %s"), nrow, n, "gvar");
   SEXP sub, ans = PROTECT(allocVector(REALSXP, ngrp));
+  double *ansd = REAL(ans);
+  const bool nosubset = irowslen==-1;
+  const bool narm = LOGICAL(narmArg)[0];
   switch(TYPEOF(x)) {
-  case LGLSXP: case INTSXP:
+  case LGLSXP: case INTSXP: {
     sub = PROTECT(allocVector(INTSXP, maxgrpn)); // allocate once upfront
-    if (!LOGICAL(narm)[0]) {
-      for (int i=0; i<ngrp; ++i) {
-        long double m=0., s=0., v=0.;
-        bool ans_na = false;
-        if (grpsize[i] != 1) {
-          const int thisgrpsize = grpsize[i];
-          SETLENGTH(sub, thisgrpsize); // to gather this group's data
-          for (int j=0; j<thisgrpsize; ++j) {
-            int ix = ff[i]+j-1;
-            if (isunsorted) ix = oo[ix]-1;
-            ix = (irowslen == -1) ? ix : irows[ix]-1;
-            if (ix+1==NA_INTEGER) { ans_na=true; break; }
-            if (INTEGER(x)[ix] == NA_INTEGER) { ans_na=true; break; }
-            INTEGER(sub)[j] = INTEGER(x)[ix];
-            m += INTEGER(sub)[j]; // sum
-          }
-          if (ans_na) { REAL(ans)[i] = NA_REAL; continue; }
-          m = m/thisgrpsize; // mean, first pass
-          for (int j=0; j<thisgrpsize; ++j) s += (INTEGER(sub)[j]-m); // residuals
-          m += (s/thisgrpsize); // mean, second pass
-          for (int j=0; j<thisgrpsize; ++j) { // variance
-            v += (INTEGER(sub)[j]-(double)m) * (INTEGER(sub)[j]-(double)m);
-          }
-          REAL(ans)[i] = (double)v/(thisgrpsize-1);
-          if (isSD) REAL(ans)[i] = SQRTL(REAL(ans)[i]);
-        } else REAL(ans)[i] = NA_REAL;
-      }
-    } else {
-      for (int i=0; i<ngrp; ++i) {
+    int *subd = INTEGER(sub);
+    const int *xd = INTEGER(x);
+    for (int i=0; i<ngrp; ++i) {
+      const int thisgrpsize = grpsize[i];
+      if (thisgrpsize==1) {
+        ansd[i] = NA_REAL;
+      } else {
+        int nna = 0;  // how many not-NA
         long double m=0., s=0., v=0.;
-        int thisgrpsize = 0;
-        if (grpsize[i] != 1) {
-          SETLENGTH(sub, grpsize[i]); // to gather this group's data
-          for (int j=0; j<grpsize[i]; ++j) {
-            int ix = ff[i]+j-1;
-            if (isunsorted) ix = oo[ix]-1;
-            ix = (irowslen == -1) ? ix : irows[ix]-1;
-            if (ix+1==NA_INTEGER) continue;
-            if (INTEGER(x)[ix] == NA_INTEGER) continue;
-            INTEGER(sub)[thisgrpsize] = INTEGER(x)[ix];
-            m += INTEGER(sub)[thisgrpsize]; // sum
-            thisgrpsize++;
-          }
-          if (thisgrpsize <= 1) { REAL(ans)[i] = NA_REAL; continue; }
-          m = m/thisgrpsize; // mean, first pass
-          for (int j=0; j<thisgrpsize; ++j) s += (INTEGER(sub)[j]-m); // residuals
-          m += (s/thisgrpsize); // mean, second pass
-          for (int j=0; j<thisgrpsize; ++j) { // variance
-            v += (INTEGER(sub)[j]-(double)m) * (INTEGER(sub)[j]-(double)m);
+        for (int j=0; j<thisgrpsize; ++j) {
+          int ix = ff[i]+j-1;
+          if (isunsorted) ix = oo[ix]-1;
+          if (nosubset ? xd[ix]==NA_INTEGER : (irows[ix]==NA_INTEGER || (ix=irows[ix]-1,xd[ix]==NA_INTEGER))) {
+            if (narm) continue; else break;
           }
-          REAL(ans)[i] = (double)v/(thisgrpsize-1);
-          if (isSD) REAL(ans)[i] = SQRTL(REAL(ans)[i]);
-        } else REAL(ans)[i] = NA_REAL;
+          m += (subd[nna++]=xd[ix]); // sum
+        }
+        if (nna!=thisgrpsize && (!narm || nna<=1)) { ansd[i]=NA_REAL; continue; }
+        m = m/nna; // mean, first pass
+        for (int j=0; j<nna; ++j) s += (subd[j]-m); // residuals
+        m += (s/nna); // mean, second pass
+        for (int j=0; j<nna; ++j) { // variance
+          v += (subd[j]-(double)m) * (subd[j]-(double)m);
+        }
+        ansd[i] = (double)v/(nna-1);
+        if (isSD) ansd[i] = SQRTL(ansd[i]);
       }
-    }
-    SETLENGTH(sub, maxgrpn);
+    }}
     break;
-  case REALSXP:
+  case REALSXP: {
     sub = PROTECT(allocVector(REALSXP, maxgrpn)); // allocate once upfront
-    if (!LOGICAL(narm)[0]) {
-      for (int i=0; i<ngrp; ++i) {
-        long double m=0., s=0., v=0.;
-        bool ans_na=false;
-        if (grpsize[i] != 1) {
-          const int thisgrpsize = grpsize[i];
-          SETLENGTH(sub, thisgrpsize); // to gather this group's data
-          for (int j=0; j<thisgrpsize; ++j) {
-            int ix = ff[i]+j-1;
-            if (isunsorted) ix = oo[ix]-1;
-            ix = (irowslen == -1) ? ix : irows[ix]-1;
-            if (ix+1==NA_INTEGER) { ans_na=true; break; }
-            if (ISNAN(REAL(x)[ix])) { ans_na=true; break; }
-            REAL(sub)[j] = REAL(x)[ix];
-            m += REAL(sub)[j]; // sum
-          }
-          if (ans_na) { REAL(ans)[i] = NA_REAL; continue; }
-          m = m/thisgrpsize; // mean, first pass
-          for (int j=0; j<thisgrpsize; ++j) s += (REAL(sub)[j]-m); // residuals
-          m += (s/thisgrpsize); // mean, second pass
-          for (int j=0; j<thisgrpsize; ++j) { // variance
-            v += (REAL(sub)[j]-(double)m) * (REAL(sub)[j]-(double)m);
-          }
-          REAL(ans)[i] = (double)v/(thisgrpsize-1);
-          if (isSD) REAL(ans)[i] = SQRTL(REAL(ans)[i]);
-        } else REAL(ans)[i] = NA_REAL;
-      }
-    } else {
-      for (int i=0; i<ngrp; ++i) {
+    double *subd = REAL(sub);
+    const double *xd = REAL(x);
+    for (int i=0; i<ngrp; ++i) {
+      const int thisgrpsize = grpsize[i];
+      if (thisgrpsize==1) {
+        ansd[i] = NA_REAL;
+      } else {
+        int nna = 0;  // how many not-NA
         long double m=0., s=0., v=0.;
-        int thisgrpsize = 0;
-        if (grpsize[i] != 1) {
-          SETLENGTH(sub, grpsize[i]); // to gather this group's data
-          for (int j=0; j<grpsize[i]; ++j) {
-            int ix = ff[i]+j-1;
-            if (isunsorted) ix = oo[ix]-1;
-            ix = (irowslen == -1) ? ix : irows[ix]-1;
-            if (ix+1==NA_INTEGER) continue;
-            if (ISNAN(REAL(x)[ix])) continue;
-            REAL(sub)[thisgrpsize] = REAL(x)[ix];
-            m += REAL(sub)[thisgrpsize]; // sum
-            thisgrpsize++;
-          }
-          if (thisgrpsize <= 1) { REAL(ans)[i] = NA_REAL; continue; }
-          m = m/thisgrpsize; // mean, first pass
-          for (int j=0; j<thisgrpsize; ++j) s += (REAL(sub)[j]-m); // residuals
-          m += (s/thisgrpsize); // mean, second pass
-          for (int j=0; j<thisgrpsize; ++j) { // variance
-            v += (REAL(sub)[j]-(double)m) * (REAL(sub)[j]-(double)m);
+        for (int j=0; j<thisgrpsize; ++j) {
+          int ix = ff[i]+j-1;
+          if (isunsorted) ix = oo[ix]-1;
+          if (nosubset ? ISNAN(xd[ix]) : (irows[ix]==NA_INTEGER || (ix=irows[ix]-1,ISNAN(xd[ix])))) {
+            if (narm) continue; else break;
           }
-          REAL(ans)[i] = (double)v/(thisgrpsize-1);
-          if (isSD) REAL(ans)[i] = SQRTL(REAL(ans)[i]);
-        } else REAL(ans)[i] = NA_REAL;
+          m += (subd[nna++]=xd[ix]); // sum
+        }
+        if (nna!=thisgrpsize && (!narm || nna<=1)) { ansd[i]=NA_REAL; continue; }
+        m = m/nna; // mean, first pass
+        for (int j=0; j<nna; ++j) s += (subd[j]-m); // residuals
+        m += (s/nna); // mean, second pass
+        for (int j=0; j<nna; ++j) { // variance
+          v += (subd[j]-(double)m) * (subd[j]-(double)m);
+        }
+        ansd[i] = (double)v/(nna-1);
+        if (isSD) ansd[i] = SQRTL(ansd[i]);
       }
-    }
-    SETLENGTH(sub, maxgrpn);
+    }}
     break;
-  default:
-      if (!isSD) {
-        error(_("Type '%s' not supported by GForce var (gvar). Either add the prefix stats::var(.) or turn off GForce optimization using options(datatable.optimize=1)"), type2char(TYPEOF(x)));
-      } else {
-        error(_("Type '%s' not supported by GForce sd (gsd). Either add the prefix stats::sd(.) or turn off GForce optimization using options(datatable.optimize=1)"), type2char(TYPEOF(x)));
-      }
+  default:  
+    error(_("Type '%s' not supported by GForce %s. Either add the prefix stats::var(.) or turn off GForce optimization using options(datatable.optimize=1)"),
+            type2char(TYPEOF(x)), isSD?"sd (gsd)":"var (gvar)");
   }
   // no copyMostAttrib(x, ans) since class (e.g. Date) unlikely applicable to sd/var
-  UNPROTECT(2);
-  return (ans);
+  UNPROTECT(2); // ans,sub
+  return ans;
 }
 
 SEXP gvar(SEXP x, SEXP narm) {

From da78a08c76b2f79d9a22c5ec39fe9c45cf0e4e93 Mon Sep 17 00:00:00 2001
From: Matt Dowle <mattjdowle@gmail.com>
Date: Mon, 23 Aug 2021 17:45:04 -0600
Subject: [PATCH 11/12] gprod: similar

---
 src/gsumm.c | 63 ++++++++++++++++++++++++-----------------------------
 1 file changed, 28 insertions(+), 35 deletions(-)

diff --git a/src/gsumm.c b/src/gsumm.c
index 1864bb530c..713bdc23c4 100644
--- a/src/gsumm.c
+++ b/src/gsumm.c
@@ -1099,12 +1099,13 @@ SEXP gsd(SEXP x, SEXP narm) {
   return (gvarsd1(x, narm, TRUE));
 }
 
-SEXP gprod(SEXP x, SEXP narm)
-{
-  if (!isLogical(narm) || LENGTH(narm)!=1 || LOGICAL(narm)[0]==NA_LOGICAL) error(_("na.rm must be TRUE or FALSE"));
+SEXP gprod(SEXP x, SEXP narmArg) {
+  if (!isLogical(narmArg) || LENGTH(narmArg)!=1 || LOGICAL(narmArg)[0]==NA_LOGICAL) error(_("na.rm must be TRUE or FALSE"));
+  const bool narm=LOGICAL(narmArg)[0];
   if (!isVectorAtomic(x)) error(_("GForce prod can only be applied to columns, not .SD or similar. To multiply all items in a list such as .SD, either add the prefix base::prod(.SD) or turn off GForce optimization using options(datatable.optimize=1). More likely, you may be looking for 'DT[,lapply(.SD,prod),by=,.SDcols=]'"));
   if (inherits(x, "factor")) error(_("prod is not meaningful for factors."));
-  const int n = (irowslen == -1) ? length(x) : irowslen;
+  const bool nosubset = irowslen==-1;
+  const int n = nosubset ? length(x) : irowslen;
   //clock_t start = clock();
   SEXP ans;
   if (nrow != n) error(_("nrow [%d] != length(x) [%d] in %s"), nrow, n, "gprod");
@@ -1112,53 +1113,45 @@ SEXP gprod(SEXP x, SEXP narm)
   if (!s) error(_("Unable to allocate %d * %d bytes for gprod"), ngrp, sizeof(long double));
   for (int i=0; i<ngrp; ++i) s[i] = 1.0;
   ans = PROTECT(allocVector(REALSXP, ngrp));
+  double *ansd = REAL(ans);
   switch(TYPEOF(x)) {
-  case LGLSXP: case INTSXP:
+  case LGLSXP: case INTSXP: {
+    const int *xd = INTEGER(x);
     for (int i=0; i<n; ++i) {
       const int thisgrp = grp[i];
-      const int ix = (irowslen == -1) ? i : irows[i]-1;
-      if (!LOGICAL(narm)[0]) {
-        // Let NA_REAL propogate from here. R_NaReal is IEEE.
-        if (ix+1==NA_INTEGER) { s[thisgrp] = NA_REAL; continue; }
-        if (INTEGER(x)[ix]==NA_INTEGER) { s[thisgrp] = NA_REAL; continue; }
-      } else {
-        if (ix+1==NA_INTEGER) continue;
-        if (INTEGER(x)[ix]==NA_INTEGER) continue;
+      const int elem = nosubset ? xd[i] : (irows[i]==NA_INTEGER ? NA_INTEGER : xd[irows[i]-1]);
+      if (elem==NA_INTEGER) {
+        if (!narm) s[thisgrp] = NA_REAL;  // Let NA_REAL propogate from here. R_NaReal is IEEE.
+        continue;
       }
-      s[thisgrp] *= INTEGER(x)[ix];  // no under/overflow here, s is long double (like base)
-    }
-    for (int i=0; i<ngrp; ++i) {
-      if (s[i] > DBL_MAX) REAL(ans)[i] = R_PosInf;
-      else if (s[i] < -DBL_MAX) REAL(ans)[i] = R_NegInf;
-      else REAL(ans)[i] = (double)s[i];
-    }
+      s[thisgrp] *= elem; // no under/overflow here, s is long double (like base)
+    }}
     break;
-  case REALSXP:
+  case REALSXP: {
+    const double *xd = REAL(x);
     for (int i=0; i<n; ++i) {
       const int thisgrp = grp[i];
-      const int ix = (irowslen == -1) ? i : irows[i]-1;
-      if (!LOGICAL(narm)[0]) {
-        if (ix+1==NA_INTEGER) { s[thisgrp] = NA_REAL; continue; }
-        if (ISNAN(REAL(x)[ix])) { s[thisgrp] = NA_REAL; continue; }
-      } else {
-        if (ix+1==NA_INTEGER) continue;
-        if (ISNAN(REAL(x)[ix])) continue; // else let NA_REAL propogate from here
+      const double elem = nosubset ? xd[i] : (irows[i]==NA_INTEGER ? NA_REAL : xd[irows[i]-1]);
+      if (ISNAN(elem)) {
+        if (!narm) s[thisgrp] = NA_REAL;
+        continue;
       }
-      s[thisgrp] *= REAL(x)[ix];  // done in long double, like base
-    }
-    for (int i=0; i<ngrp; ++i) {
-      if (s[i] > DBL_MAX) REAL(ans)[i] = R_PosInf;
-      else if (s[i] < -DBL_MAX) REAL(ans)[i] = R_NegInf;
-      else REAL(ans)[i] = (double)s[i];
-    }
+      s[thisgrp] *= elem;
+    }}
     break;
   default:
     free(s);
     error(_("Type '%s' not supported by GForce prod (gprod). Either add the prefix base::prod(.) or turn off GForce optimization using options(datatable.optimize=1)"), type2char(TYPEOF(x)));
   }
+  for (int i=0; i<ngrp; ++i) {
+    if (s[i] > DBL_MAX) ansd[i] = R_PosInf;
+    else if (s[i] < -DBL_MAX) ansd[i] = R_NegInf;
+    else ansd[i] = (double)s[i];
+  }
   free(s);
   copyMostAttrib(x, ans);
   UNPROTECT(1);
   // Rprintf(_("this gprod took %8.3f\n"), 1.0*(clock()-start)/CLOCKS_PER_SEC);
   return(ans);
 }
+

From 072555906610403c8f8a2c37de0949259df5c122 Mon Sep 17 00:00:00 2001
From: Matt Dowle <mattjdowle@gmail.com>
Date: Mon, 23 Aug 2021 17:50:36 -0600
Subject: [PATCH 12/12] news item tweak

---
 NEWS.md               | 26 +++++++++++++-------------
 inst/tests/tests.Rraw | 12 ++++++------
 src/gsumm.c           |  2 +-
 3 files changed, 20 insertions(+), 20 deletions(-)

diff --git a/NEWS.md b/NEWS.md
index 912c44fbee..c91e72b81c 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -18,7 +18,7 @@
     DT = data.table(A=1:3, B=letters[1:3])
     DT[A>3,   .(ITEM='A>3', A, B)]  # (1)
     DT[A>3][, .(ITEM='A>3', A, B)]  # (2)
-    # the above are now equivalent as expected and return:  
+    # the above are now equivalent as expected and return:
     Empty data.table (0 rows and 3 cols): ITEM,A,B
     # Previously, (2) returned :
           ITEM     A      B
@@ -30,12 +30,12 @@
     2: In as.data.table.list(jval, .named = NULL) :
       Item 3 has 0 rows but longest item has 1; filled with NA
     ```
-    
+
     ```R
     DT = data.table(A=1:3, B=letters[1:3], key="A")
     DT[.(1:3, double()), B]
     # new result :
-    character(0)   
+    character(0)
     # old result :
     [1] "a" "b" "c"
     Warning message:
@@ -51,7 +51,7 @@
     DT[, sum(colB), keyby="colA"]
     DT[, sum(colB), by="colA", keyby=TRUE]   # same
     ```
-    
+
 7. `fwrite()` gains a new `datatable.fwrite.sep` option to change the default separator, still `","` by default. Thanks to Tony Fischetti for the PR. As is good practice in R in general, we usually resist new global options for the reason that a user changing the option for their own code can inadvertently change the behaviour of any package using `data.table` too. However, in this case, the global option affects file output rather than code behaviour. In fact, the very reason the user may wish to change the default separator is that they know a different separator is more appropriate for their data being passed to the package using `fwrite` but cannot otherwise change the `fwrite` call within that package.
 
 8. `melt()` now supports `NA` entries when specifying a list of `measure.vars`, which translate into runs of missing values in the output. Useful for melting wide data with some missing columns, [#4027](https://github.com/Rdatatable/data.table/issues/4027). Thanks to @vspinu for reporting, and @tdhock for implementing.
@@ -86,7 +86,7 @@
         out_col_name = "sum_x"
       )]
     ```
-    
+
 11. `DT[, if (...) .(a=1L) else .(a=1L, b=2L), by=group]` now returns a 1-column result with warning `j may not evaluate to the same number of columns for each group`, rather than error `'names' attribute [2] must be the same length as the vector`, [#4274](https://github.com/Rdatatable/data.table/issues/4274). Thanks to @robitalec for reporting, and Michael Chirico for the PR.
 
 12. Typo checking in `i` available since 1.11.4 is extended to work in non-English sessions, [#4989](https://github.com/Rdatatable/data.table/issues/4989). Thanks to Michael Chirico for the PR.
@@ -114,7 +114,7 @@
     ```R
     mtcars |> DT(mpg>20, .(mean_hp=mean(hp)), by=cyl)
     ```
-    
+
 23. `DT[i, nomatch=NULL]` where `i` contains row numbers now excludes `NA` and any outside the range [1,nrow], [#3109](https://github.com/Rdatatable/data.table/issues/3109) [#3666](https://github.com/Rdatatable/data.table/issues/3666). Before, `NA` rows were returned always for such values; i.e. `nomatch=0|NULL` was ignored. Thanks Michel Lang and Hadley Wickham for the requests, and Jan Gorecki for the PR. Using `nomatch=0` in this case when `i` is row numbers generates the warning `Please use nomatch=NULL instead of nomatch=0; see news item 5 in v1.12.0 (Jan 2019)`.
 
     ```R
@@ -246,26 +246,26 @@
     # 2:      b    NA              # NA because there are no non-NA, naturally
                                    # no inconvenient warning
     ```
-    
+
 36. `DT[, min(int64Col), by=grp]` (and `max`) would return incorrect results for `bit64::integer64` columns, [#4444](https://github.com/Rdatatable/data.table/issues/4444). Thanks to @go-see for reporting, and Michael Chirico for the PR.
 
 37. `fread(dec=',')` was able to guess `sep=','` and return an incorrect result, [#4483](https://github.com/Rdatatable/data.table/issues/4483). Thanks to Michael Chirico for reporting and fixing. It was already an error to provide both `sep=','` and `dec=','` manually.
 
     ```R
     fread('A|B|C\n1|0,4|a\n2|0,5|b\n', dec=',')  # no problem
-    
+
     #        A     B      C
     #    <int> <num> <char>
     # 1:     1   0.4      a
     # 2:     2   0.5      b
 
     fread('A|B,C\n1|0,4\n2|0,5\n', dec=',')
-    
+
     #       A|B     C    # old result guessed sep=',' despite dec=','
     #    <char> <int>
     # 1:    1|0     4
     # 2:    2|0     5
-    
+
     #        A   B,C     # now detects sep='|' correctly
     #    <int> <num>
     # 1:     1   0.4
@@ -276,9 +276,9 @@
 
     ```
     IDateTime("20171002095500", format="%Y%m%d%H%M%S")
-    
+
     # was :
-    # Error in charToDate(x) : 
+    # Error in charToDate(x) :
     #   character string is not in a standard unambiguous format
 
     # now :
@@ -287,7 +287,7 @@
     # 1: 2017-10-02 09:55:00
     ```
 
-39. `DT[i, sum(b), by=grp]` (and other optimized-by-group aggregates: `mean`, `var`, `sd`, `median`, `prod`, `min`, `max`, `first`, `last`, `head` and `tail`) could segfault if `i` contains row numbers and one or more are NA, [#1994](https://github.com/Rdatatable/data.table/issues/1994). Thanks to Arun Srinivasan for reporting, and Benjamin Schwendinger for the PR.
+39. `DT[i, sum(b), by=grp]` (and other optimized-by-group aggregates: `mean`, `var`, `sd`, `median`, `prod`, `min`, `max`, `first`, `last`, `head` and `tail`) could segfault if `i` contained row numbers and one or more were NA, [#1994](https://github.com/Rdatatable/data.table/issues/1994). Thanks to Arun Srinivasan for reporting, and Benjamin Schwendinger for the PR.
 
 ## NOTES
 
diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index f05d6b2e0e..4d782e4e92 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -3257,7 +3257,7 @@ Sep,33.5,19.4,15.7,11.9,0,100.8,100.8,0,12.7,12.7,0,174.1")
   x[ , r := as.raw(c(0, 1))]
   test(1037.414, melt(x, id.vars='x1', measure.vars='r'),
        error="Unknown column type 'raw' for column 'r'")
-       
+
   # test dispatch for non-data.table objects, #4864.
   if (inherits(try(getNamespace("reshape2"), silent=TRUE),"try-error")) {
     test(1038.001, melt(as.data.frame(DT), id.vars=1:2, measure.vars=5:6),
@@ -6759,7 +6759,7 @@ if (test_xts) {
          " 6: 1970-01-07  6", " 7: 1970-01-08  7", " 8: 1970-01-09  8",
          " 9: 1970-01-10  9", "10: 1970-01-11 10"))
   options(old)
-  
+
   # as.data.table.xts(foo) had incorrect integer index with a column name called 'x', #4897
   M = xts::as.xts(matrix(1, dimnames=list("2021-05-23", "x")))  # xts:: just to be extra robust; shouldn't be needed with rm(as.xts) above
   test(1465.19, inherits(as.data.table(M)$index,"POSIXct"))
@@ -17062,7 +17062,7 @@ registerS3method("format_col", "complex", format_col.complex)
 x = data.table(z = c(1 + 3i, 2 - 1i, pi + 2.718i))
 test(2130.12, x, output = '(1.0, 3.0i)')
 rm(format_col.complex)
-registerS3method("format_col", "complex", format_col.default) 
+registerS3method("format_col", "complex", format_col.default)
 # otherwise it remains registered after test.data.table() and causes test 1610.1 to fail on the next run for example, and user display if they have complex data
 # haven't found a way to unregister an S3 method (tried registering NULL but there's an error that NULL isn't a function)
 
@@ -17779,7 +17779,7 @@ test(2188.12, fifelse(c(TRUE, FALSE, TRUE, NA), NA, NA, as.Date("2020-01-01")),
 test(2188.13, fifelse(TRUE, 1L, 2.0, "a"), error="'na' is of type character but 'no' is double. Please") # smart error message
 test(2188.14, fifelse(TRUE, NA, 2, as.Date("2019-07-07")),  error="'no' has different class than 'na'. Please")
 test(2188.15, fifelse(TRUE, NA, factor('a'), factor('a', levels = c('a','b'))), error="'no' and 'na' are both type factor but their levels are different")
-test(2188.16, fifelse(c(NA, NA), 1L, 2L, NULL), c(NA_integer_, NA_integer_)) # NULL `na` is treated as NA 
+test(2188.16, fifelse(c(NA, NA), 1L, 2L, NULL), c(NA_integer_, NA_integer_)) # NULL `na` is treated as NA
 
 # rolling join expected output on non-matching join column has been fixed #1913
 DT = data.table(ID=1:5, A=c(1.3, 1.7, 2.4, 0.9, 0.6))
@@ -17821,7 +17821,7 @@ if (test_bit64) {
   DT[a==1, a:=12]
   DT[a==2, a:=as.integer64(13)]
   test(2193.1, DT, data.table(a = as.integer64(c(12,13,3:10))))
-  
+
   # X[Y,,by=.EACHI] when Y contains integer64 also fixed in 1.12.4, #3779
   X = data.table(x=1:3)
   Y = data.table(x=1:2, y=as.integer64(c(10,20)))
@@ -17899,7 +17899,7 @@ setDTthreads()    # restore default throttle
 # fwrite now allows sep="", #4817
 test(2202.1, fwrite(data.frame(a="id", b=letters[1:5], c=1:5), sep=""),
             output = c("abc", paste0("id", letters[1:5], 1:5)))
-test(2202.2, fwrite(data.frame(a="id", b=1:1e2), sep=""), 
+test(2202.2, fwrite(data.frame(a="id", b=1:1e2), sep=""),
             output = c("ab", paste0("id", 1:1e2)))
 test(2202.3, fwrite(data.table(a=c(NA, 2, 3.01), b=c('foo', NA, 'bar')), sep=""),
              output=c("ab", "foo", "2", "3.01bar"))
diff --git a/src/gsumm.c b/src/gsumm.c
index 713bdc23c4..f806b1e3c8 100644
--- a/src/gsumm.c
+++ b/src/gsumm.c
@@ -1082,7 +1082,7 @@ static SEXP gvarsd1(SEXP x, SEXP narmArg, bool isSD)
       }
     }}
     break;
-  default:  
+  default:
     error(_("Type '%s' not supported by GForce %s. Either add the prefix stats::var(.) or turn off GForce optimization using options(datatable.optimize=1)"),
             type2char(TYPEOF(x)), isSD?"sd (gsd)":"var (gvar)");
   }