From 274eea604a3966961da675dc7429e05f1cecc0f8 Mon Sep 17 00:00:00 2001
From: Matt Dowle <mattjdowle@gmail.com>
Date: Thu, 19 Aug 2021 21:03:17 -0600
Subject: [PATCH 1/2] rework gminmax

---
 inst/tests/tests.Rraw |  49 ++++----
 src/gsumm.c           | 275 +++++++++---------------------------------
 2 files changed, 87 insertions(+), 237 deletions(-)

diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index 02b9362822..4f73a19085 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -5178,8 +5178,8 @@ test(1313.04, DT[, max(y, na.rm=TRUE), by=x], DT[, base::max(y, na.rm=TRUE), by=
 DT[x==6, y := INT(NA)]
 test(1313.05, DT[, min(y), by=x], DT[, base::min(y), by=x])
 test(1313.06, DT[, max(y), by=x], DT[, base::max(y), by=x])
-test(1313.07, DT[, min(y, na.rm=TRUE), by=x], data.table(x=1:6, V1=c(-1,4,4,4,-2147483647,Inf)), warning="No non-missing")
-test(1313.08, DT[, max(y, na.rm=TRUE), by=x], data.table(x=1:6, V1=c(4,10,10,10,-2147483647,-Inf)), warning="No non-missing")
+test(1313.07, DT[, min(y, na.rm=TRUE), by=x], data.table(x=1:6, V1=INT(-1,4,4,4,-2147483647,NA)))
+test(1313.08, DT[, max(y, na.rm=TRUE), by=x], data.table(x=1:6, V1=INT(4,10,10,10,-2147483647,NA)))
 
 # for numeric
 DT <- data.table(x=rep(1:6, each=3), y=c(4,-1,0, NA,4,10, 4,NA,10, 4,10,NA, -Inf, NA, NA, Inf, NA, NA))
@@ -5191,8 +5191,8 @@ test(1313.12, DT[, max(y, na.rm=TRUE), by=x], DT[, base::max(y, na.rm=TRUE), by=
 DT[x==6, y := NA_real_]
 test(1313.13, DT[, min(y), by=x], DT[, base::min(y), by=x])
 test(1313.14, DT[, max(y), by=x], DT[, base::max(y), by=x])
-test(1313.15, DT[, min(y, na.rm=TRUE), by=x], data.table(x=1:6, V1=c(-1,4,4,4,-Inf,Inf)), warning="No non-missing")
-test(1313.16, DT[, max(y, na.rm=TRUE), by=x], data.table(x=1:6, V1=c(4,10,10,10,-Inf,-Inf)), warning="No non-missing")
+test(1313.15, DT[, min(y, na.rm=TRUE), by=x], data.table(x=1:6, V1=c(-1,4,4,4,-Inf,NA)))
+test(1313.16, DT[, max(y, na.rm=TRUE), by=x], data.table(x=1:6, V1=c(4,10,10,10,-Inf,NA)))
 
 # for date (attribute check.. especially after issues/689 !!!)
 DT <- data.table(x = rep(letters[1:2], each=5), y = as.POSIXct('2010-01-01', tz="UTC") + seq(0, 86400*9, 86400))
@@ -5215,8 +5215,8 @@ test(1313.26, DT[, max(y, na.rm=TRUE), by=x], DT[, base::max(y, na.rm=TRUE), by=
 DT[x==6, y := NA_character_]
 test(1313.27, DT[, min(y), by=x], DT[, base::min(y), by=x])
 test(1313.28, DT[, max(y), by=x], DT[, base::max(y), by=x])
-test(1313.29, DT[, min(y, na.rm=TRUE), by=x], data.table(x=1:7, V1=c("a","a","c","","a",NA,"")), warning="No non-missing")
-test(1313.30, DT[, max(y, na.rm=TRUE), by=x], data.table(x=1:7, V1=c("b","a","c","a","c",NA,"c")), warning="No non-missing")
+test(1313.29, DT[, min(y, na.rm=TRUE), by=x], data.table(x=1:7, V1=c("a","a","c","","a",NA,"")))
+test(1313.30, DT[, max(y, na.rm=TRUE), by=x], data.table(x=1:7, V1=c("b","a","c","a","c",NA,"c")))
 
 # bug 700 - bmerge, roll=TRUE and nomatch=0L when i's key group occurs more than once
 dt1 <- data.table(structure(list(x = c(7L, 33L), y = structure(c(15912, 15912), class = "Date"), z = c(626550.35284, 7766.385)), .Names =
@@ -8220,18 +8220,23 @@ test(1581.19, DT, DT0[ , var := c('A', 'A', 'B')])
 # handle NULL value correctly #1429
 test(1582, uniqueN(NULL), 0L)
 
-# bug fix #1461
-dt = data.table(x=c(1,1,1,2,2,2,3,3,3,4,4,4,5), y=c(NaN,1,2, 2,NaN,1, NA,NaN,2, NaN,NA,NaN, NaN))
-# make sure gforce is on
-options(datatable.optimize=Inf)
-ans1 = suppressWarnings(dt[, base::min(y, na.rm=TRUE), by=x])
-ans2 = suppressWarnings(dt[, base::max(y, na.rm=TRUE), by=x])
-test(1583.1, dt[, min(y, na.rm=TRUE), by=x], ans1, warning="No non-missing values found")
-test(1583.2, dt[, max(y, na.rm=TRUE), by=x], ans2, warning="No non-missing values found")
-ans3 = suppressWarnings(dt[, base::min(y), by=x])
-ans4 = suppressWarnings(dt[, base::max(y), by=x])
-test(1583.3, dt[, min(y), by=x], ans3)
-test(1583.4, dt[, max(y), by=x], ans4)
+# bug fix #1461 related to NaN not being recognized due to ISNA vs ISNAN at C level
+# verbatim test from the original report:
+options(datatable.optimize=Inf)   # ensure gforce is on
+DT = data.table(
+     C1 = c(rep("A", 4), rep("B",4), rep("C", 4)),
+     C2 = c(rep("a", 3), rep("b",3), rep("c",3), rep("d",3)),
+     Val = c(1:5, NaN, NaN, 8,9,10,NaN,12))
+test(1583.1, DT[, .(agg = min(Val, na.rm=TRUE)), by=c('C1', 'C2')],
+             data.table(C1=c("A","A","B","B","C","C"),
+                        C2=c("a","b","b","c","c","d"),
+                        agg=c(1,4,5,8,9,10)))
+# extra test with a size-1 group containing one NaN too
+DT = data.table(x=INT(1,1,1,2,2,2,3,3,3,4,4,4,5), y=c(NaN,1,2, 2,NaN,1, NA,NaN,2, NaN,NA,NaN, NaN))
+test(1583.2, DT[, min(y, na.rm=TRUE), by=x], data.table(x=1:5, V1=c(1,1,2,NA,NA)))
+test(1583.3, DT[, max(y, na.rm=TRUE), by=x], data.table(x=1:5, V1=c(2,2,2,NA,NA)))
+test(1583.4, DT[, min(y), by=x], data.table(x=1:5, V1=c(NaN,NaN,NA,NaN,NaN)))
+test(1583.5, DT[, max(y), by=x], data.table(x=1:5, V1=c(NaN,NaN,NA,NaN,NaN)))
 
 # Fixed a minor bug in fread when blank.lines.skip=TRUE
 f1 <- function(x, f=TRUE, b=FALSE) fread(x, fill=f, blank.lines.skip=b, data.table=FALSE, logical01=FALSE)
@@ -14700,8 +14705,8 @@ if (test_bit64) {
   test(2019, DT[2:6, sum(v), id], data.table(id=1:2, V1=bit64::as.integer64(c(5L,15L)))) # gather, case of int64 and irows
 }
 DT = data.table(id = c(1L,1L,2L), v = as.raw(0:2))
-test(2020.01, DT[, min(v), by=id], error="'raw' not supported by GForce min")
-test(2020.02, DT[, max(v), by=id], error="'raw' not supported by GForce max")
+test(2020.01, DT[, min(v), by=id], error="'raw' not supported by GForce min/max")
+test(2020.02, DT[, max(v), by=id], error="'raw' not supported by GForce min/max")
 test(2020.03, DT[, median(v), by=id], error="'raw' not supported by GForce median")
 test(2020.04, DT[, head(v, 1), by=id], error="'raw' not supported by GForce head")
 test(2020.05, DT[, tail(v, 1), by=id], error="'raw' not supported by GForce tail")
@@ -15820,8 +15825,8 @@ test(2065.7, DT[1L, z_sum := 1i][1L, z_sum], 1i)
 
 # GForce for complex columns, part of #3690
 DT = data.table(id=c(1L,1L,2L), v=c(1i, 2i, 3i))
-test(2066.01, DT[, min(v), by=id], error="'complex' has no well-defined min")
-test(2066.02, DT[, max(v), by=id], error="'complex' has no well-defined max")
+test(2066.01, DT[, min(v), by=id], error="'complex' has no well-defined min/max")
+test(2066.02, DT[, max(v), by=id], error="'complex' has no well-defined min/max")
 test(2066.03, DT[, head(v, 1), by=id], data.table(id=1:2, V1=c(1, 3)*1i))
 test(2066.04, DT[, tail(v, 1), by=id], data.table(id=1:2, V1=(2:3)*1i))
 test(2066.05, DT[, v[2], by=id], data.table(id = 1:2, V1=c(2i, NA)))
diff --git a/src/gsumm.c b/src/gsumm.c
index 651f1c3385..b925b35fa6 100644
--- a/src/gsumm.c
+++ b/src/gsumm.c
@@ -713,267 +713,112 @@ SEXP gmean(SEXP x, SEXP narmArg)
   return(ans);
 }
 
-// gmin
-SEXP gmin(SEXP x, SEXP narm)
+static SEXP gminmax(SEXP x, SEXP narm, const bool min)
 {
   if (!isLogical(narm) || LENGTH(narm)!=1 || LOGICAL(narm)[0]==NA_LOGICAL) error(_("na.rm must be TRUE or FALSE"));
-  if (!isVectorAtomic(x)) error(_("GForce min can only be applied to columns, not .SD or similar. To find min of all items in a list such as .SD, either add the prefix base::min(.SD) or turn off GForce optimization using options(datatable.optimize=1). More likely, you may be looking for 'DT[,lapply(.SD,min),by=,.SDcols=]'"));
-  if (inherits(x, "factor") && !inherits(x, "ordered")) error(_("min is not meaningful for factors."));
+  if (!isVectorAtomic(x)) error(_("GForce min/max can only be applied to columns, not .SD or similar. To find min/max of all items in a list such as .SD, either add the prefix base::min(.SD) or turn off GForce optimization using options(datatable.optimize=1). More likely, you may be looking for 'DT[,lapply(.SD,min),by=,.SDcols=]'"));
+  if (inherits(x, "factor") && !inherits(x, "ordered")) error(_("min/max is not meaningful for factors."));
   const int n = (irowslen == -1) ? length(x) : irowslen;
   //clock_t start = clock();
   SEXP ans;
-  if (nrow != n) error(_("nrow [%d] != length(x) [%d] in %s"), nrow, n, "gmin");
-  int protecti=0;
+  if (nrow != n) error(_("nrow [%d] != length(x) [%d] in %s"), nrow, n, "gmin");  
+  // GForce guarantees each group has at least one value; i.e. we don't need to consider length-0 per group here
   switch(TYPEOF(x)) {
-  case LGLSXP: case INTSXP:
-    ans = PROTECT(allocVector(INTSXP, ngrp)); protecti++;
+  case LGLSXP: case INTSXP: {
+    ans = PROTECT(allocVector(INTSXP, ngrp));
+    int *ansd = INTEGER(ans);
+    const int *xd = INTEGER(x);
     if (!LOGICAL(narm)[0]) {
-      for (int i=0; i<ngrp; ++i) INTEGER(ans)[i] = INT_MAX;
+      const int init = min ? INT_MAX : INT_MIN+1;  // NA_INTEGER==INT_MIN checked in init.c
+      for (int i=0; i<ngrp; ++i) ansd[i] = init;
       for (int i=0; i<n; ++i) {
         const int thisgrp = grp[i];
+        if (ansd[thisgrp]==NA_INTEGER) continue;  // once an NA has been observed in this group, it doesn't matter what the remaining values in this group are
         const int ix = (irowslen == -1) ? i : irows[i]-1;
-        if (INTEGER(x)[ix] < INTEGER(ans)[thisgrp])   // NA_INTEGER==INT_MIN checked in init.c
-          INTEGER(ans)[thisgrp] = INTEGER(x)[ix];
+        if (xd[ix]==NA_INTEGER || (xd[ix]<ansd[thisgrp])==min)   
+          ansd[thisgrp] = xd[ix];  // always true on the first value in the group (other than if the first value is INT_MAX or INT_MIN-1 which is fine too)
       }
     } else {
-      for (int i=0; i<ngrp; ++i) INTEGER(ans)[i] = NA_INTEGER;
+      for (int i=0; i<ngrp; ++i) ansd[i] = NA_INTEGER;  // in the all-NA case we now return NA for type consistency 
       for (int i=0; i<n; ++i) {
-        const int thisgrp = grp[i];
         const int ix = (irowslen == -1) ? i : irows[i]-1;
-        if (INTEGER(x)[ix] == NA_INTEGER) continue;
-        if (INTEGER(ans)[thisgrp] == NA_INTEGER || INTEGER(x)[ix] < INTEGER(ans)[thisgrp])
-          INTEGER(ans)[thisgrp] = INTEGER(x)[ix];
-      }
-      for (int i=0; i<ngrp; ++i) {
-        if (INTEGER(ans)[i] == NA_INTEGER) {
-          warning(_("No non-missing values found in at least one group. Coercing to numeric type and returning 'Inf' for such groups to be consistent with base"));
-          ans = PROTECT(coerceVector(ans, REALSXP)); protecti++;
-          for (int i=0; i<ngrp; i++) {
-            if (ISNA(REAL(ans)[i])) REAL(ans)[i] = R_PosInf;
-          }
-          break;
-        }
+        if (xd[ix] == NA_INTEGER) continue;
+        const int thisgrp = grp[i];
+        if (ansd[thisgrp]==NA_INTEGER || (xd[ix]<ansd[thisgrp])==min)
+          ansd[thisgrp] = xd[ix];
       }
-    }
+    }}
     break;
-  case STRSXP:
-    ans = PROTECT(allocVector(STRSXP, ngrp)); protecti++;
+  case STRSXP: {
+    ans = PROTECT(allocVector(STRSXP, ngrp));
+    const SEXP *ansd = STRING_PTR(ans);
+    const SEXP *xd = STRING_PTR(x);
     if (!LOGICAL(narm)[0]) {
-      for (int i=0; i<ngrp; ++i) SET_STRING_ELT(ans, i, char_maxString); // char_maxString == "\xFF\xFF..." in init.c
+      const SEXP init = min ? char_maxString : R_BlankString;  // char_maxString == "\xFF\xFF..." in init.c
+      for (int i=0; i<ngrp; ++i) SET_STRING_ELT(ans, i, init); 
       for (int i=0; i<n; ++i) {
         const int thisgrp = grp[i];
+        if (ansd[thisgrp]==NA_STRING) continue;
         const int ix = (irowslen == -1) ? i : irows[i]-1;
-        if (STRING_ELT(x, ix) == NA_STRING) {
-          SET_STRING_ELT(ans, thisgrp, NA_STRING);
-        } else {
-          if (STRING_ELT(ans, thisgrp)==char_maxString || 
-             (STRING_ELT(ans, thisgrp)!=NA_STRING && strcmp(CHAR(STRING_ELT(x, ix)), CHAR(STRING_ELT(ans, thisgrp))) < 0)) {
-            SET_STRING_ELT(ans, thisgrp, STRING_ELT(x, ix));
-          }
-        }
+        if (xd[ix]==NA_STRING || (strcmp(CHAR(xd[ix]), CHAR(ansd[thisgrp]))<0)==min)
+          SET_STRING_ELT(ans, thisgrp, xd[ix]);
       }
     } else {
-      for (int i=0; i<ngrp; ++i) SET_STRING_ELT(ans, i, NA_STRING);
+      for (int i=0; i<ngrp; ++i) SET_STRING_ELT(ans, i, NA_STRING); // all missing returns NA consistent with base
       for (int i=0; i<n; ++i) {
-        const int thisgrp = grp[i];
         const int ix = (irowslen == -1) ? i : irows[i]-1;
-        if (STRING_ELT(x, ix) == NA_STRING) continue;
-        if (STRING_ELT(ans, thisgrp) == NA_STRING ||
-          strcmp(CHAR(STRING_ELT(x, ix)), CHAR(STRING_ELT(ans, thisgrp))) < 0) {
-          SET_STRING_ELT(ans, thisgrp, STRING_ELT(x, ix));
-        }
-      }
-      for (int i=0; i<ngrp; ++i) {
-        if (STRING_ELT(ans, i)==NA_STRING) {
-          warning(_("No non-missing values found in at least one group. Returning 'NA' for such groups to be consistent with base"));
-          break;
-        }
+        if (xd[ix]==NA_STRING) continue;
+        const int thisgrp = grp[i];
+        if (ansd[thisgrp]==NA_STRING || (strcmp(CHAR(xd[ix]), CHAR(ansd[thisgrp]))<0)==min)
+          SET_STRING_ELT(ans, thisgrp, xd[ix]);
       }
-    }
+    }}
     break;
-  case REALSXP:
-    ans = PROTECT(allocVector(REALSXP, ngrp)); protecti++;
+  case REALSXP: {
+    ans = PROTECT(allocVector(REALSXP, ngrp));
+    double *ansd = REAL(ans);
+    const double *xd = REAL(x);
     if (!LOGICAL(narm)[0]) {
-      for (int i=0; i<ngrp; ++i) REAL(ans)[i] = R_PosInf;
+      const double init = min ? R_PosInf : R_NegInf;
+      for (int i=0; i<ngrp; ++i) ansd[i] = init;
       for (int i=0; i<n; ++i) {
         const int thisgrp = grp[i];
+        if (ISNAN(ansd[thisgrp])) continue;
         const int ix = (irowslen == -1) ? i : irows[i]-1;
-        if (ISNAN(REAL(x)[ix]) || REAL(x)[ix] < REAL(ans)[thisgrp])
-          REAL(ans)[thisgrp] = REAL(x)[ix];
+        if (ISNAN(xd[ix]) || (xd[ix]<ansd[thisgrp])==min)
+          ansd[thisgrp] = xd[ix];
       }
     } else {
-      for (int i=0; i<ngrp; ++i) REAL(ans)[i] = NA_REAL;
+      for (int i=0; i<ngrp; ++i) ansd[i] = NA_REAL;
       for (int i=0; i<n; ++i) {
-        const int thisgrp = grp[i];
         const int ix = (irowslen == -1) ? i : irows[i]-1;
-        if (ISNAN(REAL(x)[ix])) continue;
-        if (ISNAN(REAL(ans)[thisgrp]) || REAL(x)[ix] < REAL(ans)[thisgrp])
-          REAL(ans)[thisgrp] = REAL(x)[ix];
-      }
-      for (int i=0; i<ngrp; ++i) {
-        if (ISNAN(REAL(ans)[i])) {
-          warning(_("No non-missing values found in at least one group. Returning 'Inf' for such groups to be consistent with base"));
-          for (; i<ngrp; i++) if (ISNAN(REAL(ans)[i])) REAL(ans)[i] = R_PosInf;
-          break;
-        }
+        if (ISNAN(xd[ix])) continue;
+        const int thisgrp = grp[i];  
+        if (ISNAN(ansd[thisgrp]) || (xd[ix]<ansd[thisgrp])==min)
+          ansd[thisgrp] = xd[ix];
       }
-    }
+    }}
     break;
   case CPLXSXP:
-    error(_("Type 'complex' has no well-defined min"));
+    error(_("Type 'complex' has no well-defined min/max"));
     break;
   default:
-    error(_("Type '%s' not supported by GForce min (gmin). Either add the prefix base::min(.) or turn off GForce optimization using options(datatable.optimize=1)"), type2char(TYPEOF(x)));
+    error(_("Type '%s' not supported by GForce min/max. Either add the prefix base::min(.) or turn off GForce optimization using options(datatable.optimize=1)"), type2char(TYPEOF(x)));
   }
   copyMostAttrib(x, ans); // all but names,dim and dimnames. And if so, we want a copy here, not keepattr's SET_ATTRIB.
-  UNPROTECT(protecti);  // ans + maybe 1 coerced ans
-  // Rprintf(_("this gmin took %8.3f\n"), 1.0*(clock()-start)/CLOCKS_PER_SEC);
+  UNPROTECT(1);  // ans
+  // Rprintf(_("this gminmax took %8.3f\n"), 1.0*(clock()-start)/CLOCKS_PER_SEC);
   return(ans);
 }
 
-// gmax
-SEXP gmax(SEXP x, SEXP narm)
+SEXP gmin(SEXP x, SEXP narm)
 {
-  if (!isLogical(narm) || LENGTH(narm)!=1 || LOGICAL(narm)[0]==NA_LOGICAL) error(_("na.rm must be TRUE or FALSE"));
-  if (!isVectorAtomic(x)) error(_("GForce max can only be applied to columns, not .SD or similar. To find max of all items in a list such as .SD, either add the prefix base::max(.SD) or turn off GForce optimization using options(datatable.optimize=1). More likely, you may be looking for 'DT[,lapply(.SD,max),by=,.SDcols=]'"));
-  if (inherits(x, "factor") && !inherits(x, "ordered")) error(_("max is not meaningful for factors."));
-  const int n = (irowslen == -1) ? length(x) : irowslen;
-  //clock_t start = clock();
-  SEXP ans;
-  if (nrow != n) error(_("nrow [%d] != length(x) [%d] in %s"), nrow, n, "gmax");
+  return gminmax(x, narm, true);
+}
 
-  // TODO rework gmax in the same way as gmin and remove this *update
-  char *update = (char *)R_alloc(ngrp, sizeof(char));
-  for (int i=0; i<ngrp; ++i) update[i] = 0;
-  int protecti=0;
-  switch(TYPEOF(x)) {
-  case LGLSXP: case INTSXP:
-    ans = PROTECT(allocVector(INTSXP, ngrp)); protecti++;
-    for (int i=0; i<ngrp; ++i) INTEGER(ans)[i] = 0;
-    if (!LOGICAL(narm)[0]) { // simple case - deal in a straightforward manner first
-      for (int i=0; i<n; ++i) {
-        const int thisgrp = grp[i];
-        const int ix = (irowslen == -1) ? i : irows[i]-1;
-        if (INTEGER(x)[ix] != NA_INTEGER && INTEGER(ans)[thisgrp] != NA_INTEGER) {
-          if ( update[thisgrp] != 1 || INTEGER(ans)[thisgrp] < INTEGER(x)[ix] ) {
-            INTEGER(ans)[thisgrp] = INTEGER(x)[ix];
-            if (update[thisgrp] != 1) update[thisgrp] = 1;
-          }
-        } else  INTEGER(ans)[thisgrp] = NA_INTEGER;
-      }
-    } else {
-      for (int i=0; i<n; ++i) {
-        const int thisgrp = grp[i];
-        const int ix = (irowslen == -1) ? i : irows[i]-1;
-        if (INTEGER(x)[ix] != NA_INTEGER) {
-          if ( update[thisgrp] != 1 || INTEGER(ans)[thisgrp] < INTEGER(x)[ix] ) {
-            INTEGER(ans)[thisgrp] = INTEGER(x)[ix];
-            if (update[thisgrp] != 1) update[thisgrp] = 1;
-          }
-        } else {
-          if (update[thisgrp] != 1) {
-            INTEGER(ans)[thisgrp] = NA_INTEGER;
-          }
-        }
-      }
-      for (int i=0; i<ngrp; ++i) {
-        if (update[i] != 1)  {// equivalent of INTEGER(ans)[thisgrp] == NA_INTEGER
-          warning(_("No non-missing values found in at least one group. Coercing to numeric type and returning 'Inf' for such groups to be consistent with base"));
-          ans = PROTECT(coerceVector(ans, REALSXP)); protecti++;
-          for (int i=0; i<ngrp; ++i) {
-            if (update[i] != 1) REAL(ans)[i] = -R_PosInf;
-          }
-          break;
-        }
-      }
-    }
-    break;
-  case STRSXP:
-    ans = PROTECT(allocVector(STRSXP, ngrp)); protecti++;
-    for (int i=0; i<ngrp; ++i) SET_STRING_ELT(ans, i, mkChar(""));
-    if (!LOGICAL(narm)[0]) { // simple case - deal in a straightforward manner first
-      for (int i=0; i<n; ++i) {
-        const int thisgrp = grp[i];
-        const int ix = (irowslen == -1) ? i : irows[i]-1;
-        if (STRING_ELT(x,ix) != NA_STRING && STRING_ELT(ans, thisgrp) != NA_STRING) {
-          if ( update[thisgrp] != 1 || strcmp(CHAR(STRING_ELT(ans, thisgrp)), CHAR(STRING_ELT(x,ix))) < 0 ) {
-            SET_STRING_ELT(ans, thisgrp, STRING_ELT(x, ix));
-            if (update[thisgrp] != 1) update[thisgrp] = 1;
-          }
-        } else  SET_STRING_ELT(ans, thisgrp, NA_STRING);
-      }
-    } else {
-      for (int i=0; i<n; ++i) {
-        const int thisgrp = grp[i];
-        const int ix = (irowslen == -1) ? i : irows[i]-1;
-        if (STRING_ELT(x, ix) != NA_STRING) {
-          if ( update[thisgrp] != 1 || strcmp(CHAR(STRING_ELT(ans, thisgrp)), CHAR(STRING_ELT(x, ix))) < 0 ) {
-            SET_STRING_ELT(ans, thisgrp, STRING_ELT(x, ix));
-            if (update[thisgrp] != 1) update[thisgrp] = 1;
-          }
-        } else {
-          if (update[thisgrp] != 1) {
-            SET_STRING_ELT(ans, thisgrp, NA_STRING);
-          }
-        }
-      }
-      for (int i=0; i<ngrp; ++i) {
-        if (update[i] != 1)  {// equivalent of INTEGER(ans)[thisgrp] == NA_INTEGER
-          warning(_("No non-missing values found in at least one group. Returning 'NA' for such groups to be consistent with base"));
-          break;
-        }
-      }
-    }
-    break;
-  case REALSXP:
-    ans = PROTECT(allocVector(REALSXP, ngrp)); protecti++;
-    for (int i=0; i<ngrp; ++i) REAL(ans)[i] = 0;
-    if (!LOGICAL(narm)[0]) {
-      for (int i=0; i<n; ++i) {
-        const int thisgrp = grp[i];
-        const int ix = (irowslen == -1) ? i : irows[i]-1;
-        if ( !ISNA(REAL(x)[ix]) && !ISNA(REAL(ans)[thisgrp]) ) {
-          if ( update[thisgrp] != 1 || REAL(ans)[thisgrp] < REAL(x)[ix] ||
-             (ISNAN(REAL(x)[ix]) && !ISNAN(REAL(ans)[thisgrp])) ) { // #1461
-            REAL(ans)[thisgrp] = REAL(x)[ix];
-            if (update[thisgrp] != 1) update[thisgrp] = 1;
-          }
-        } else REAL(ans)[thisgrp] = NA_REAL;
-      }
-    } else {
-      for (int i=0; i<n; ++i) {
-        const int thisgrp = grp[i];
-        const int ix = (irowslen == -1) ? i : irows[i]-1;
-        if ( !ISNAN(REAL(x)[ix]) ) { // #1461
-          if ( update[thisgrp] != 1 || REAL(ans)[thisgrp] < REAL(x)[ix] ) {
-            REAL(ans)[thisgrp] = REAL(x)[ix];
-            if (update[thisgrp] != 1) update[thisgrp] = 1;
-          }
-        } else {
-          if (update[thisgrp] != 1) {
-            REAL(ans)[thisgrp] = -R_PosInf;
-          }
-        }
-      }
-      // everything taken care of already. Just warn if all NA groups have occurred at least once
-      for (int i=0; i<ngrp; ++i) {
-        if (update[i] != 1)  { // equivalent of REAL(ans)[thisgrp] == -R_PosInf
-          warning(_("No non-missing values found in at least one group. Returning '-Inf' for such groups to be consistent with base"));
-          break;
-        }
-      }
-    }
-    break;
-  case CPLXSXP:
-    error(_("Type 'complex' has no well-defined max"));
-    break;
-  default:
-    error(_("Type '%s' not supported by GForce max (gmax). Either add the prefix base::max(.) or turn off GForce optimization using options(datatable.optimize=1)"), type2char(TYPEOF(x)));
-  }
-  copyMostAttrib(x, ans); // all but names,dim and dimnames. And if so, we want a copy here, not keepattr's SET_ATTRIB.
-  UNPROTECT(protecti);
-  // Rprintf(_("this gmax took %8.3f\n"), 1.0*(clock()-start)/CLOCKS_PER_SEC);
-  return(ans);
+SEXP gmax(SEXP x, SEXP narm)
+{
+  return gminmax(x, narm, false);
 }
 
 // gmedian, always returns numeric type (to avoid as.numeric() wrap..)

From dc1cfe6ccf0349dfde1e301fc3120f1343737d2a Mon Sep 17 00:00:00 2001
From: Matt Dowle <mattjdowle@gmail.com>
Date: Thu, 19 Aug 2021 23:06:27 -0600
Subject: [PATCH 2/2] add news item

---
 NEWS.md | 44 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 44 insertions(+)

diff --git a/NEWS.md b/NEWS.md
index 769b98dafa..5e666241af 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -185,6 +185,50 @@
 
 34. `na.omit(DT)` did not remove `NA` in `nanotime` columns, [#4744](https://github.com/Rdatatable/data.table/issues/4744). Thanks Jean-Mathieu Vermosen for reporting, and Michael Chirico for the PR.
 
+35. `DT[, min(intCol, na.rm=TRUE), by=grp]` would return `Inf` for any groups containing all NAs, with a type change from `integer` to `numeric` to hold the `Inf`, and with warning. Now `NA` is returned for such all-NA groups, without warning or type change. This is almost-surely less surprising, more convenient, consistent, and efficient. There was no user request for this, likely because our desire to be consistent with base R in this regard was known (base R's `min(x, na.rm=TRUE)` returns `Inf` with warning for all-NA input). Matt Dowle made this change when reworking internals, [#5105](https://github.com/Rdatatable/data.table/pull/5105). The old behavior seemed so bad, and since there was a warning too, it seemed more appropriate to treat it as a bug.
+
+    ```R
+    DT
+    #         A     B
+    #    <char> <int>
+    # 1:      a     1
+    # 2:      a    NA
+    # 3:      b     2
+    # 4:      b    NA
+
+    DT[, min(B,na.rm=TRUE), by=A]  # no change in behavior (no all-NA groups yet)
+    #         A    V1
+    #    <char> <int>
+    # 1:      a     1
+    # 2:      b     2
+
+    DT[3, B:=NA]                   # make an all-NA group
+    DT
+    #         A     B
+    #    <char> <int>
+    # 1:      a     1
+    # 2:      a    NA
+    # 3:      b    NA
+    # 4:      b    NA
+
+    DT[, min(B,na.rm=TRUE), by=A]  # old result
+    #         A    V1
+    #    <char> <num>              # V1's type changed to numeric (inconsistent)
+    # 1:      a     1
+    # 2:      b   Inf              # Inf surprising
+    # Warning message:             # warning inconvenient
+    # In gmin(B, na.rm = TRUE) :
+    #   No non-missing values found in at least one group. Coercing to numeric
+    #   type and returning 'Inf' for such groups to be consistent with base
+
+    DT[, min(B,na.rm=TRUE), by=A]  # new result
+    #         A    V1
+    #    <char> <int>              # V1's type remains integer (consistent)
+    # 1:      a     1
+    # 2:      b    NA              # NA because there are no non-NA, naturally
+                                   # no inconvenient warning
+    ```
+
 ## NOTES
 
 1. New feature 29 in v1.12.4 (Oct 2019) introduced zero-copy coercion. Our thinking is that requiring you to get the type right in the case of `0` (type double) vs `0L` (type integer) is too inconvenient for you the user. So such coercions happen in `data.table` automatically without warning. Thanks to zero-copy coercion there is no speed penalty, even when calling `set()` many times in a loop, so there's no speed penalty to warn you about either. However, we believe that assigning a character value such as `"2"` into an integer column is more likely to be a user mistake that you would like to be warned about. The type difference (character vs integer) may be the only clue that you have selected the wrong column, or typed the wrong variable to be assigned to that column. For this reason we view character to numeric-like coercion differently and will warn about it. If it is correct, then the warning is intended to nudge you to wrap the RHS with `as.<type>()` so that it is clear to readers of your code that a coercion from character to that type is intended. For example :