From bf4fb109762dc12d7ee96227554bb3dc0c68018b Mon Sep 17 00:00:00 2001
From: Michael Chirico <michael.chirico@grabtaxi.com>
Date: Wed, 10 Jul 2019 14:30:22 +0800
Subject: [PATCH 01/20] initial take at #3639 -- proper handling of CPLXSXP in
 dogroups

remove debugging helpers

remove debugging helpers

remove debugging helpers
---
 R/data.table.R |  1 -
 src/dogroups.c | 19 ++++++++++++++++---
 2 files changed, 16 insertions(+), 4 deletions(-)

diff --git a/R/data.table.R b/R/data.table.R
index a13597926b..823ae84cb5 100644
--- a/R/data.table.R
+++ b/R/data.table.R
@@ -889,7 +889,6 @@ replace_order = function(isub, verbose, env) {
       } # else maybe a call to transform or something which returns a list.
       av = all.vars(jsub,TRUE)  # TRUE fixes bug #1294 which didn't see b in j=fns[[b]](c)
       use.I = ".I" %chin% av
-      # browser()
       if (any(c(".SD","eval","get","mget") %chin% av)) {
         if (missing(.SDcols)) {
           # here we need to use 'dupdiff' instead of 'setdiff'. Ex: setdiff(c("x", "x"), NULL) will give 'x'.
diff --git a/src/dogroups.c b/src/dogroups.c
index 8009f70ae8..437c14720e 100644
--- a/src/dogroups.c
+++ b/src/dogroups.c
@@ -2,6 +2,7 @@
 #include <Rdefines.h>
 #include <fcntl.h>
 #include <time.h>
+#include <complex.h>
 
 SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEXP xjiscols, SEXP grporder, SEXP order, SEXP starts, SEXP lens, SEXP jexp, SEXP env, SEXP lhs, SEXP newnames, SEXP on, SEXP verbose)
 {
@@ -214,13 +215,20 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX
               rownum = iI[k]-1;
               td[k] = sd[rownum];  // on 32bit copies pointers too
             }
-          } else {  // size 8
+          } else if (size==8) {
             double *td = REAL(target);
             const double *sd = REAL(source);
             for (int k=0; k<grpn; ++k) {
               rownum = iI[k]-1;
               td[k] = sd[rownum];  // on 64bit copies pointers too
             }
+          } else { // size 16
+            double complex *td = (double complex *)COMPLEX(target);
+            const double complex *sd = (double complex *)COMPLEX(source);
+            for (int k=0; k<grpn; ++k) {
+              rownum = iI[k]-1;
+              td[k] = ds[rownum];
+            }
           }
         }
         if (LOGICAL(verbose)[0]) { tblock[1] += clock()-tstart; nblock[1]++; }
@@ -382,14 +390,19 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX
     for (int j=0; j<ngrpcols; ++j) {
       target = VECTOR_ELT(ans,j);
       source = VECTOR_ELT(groups, INTEGER(grpcols)[j]-1);  // target and source the same type by construction above
-      if (SIZEOF(target)==4) {
+      int tsize = SIZEOF(target);
+      if (tsize==4) {
         int *td = INTEGER(target);
         int *sd = INTEGER(source);
         for (int r=0; r<maxn; ++r) td[ansloc+r] = sd[igrp];
-      } else {
+      } else if (tsize==8) {
         double *td = REAL(target);
         double *sd = REAL(source);
         for (int r=0; r<maxn; ++r) td[ansloc+r] = sd[igrp];
+      } else {
+        double complex *td = COMPLEX(target);
+        double complex *sd = COMPLEX(source);
+        for (int r=0; r<maxn; ++r) td[ansloc+r] = sd[igrp];
       }
       // Shouldn't need SET_* to age objects here since groups, TO DO revisit.
     }

From 24c8849a1a5111b05eb56d4b0f38f467e72e5e5c Mon Sep 17 00:00:00 2001
From: Michael Chirico <michael.chirico@grabtaxi.com>
Date: Wed, 10 Jul 2019 18:24:17 +0800
Subject: [PATCH 02/20] main issue was name collision of I with that in
 complex.h; now working

---
 NEWS.md               |  2 ++
 inst/tests/tests.Rraw |  9 +++++++++
 src/dogroups.c        | 27 +++++++++++++++------------
 3 files changed, 26 insertions(+), 12 deletions(-)

diff --git a/NEWS.md b/NEWS.md
index 10e3d5d5ea..1754ecf805 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -192,6 +192,8 @@
 
 24. `column not found` could incorrectly occur in rare non-equi-join cases, [#3635](https://github.com/Rdatatable/data.table/issues/3635). Thanks to @UweBlock for the report.
 
+25. Complex columns used in `j` during grouping would get mangled, [#3639](https://github.com/Rdatatable/data.table/issues/3639). We still do not support grouping `by` a complex column; please file a feature request if you would use this in your own work. Thanks to @eliocamp for filing the bug report.
+
 #### NOTES
 
 1. `rbindlist`'s `use.names="check"` now emits its message for automatic column names (`"V[0-9]+"`) too, [#3484](https://github.com/Rdatatable/data.table/pull/3484). See news item 5 of v1.12.2 below.
diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index 1c720d4de9..4e8e51c7f0 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -15200,6 +15200,15 @@ d2 = data.table(r = 1:5, s = seq(0L, 20L, 5L))
 test(2062.1, d1[d2, on = .(a <= s, b >= s), j = .SD],   ans<-data.table(a=INT(0,5,10,10,15,20), b=INT(0,5,10,10,15,20)))
 test(2062.2, d1[d2, on = .(a <= s, b >= s)][, .(a, b)], ans)
 
+# #3639 -- complex values in grouping
+set.seed(42)
+DT = CJ(x = 1:10, a = c("a", "b"), b = 1:2)
+DT[ , z := complex(rnorm(1:.N), rnorm(1:.N))]
+## can simplify this test after #1444
+test(2063.1, all.equal(setkey(copy(DT), NULL), DT[, .(x = x, z = z), by = .(a, b)][order(x, a, b)], ignore.col.order = TRUE))
+test(2063.2, DT[ , base::sum(z), by = a], data.table(a = c('a', 'b'), V1 = c(5.0582228485073+0i, -1.8644229822705+0i)))
+test(2063.3, DT[ , sum(Mod(z)), by = b], data.table(b = 1:2, V2 = c(16.031422657932, 13.533483145656)))
+
 
 ###################################
 #  Add new tests above this line  #
diff --git a/src/dogroups.c b/src/dogroups.c
index 437c14720e..a4c56880de 100644
--- a/src/dogroups.c
+++ b/src/dogroups.c
@@ -8,7 +8,7 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX
 {
   R_len_t rownum, ngrp, nrowgroups, njval=0, ngrpcols, ansloc=0, maxn, estn=-1, thisansloc, grpn, thislen, igrp, origIlen=0, origSDnrow=0;
   int protecti=0;
-  SEXP names, names2, xknames, bynames, dtnames, ans=NULL, jval, thiscol, BY, N, I, GRP, iSD, xSD, rownames, s, RHS, listwrap, target, source, tmp;
+  SEXP names, names2, xknames, bynames, dtnames, ans=NULL, jval, thiscol, BY, N, DOTI, GRP, iSD, xSD, rownames, s, RHS, listwrap, target, source, tmp;
   Rboolean wasvector, firstalloc=FALSE, NullWarnDone=FALSE;
   clock_t tstart=0, tblock[10]={0}; int nblock[10]={0};
 
@@ -53,7 +53,8 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX
   for (R_len_t i=0; i<n; ++i) {
     if (ilens[i] > maxGrpSize) maxGrpSize = ilens[i];
   }
-  defineVar(install(".I"), I = PROTECT(allocVector(INTSXP, maxGrpSize)), env); protecti++;
+  // #3639 introduced complex.h, which defines I, so use DOTI to differentiate
+  defineVar(install(".I"), DOTI = PROTECT(allocVector(INTSXP, maxGrpSize)), env); protecti++;
   R_LockBinding(install(".I"), env);
 
   dtnames = PROTECT(getAttrib(dt, R_NamesSymbol)); protecti++; // added here to fix #4990 - `:=` did not issue recycling warning during "by"
@@ -77,7 +78,7 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX
     copyMostAttrib(VECTOR_ELT(dt,INTEGER(dtcols)[i]-1), VECTOR_ELT(SDall,i));  // not names, otherwise test 778 would fail
   }
 
-  origIlen = length(I);  // test 762 has length(I)==1 but nrow(SD)==0
+  origIlen = length(DOTI);  // test 762 has length(DOTI)==1 but nrow(SD)==0
   if (length(SDall)) origSDnrow = length(VECTOR_ELT(SDall, 0));
 
   xknames = getAttrib(xSD, R_NamesSymbol);
@@ -155,8 +156,8 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX
         }
       }
       grpn = 1;  // it may not be 1 e.g. test 722. TODO: revisit.
-      SETLENGTH(I, grpn);
-      INTEGER(I)[0] = 0;
+      SETLENGTH(DOTI, grpn);
+      INTEGER(DOTI)[0] = 0;
       for (int j=0; j<length(xSD); ++j) {
         switch (TYPEOF(VECTOR_ELT(xSD, j))) {
         case LGLSXP :
@@ -180,8 +181,8 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX
       }
     } else {
       if (LOGICAL(verbose)[0]) tstart = clock();
-      SETLENGTH(I, grpn);
-      int *iI = INTEGER(I);
+      SETLENGTH(DOTI, grpn);
+      int *iI = INTEGER(DOTI);
       if (LENGTH(order)==0) {
         if (grpn) rownum = istarts[i]-1; else rownum = -1;  // not ternary to pass strict-barrier
         for (int j=0; j<grpn; ++j) iI[j] = rownum+j+1;
@@ -223,11 +224,12 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX
               td[k] = sd[rownum];  // on 64bit copies pointers too
             }
           } else { // size 16
+            // #3634 -- CPLXSXP columns have size 16
             double complex *td = (double complex *)COMPLEX(target);
             const double complex *sd = (double complex *)COMPLEX(source);
             for (int k=0; k<grpn; ++k) {
               rownum = iI[k]-1;
-              td[k] = ds[rownum];
+              td[k] = sd[rownum];
             }
           }
         }
@@ -310,7 +312,7 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX
         }
         memrecycle(target, order, INTEGER(starts)[i]-1, grpn, RHS);   // length mismatch checked above for all jval columns before starting to add any new columns
         copyMostAttrib(RHS, target);  // not names, otherwise test 778 would fail.
-        /* OLD FIX: commented now. The fix below resulted in segfault on factor columns because I dint set the "levels"
+        /* OLD FIX: commented now. The fix below resulted in segfault on factor columns because I didn't set the "levels"
            Instead of fixing that, I just removed setting class if it's factor. Not appropriate fix.
            Correct fix of copying all attributes (except names) added above. Now, everything should be alright.
            Test 1144 (#5104) will provide the right output now. Modified accordingly.
@@ -400,8 +402,9 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX
         double *sd = REAL(source);
         for (int r=0; r<maxn; ++r) td[ansloc+r] = sd[igrp];
       } else {
-        double complex *td = COMPLEX(target);
-        double complex *sd = COMPLEX(source);
+        // #3634 -- CPLXSXP columns have size 16
+        double complex *td = (double complex *)COMPLEX(target);
+        double complex *sd = (double complex *)COMPLEX(source);
         for (int r=0; r<maxn; ++r) td[ansloc+r] = sd[igrp];
       }
       // Shouldn't need SET_* to age objects here since groups, TO DO revisit.
@@ -465,7 +468,7 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX
   } else ans = R_NilValue;
   // Now reset length of .SD columns and .I to length of largest group, otherwise leak if the last group is smaller (often is).
   for (int j=0; j<length(SDall); ++j) SETLENGTH(VECTOR_ELT(SDall,j), origSDnrow);
-  SETLENGTH(I, origIlen);
+  SETLENGTH(DOTI, origIlen);
   if (LOGICAL(verbose)[0]) {
     if (nblock[0] && nblock[1]) error("Internal error: block 0 [%d] and block 1 [%d] have both run", nblock[0], nblock[1]); // # nocov
     int w = nblock[1]>0;

From 0288c5f142546ac80a3b927bcb40129d1a102330 Mon Sep 17 00:00:00 2001
From: Michael Chirico <michael.chirico@grabtaxi.com>
Date: Wed, 10 Jul 2019 18:31:51 +0800
Subject: [PATCH 03/20] typo

---
 inst/tests/tests.Rraw | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index 4e8e51c7f0..0ae0f541c4 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -15207,7 +15207,7 @@ DT[ , z := complex(rnorm(1:.N), rnorm(1:.N))]
 ## can simplify this test after #1444
 test(2063.1, all.equal(setkey(copy(DT), NULL), DT[, .(x = x, z = z), by = .(a, b)][order(x, a, b)], ignore.col.order = TRUE))
 test(2063.2, DT[ , base::sum(z), by = a], data.table(a = c('a', 'b'), V1 = c(5.0582228485073+0i, -1.8644229822705+0i)))
-test(2063.3, DT[ , sum(Mod(z)), by = b], data.table(b = 1:2, V2 = c(16.031422657932, 13.533483145656)))
+test(2063.3, DT[ , sum(Mod(z)), by = b], data.table(b = 1:2, V1 = c(16.031422657932, 13.533483145656)))
 
 
 ###################################

From 3f99784d4ee67ea057889296f8c939d8350a6164 Mon Sep 17 00:00:00 2001
From: Michael Chirico <michael.chirico@grabtaxi.com>
Date: Wed, 10 Jul 2019 19:36:25 +0800
Subject: [PATCH 04/20] add CPLXSXP case to some switches

---
 src/dogroups.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/src/dogroups.c b/src/dogroups.c
index a4c56880de..361108a6a3 100644
--- a/src/dogroups.c
+++ b/src/dogroups.c
@@ -145,6 +145,11 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX
         case REALSXP :
           REAL(VECTOR_ELT(SDall,j))[0] = NA_REAL;
           break;
+        case CPLXSXP :
+          // no NA_COMPLEX; have to set r & i parts to NA_REAL individually
+          COMPLEX(VECTOR_ELT(SDall, j))[0].r = NA_REAL;
+          COMPLEX(VECTOR_ELT(SDall, j))[0].i = NA_REAL;
+          break;
         case STRSXP :
           SET_STRING_ELT(VECTOR_ELT(SDall,j),0,NA_STRING);
           break;
@@ -169,6 +174,10 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX
         case REALSXP :
           REAL(VECTOR_ELT(xSD,j))[0] = NA_REAL;
           break;
+        case CPLXSXP :
+          COMPLEX(VECTOR_ELT(xSD, j))[0].r = NA_REAL;
+          COMPLEX(VECTOR_ELT(xSD, j))[0].i = NA_REAL;
+          break;
         case STRSXP :
           SET_STRING_ELT(VECTOR_ELT(xSD,j),0,NA_STRING);
           break;

From 6ae663955331e1750ecc1eb98d22287ae338ace5 Mon Sep 17 00:00:00 2001
From: Michael Chirico <michael.chirico@grabtaxi.com>
Date: Wed, 10 Jul 2019 20:18:16 +0800
Subject: [PATCH 05/20] another switch() branch included

---
 src/dogroups.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/src/dogroups.c b/src/dogroups.c
index 361108a6a3..4d18af118c 100644
--- a/src/dogroups.c
+++ b/src/dogroups.c
@@ -4,6 +4,12 @@
 #include <time.h>
 #include <complex.h>
 
+// copied from r-source/src/main/Rcomplex.h
+#if defined(__GNUC__) && (defined(__sun__) || defined(__hpux__) || defined(Win32))
+# undef  I
+# define I (__extension__ 1.0iF)
+#endif
+
 SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEXP xjiscols, SEXP grporder, SEXP order, SEXP starts, SEXP lens, SEXP jexp, SEXP env, SEXP lhs, SEXP newnames, SEXP on, SEXP verbose)
 {
   R_len_t rownum, ngrp, nrowgroups, njval=0, ngrpcols, ansloc=0, maxn, estn=-1, thisansloc, grpn, thislen, igrp, origIlen=0, origSDnrow=0;
@@ -440,6 +446,11 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX
           double *td = REAL(target)+thisansloc;
           for (int r=0; r<maxn; ++r) td[r] = NA_REAL;
         } break;
+        case CPLXSXP : {
+        double complex *td = (double complex *)(COMPLEX(target) + thisansloc);
+        for (int r=0; r<maxn; ++r) { td[r] = NA_REAL + NA_REAL*I; }
+        //for (int r=0; r<,maxn; ++r) { creal(td[r]) = NA_REAL; cimag(td[r]) = NA_REAL; }
+      } break;
         case STRSXP :
           for (int r=0; r<maxn; ++r) SET_STRING_ELT(target,thisansloc+r,NA_STRING);
           break;

From 603b91384805cc71ca237d70564379723b4c0656 Mon Sep 17 00:00:00 2001
From: Michael Chirico <MichaelChirico4@gmail.com>
Date: Thu, 11 Jul 2019 02:38:40 +0800
Subject: [PATCH 06/20] Implement gsum, gmean, gfirst, glast, g[ for complex
 vectors, part of #3690

---
 src/data.table.h |   7 +++
 src/gsumm.c      | 157 ++++++++++++++++++++++++++++++++++++++++++++---
 2 files changed, 155 insertions(+), 9 deletions(-)

diff --git a/src/data.table.h b/src/data.table.h
index a2a6204051..052086a9fb 100644
--- a/src/data.table.h
+++ b/src/data.table.h
@@ -63,6 +63,13 @@ typedef R_xlen_t RLEN;
 #define ALTREP(x) 0  // for R<3.5.0, see issue #2866 and grep for "ALTREP" to see comments where it's used
 #endif
 
+// for complex type support; copied from r-source/src/main/Rcomplex.h
+#if defined(__GNUC__) && (defined(__sun__) || defined(__hpux__) || defined(Win32))
+# undef  I
+# define I (__extension__ 1.0iF)
+#endif
+
+
 // init.c
 SEXP char_integer64;
 SEXP char_ITime;
diff --git a/src/gsumm.c b/src/gsumm.c
index 6263b33b65..c327988f72 100644
--- a/src/gsumm.c
+++ b/src/gsumm.c
@@ -1,5 +1,6 @@
 #include "data.table.h"
 //#include <time.h>
+#include <complex.h>
 
 static int ngrp = 0;         // number of groups
 static int *grpsize = NULL;  // size of each group, used by gmean (and gmedian) not gsum
@@ -298,8 +299,38 @@ void *gather(SEXP x, bool *anyNA)
       }
     }
   } break;
+  case CPLXSXP: {
+    const double complex *restrict thisx = (double complex *)COMPLEX(x);
+    #pragma omp parallel for num_threads(getDTthreads())
+    for (int b=0; b<nBatch; b++) {
+      int *restrict my_tmpcounts = tmpcounts + omp_get_thread_num()*highSize;
+      memcpy(my_tmpcounts, counts + b*highSize, highSize*sizeof(double complex));   // original cumulated   // already cumulated for this batch
+      double complex *restrict my_gx = (double complex *)gx + b*batchSize;
+      const uint16_t *my_high = high + b*batchSize;
+      const int howMany = b==nBatch-1 ? lastBatchSize : batchSize;
+      bool my_anyNA = false;
+      if (irowslen==-1) {
+        const double complex *my_x = thisx + b*batchSize;
+        for (int i=0; i<howMany; i++) {
+          const double complex elem = my_x[i];
+          my_gx[ my_tmpcounts[my_high[i]]++ ] = elem;
+          // typically just checking one component would be enough,
+          //   but ?complex suggests there may be some edge cases; better to be safe
+          if (creal(elem)==NA_REAL && cimag(elem) == NA_REAL) my_anyNA = true;
+        }
+      } else {
+        const int *my_x = irows + b*batchSize;
+        for (int i=0; i<howMany; i++) {
+          double complex elem = thisx[ my_x[i]-1 ];
+          my_gx[ my_tmpcounts[my_high[i]]++ ] = elem;
+          if (creal(elem)==NA_REAL && cimag(elem) == NA_REAL) my_anyNA = true;
+        }
+      }
+      if (my_anyNA) *anyNA = true;  // naked write ok since just bool and always writing true; and no performance issue as maximum nBatch writes
+    }
+  } break;
   default :
-    error("gather implemented for INTSXP and REALSXP but not '%s'", type2char(TYPEOF(x)));   // # nocov
+    error("gather implemented for INTSXP, REALSXP, and CPLXSXP but not '%s'", type2char(TYPEOF(x)));   // # nocov
   }
   //Rprintf("gather took %.3fs\n", wallclock()-started);
   return gx;
@@ -428,6 +459,43 @@ SEXP gsum(SEXP x, SEXP narmArg)
       }
     }
   } break;
+  case CPLXSXP: {
+    const double complex *restrict gx = gather(x, &anyNA);
+    ans = PROTECT(allocVector(CPLXSXP, ngrp));
+    double complex *restrict ansp = (double complex *)COMPLEX(ans);
+    memset(ansp, 0, ngrp*sizeof(double complex));
+    if (!narm || !anyNA) {
+      #pragma omp parallel for num_threads(getDTthreads())
+      for (int h=0; h<highSize; h++) {
+        double complex *restrict _ans = ansp + (h<<shift);
+        for (int b=0; b<nBatch; b++) {
+          const int pos = counts[ b*highSize + h ];
+          const int howMany = ((h==highSize-1) ? (b==nBatch-1?lastBatchSize:batchSize) : counts[ b*highSize + h + 1 ]) - pos;
+          const double complex *my_gx = gx + b*batchSize + pos;
+          const uint16_t *my_low = low + b*batchSize + pos;
+          for (int i=0; i<howMany; i++) {
+            _ans[my_low[i]] += my_gx[i];  // let NA propagate when !narm
+          }
+        }
+      }
+    } else {
+      // narm==true and anyNA==true
+      #pragma omp parallel for num_threads(getDTthreads())
+      for (int h=0; h<highSize; h++) {
+        double complex *restrict _ans = ansp + (h<<shift);
+        for (int b=0; b<nBatch; b++) {
+          const int pos = counts[ b*highSize + h ];
+          const int howMany = ((h==highSize-1) ? (b==nBatch-1?lastBatchSize:batchSize) : counts[ b*highSize + h + 1 ]) - pos;
+          const double complex *my_gx = gx + b*batchSize + pos;
+          const uint16_t *my_low = low + b*batchSize + pos;
+          for (int i=0; i<howMany; i++) {
+            const double complex elem = my_gx[i];
+            if (!ISNAN(elem)) _ans[my_low[i]] += elem;
+          }
+        }
+      }
+    }
+  } break;
   default:
     error("Type '%s' not supported by GForce sum (gsum). Either add the prefix base::sum(.) or turn off GForce optimization using options(datatable.optimize=1)", type2char(TYPEOF(x)));
   }
@@ -454,6 +522,10 @@ SEXP gmean(SEXP x, SEXP narm)
       double *xd = REAL(ans);
       for (int i=0; i<ngrp; i++) *xd++ /= grpsize[i];  // let NA propogate
     } break;
+    case CPLXSXP: {
+      double complex *xd = (double complex *)COMPLEX(ans);
+      for (int i=0; i<ngrp; i++) *xd++ /= grpsize[i];
+    } break;
     default :
       error("Internal error: gsum returned type '%s'. typeof(x) is '%s'", type2char(TYPEOF(ans)), type2char(TYPEOF(x))); // # nocov
     }
@@ -491,18 +563,45 @@ SEXP gmean(SEXP x, SEXP narm)
       c[thisgrp]++;
     }
   } break;
+  case CPLXSXP: {
+    const double complex *xd = (double complex *)COMPLEX(x);
+    for (int i=0; i<n; i++) {
+      int thisgrp = grp[i];
+      int ix = (irowslen == -1) ? i : irows[i]-1;
+      if (ISNAN(xd[ix])) continue;
+      s[thisgrp] += xd[ix];
+      c[thisgrp]++;
+    }
+  } break;
   default:
     free(s); free(c); // # nocov because it already stops at gsum, remove nocov if gmean will support a type that gsum wont
     error("Type '%s' not supported by GForce mean (gmean) na.rm=TRUE. Either add the prefix base::mean(.) or turn off GForce optimization using options(datatable.optimize=1)", type2char(TYPEOF(x))); // # nocov
   }
-  ans = PROTECT(allocVector(REALSXP, ngrp));
-  double *ansd = REAL(ans);
-  for (int i=0; i<ngrp; i++) {
-    if (c[i]==0) { ansd[i] = R_NaN; continue; }  // NaN to follow base::mean
-    s[i] /= c[i];
-    if (s[i] > DBL_MAX) ansd[i] = R_PosInf;
-    else if (s[i] < -DBL_MAX) ansd[i] = R_NegInf;
-    else ansd[i] = (double)s[i];
+  switch(TYPEOF(x)) {
+  case LGLSXP: case INTSXP: case REALSXP: {
+    ans = PROTECT(allocVector(REALSXP, ngrp));
+    double *ansd = REAL(ans);
+    for (int i=0; i<ngrp; i++) {
+      if (c[i]==0) { ansd[i] = R_NaN; continue; }  // NaN to follow base::mean
+      s[i] /= c[i];
+      if (s[i] > DBL_MAX) ansd[i] = R_PosInf;
+      else if (s[i] < -DBL_MAX) ansd[i] = R_NegInf;
+      else ansd[i] = (double)s[i];
+    }
+  } break;
+  case CPLXSXP: {
+  ans = PROTECT(allocVector(CPLXSXP, ngrp));
+    double complex *ansd = (double complex *)COMPLEX(ans);
+    for (int i=0; i<ngrp; i++) {
+      if (c[i]==0) { ansd[i] = R_NaN + R_NaN*1i; continue; }  // NaN to follow base::mean
+      s[i] /= c[i];
+      if (creal(s[i]) > DBL_MAX) ansd[i] = R_PosInf + cimag(s[i])*I;
+      else if (creal(s[i]) < -DBL_MAX) ansd[i] = R_NegInf + cimag(s[i])*I;
+      if (cimag(s[i]) > DBL_MAX) ansd[i] = creal(s[i]) + R_PosInf*I;
+      else if (cimag(s[i]) < -DBL_MAX) ansd[i] = creal(s[i]) + R_NegInf*I;
+      else ansd[i] = (double complex)s[i];
+    }
+  }
   }
   free(s); free(c);
   copyMostAttrib(x, ans);
@@ -618,6 +717,9 @@ SEXP gmin(SEXP x, SEXP narm)
       }
     }
     break;
+  case CPLXSXP:
+    error("Type 'complex' has no well-defined min");
+    break;
   default:
     error("Type '%s' not supported by GForce min (gmin). Either add the prefix base::min(.) or turn off GForce optimization using options(datatable.optimize=1)", type2char(TYPEOF(x)));
   }
@@ -761,6 +863,9 @@ SEXP gmax(SEXP x, SEXP narm)
       }
     }
     break;
+  case CPLXSXP:
+    error("Type 'complex' has no well-defined max");
+    break;
   default:
     error("Type '%s' not supported by GForce max (gmax). Either add the prefix base::max(.) or turn off GForce optimization using options(datatable.optimize=1)", type2char(TYPEOF(x)));
   }
@@ -868,6 +973,17 @@ SEXP glast(SEXP x) {
     }
   }
     break;
+  case CPLXSXP: {
+    const double complex *dx = (double complex *)COMPLEX(x);
+    ans = PROTECT(allocVector(CPLXSXP, ngrp));
+    double complex *dans = (double complex *)COMPLEX(ans);
+    for (i=0; i<ngrp; i++) {
+      k = ff[i]+grpsize[i]-2;
+      if (isunsorted) k = oo[k]-1;
+      k = (irowslen == -1) ? k : irows[k]-1;
+      dans[i] = dx[k];
+    }
+  } break;
   case STRSXP:
     ans = PROTECT(allocVector(STRSXP, ngrp));
     for (i=0; i<ngrp; i++) {
@@ -939,6 +1055,17 @@ SEXP gfirst(SEXP x) {
     }
   }
     break;
+  case CPLXSXP: {
+    const double complex *dx = (double complex *)COMPLEX(x);
+    ans = PROTECT(allocVector(CPLXSXP, ngrp));
+    double complex *dans = (double complex *)COMPLEX(ans);
+    for (i=0; i<ngrp; i++) {
+      k = ff[i]-1;
+      if (isunsorted) k = oo[k]-1;
+      k = (irowslen == -1) ? k : irows[k]-1;
+      dans[i] = dx[k];
+    }
+  } break;
   case STRSXP:
     ans = PROTECT(allocVector(STRSXP, ngrp));
     for (i=0; i<ngrp; i++) {
@@ -1022,6 +1149,18 @@ SEXP gnthvalue(SEXP x, SEXP valArg) {
     }
   }
     break;
+  case CPLXSXP: {
+    const double complex *dx = (double complex *)COMPLEX(x);
+    ans = PROTECT(allocVector(CPLXSXP, ngrp));
+    double complex *dans = (double complex *)COMPLEX(ans);
+    for (i=0; i<ngrp; i++) {
+      if (val > grpsize[i]) { COMPLEX(ans)[i].r = NA_REAL; COMPLEX(ans)[i].i = NA_REAL; continue; }
+      k = ff[i]+val-2;
+      if (isunsorted) k = oo[k]-1;
+      k = (irowslen == -1) ? k : irows[k]-1;
+      dans[i] = dx[k];
+    }
+  } break;
   case STRSXP:
     ans = PROTECT(allocVector(STRSXP, ngrp));
     for (i=0; i<ngrp; i++) {

From ec6d3682ab9bb2432609fd7b05a8b7cee00704f7 Mon Sep 17 00:00:00 2001
From: Michael Chirico <MichaelChirico4@gmail.com>
Date: Thu, 11 Jul 2019 08:38:18 +0800
Subject: [PATCH 07/20] add some tests

---
 inst/tests/tests.Rraw | 31 ++++++++++++++++++++-----------
 1 file changed, 20 insertions(+), 11 deletions(-)

diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index 1c720d4de9..dae67ed6bc 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -13569,7 +13569,6 @@ test(1967.78, x[1:5, sum(v), by = list(i5 = 1:5 %% 2L), verbose = TRUE],
 
 # gforce integer overflow coerce to double
 DT = data.table(A=1:5, B=-3i, C=2147483647L)
-test(1968.1, DT[, sum(B), by=A%%2L], error="Type 'complex' not supported by GForce sum (gsum). Either add the")
 test(1968.2, storage.mode(DT$C), "integer")
 test(1968.3, DT[, sum(C), by=A%%2L], data.table(A=c(1L,0L), V1=c(6442450941, 4294967294)),
              warning="sum.*integer column.*more than type 'integer' can hold.*coerced to 'numeric'")
@@ -14224,16 +14223,16 @@ if (test_bit64) {
   DT = data.table(id=c(rep(1L,3), rep(2L, 3)), v=bit64::as.integer64(c(1:3, 4L, 5:6)))
   test(2019, DT[2:6, sum(v), id], data.table(id=1:2, V1=bit64::as.integer64(c(5L,15L)))) # gather, case of int64 and irows
 }
-DT = data.table(id = c(1L,1L,2L), v = c(1i, 2i, 3i))
-test(2020.01, DT[, min(v), by=id], error="'complex' not supported by GForce min")
-test(2020.02, DT[, max(v), by=id], error="'complex' not supported by GForce max")
-test(2020.03, DT[, median(v), by=id], error="'complex' not supported by GForce median")
-test(2020.04, DT[, head(v, 1), by=id], error="'complex' not supported by GForce head")
-test(2020.05, DT[, tail(v, 1), by=id], error="'complex' not supported by GForce tail")
-test(2020.06, DT[, v[1], by=id], error="'complex' not supported by GForce subset")
-test(2020.07, DT[, sd(v), by=id], error="'complex' not supported by GForce sd")
-test(2020.08, DT[, var(v), by=id], error="'complex' not supported by GForce var")
-test(2020.09, DT[, prod(v), by=id], error="'complex' not supported by GForce prod")
+DT = data.table(id = c(1L,1L,2L), v = as.raw(0:2))
+test(2020.01, DT[, min(v), by=id], error="'raw' not supported by GForce min")
+test(2020.02, DT[, max(v), by=id], error="'raw' not supported by GForce max")
+test(2020.03, DT[, median(v), by=id], error="'raw' not supported by GForce median")
+test(2020.04, DT[, head(v, 1), by=id], error="'raw' not supported by GForce head")
+test(2020.05, DT[, tail(v, 1), by=id], error="'raw' not supported by GForce tail")
+test(2020.06, DT[, v[1], by=id], error="'raw' not supported by GForce subset")
+test(2020.07, DT[, sd(v), by=id], error="'raw' not supported by GForce sd")
+test(2020.08, DT[, var(v), by=id], error="'raw' not supported by GForce var")
+test(2020.09, DT[, prod(v), by=id], error="'raw' not supported by GForce prod")
 DT = data.table(id = c(1L,1L,2L,2L), v = c(1L, 2L, NA, NA))
 test(2020.10, DT[, median(v), id], data.table(id=1:2, V1=c(1.5, NA))) # median whole group has NAs
 
@@ -15200,6 +15199,16 @@ d2 = data.table(r = 1:5, s = seq(0L, 20L, 5L))
 test(2062.1, d1[d2, on = .(a <= s, b >= s), j = .SD],   ans<-data.table(a=INT(0,5,10,10,15,20), b=INT(0,5,10,10,15,20)))
 test(2062.2, d1[d2, on = .(a <= s, b >= s)][, .(a, b)], ans)
 
+# GForce for complex columns, part of #3690
+DT = data.table(id=c(1L,1L,2L), v=c(1i, 2i, 3i))
+test(2063.01, DT[, min(v), by=id], error="'complex' has no well-defined min")
+test(2063.02, DT[, max(v), by=id], error="'complex' has no well-defined max")
+test(2063.03, DT[, head(v, 1), by=id], data.table(id=1:2, V1=c(1, 3)*1i))
+test(2063.04, DT[, tail(v, 1), by=id], data.table(id=1:2, V1=(2:3)*1i))
+test(2063.05, DT[, v[2], by=id], data.table(id = 1:2, V1=c(2i, NA)))
+## former test 1968.1
+DT = data.table(A=1:5, B=-3i, C=2147483647L)
+test(2063.06, DT[, sum(B), by=A%%2L], data.table(A = 1:0, V1 = c(-9i, -6i)))
 
 ###################################
 #  Add new tests above this line  #

From 1c1a97e894c291561974913fcfd24f986cb6fa92 Mon Sep 17 00:00:00 2001
From: Michael Chirico <michael.chirico@grabtaxi.com>
Date: Sun, 14 Jul 2019 04:20:42 +0800
Subject: [PATCH 08/20] move to Rcomplex API

---
 src/data.table.h |  7 -----
 src/gsumm.c      | 73 ++++++++++++++++++++++++++++--------------------
 2 files changed, 42 insertions(+), 38 deletions(-)

diff --git a/src/data.table.h b/src/data.table.h
index 052086a9fb..a2a6204051 100644
--- a/src/data.table.h
+++ b/src/data.table.h
@@ -63,13 +63,6 @@ typedef R_xlen_t RLEN;
 #define ALTREP(x) 0  // for R<3.5.0, see issue #2866 and grep for "ALTREP" to see comments where it's used
 #endif
 
-// for complex type support; copied from r-source/src/main/Rcomplex.h
-#if defined(__GNUC__) && (defined(__sun__) || defined(__hpux__) || defined(Win32))
-# undef  I
-# define I (__extension__ 1.0iF)
-#endif
-
-
 // init.c
 SEXP char_integer64;
 SEXP char_ITime;
diff --git a/src/gsumm.c b/src/gsumm.c
index c327988f72..79bef6767f 100644
--- a/src/gsumm.c
+++ b/src/gsumm.c
@@ -1,6 +1,5 @@
 #include "data.table.h"
 //#include <time.h>
-#include <complex.h>
 
 static int ngrp = 0;         // number of groups
 static int *grpsize = NULL;  // size of each group, used by gmean (and gmedian) not gsum
@@ -300,30 +299,30 @@ void *gather(SEXP x, bool *anyNA)
     }
   } break;
   case CPLXSXP: {
-    const double complex *restrict thisx = (double complex *)COMPLEX(x);
+    const Rcomplex *restrict thisx = COMPLEX(x);
     #pragma omp parallel for num_threads(getDTthreads())
     for (int b=0; b<nBatch; b++) {
       int *restrict my_tmpcounts = tmpcounts + omp_get_thread_num()*highSize;
-      memcpy(my_tmpcounts, counts + b*highSize, highSize*sizeof(double complex));   // original cumulated   // already cumulated for this batch
-      double complex *restrict my_gx = (double complex *)gx + b*batchSize;
+      memcpy(my_tmpcounts, counts + b*highSize, highSize*sizeof(Rcomplex));   // original cumulated   // already cumulated for this batch
+      Rcomplex *restrict my_gx = (Rcomplex *)gx + b*batchSize;
       const uint16_t *my_high = high + b*batchSize;
       const int howMany = b==nBatch-1 ? lastBatchSize : batchSize;
       bool my_anyNA = false;
       if (irowslen==-1) {
-        const double complex *my_x = thisx + b*batchSize;
+        const Rcomplex *my_x = thisx + b*batchSize;
         for (int i=0; i<howMany; i++) {
-          const double complex elem = my_x[i];
+          const Rcomplex elem = my_x[i];
           my_gx[ my_tmpcounts[my_high[i]]++ ] = elem;
           // typically just checking one component would be enough,
           //   but ?complex suggests there may be some edge cases; better to be safe
-          if (creal(elem)==NA_REAL && cimag(elem) == NA_REAL) my_anyNA = true;
+          if (elem.r==NA_REAL && elem.i == NA_REAL) my_anyNA = true;
         }
       } else {
         const int *my_x = irows + b*batchSize;
         for (int i=0; i<howMany; i++) {
-          double complex elem = thisx[ my_x[i]-1 ];
+          Rcomplex elem = thisx[ my_x[i]-1 ];
           my_gx[ my_tmpcounts[my_high[i]]++ ] = elem;
-          if (creal(elem)==NA_REAL && cimag(elem) == NA_REAL) my_anyNA = true;
+          if (elem.rNA_REAL && elem.i == NA_REAL) my_anyNA = true;
         }
       }
       if (my_anyNA) *anyNA = true;  // naked write ok since just bool and always writing true; and no performance issue as maximum nBatch writes
@@ -460,18 +459,18 @@ SEXP gsum(SEXP x, SEXP narmArg)
     }
   } break;
   case CPLXSXP: {
-    const double complex *restrict gx = gather(x, &anyNA);
+    const Rcomplex *restrict gx = gather(x, &anyNA);
     ans = PROTECT(allocVector(CPLXSXP, ngrp));
-    double complex *restrict ansp = (double complex *)COMPLEX(ans);
-    memset(ansp, 0, ngrp*sizeof(double complex));
+    Rcomplex *restrict ansp = COMPLEX(ans);
+    memset(ansp, 0, ngrp*sizeof(Rcomplex));
     if (!narm || !anyNA) {
       #pragma omp parallel for num_threads(getDTthreads())
       for (int h=0; h<highSize; h++) {
-        double complex *restrict _ans = ansp + (h<<shift);
+        Rcomplex *restrict _ans = ansp + (h<<shift);
         for (int b=0; b<nBatch; b++) {
           const int pos = counts[ b*highSize + h ];
           const int howMany = ((h==highSize-1) ? (b==nBatch-1?lastBatchSize:batchSize) : counts[ b*highSize + h + 1 ]) - pos;
-          const double complex *my_gx = gx + b*batchSize + pos;
+          const Rcomplex *my_gx = gx + b*batchSize + pos;
           const uint16_t *my_low = low + b*batchSize + pos;
           for (int i=0; i<howMany; i++) {
             _ans[my_low[i]] += my_gx[i];  // let NA propagate when !narm
@@ -482,14 +481,14 @@ SEXP gsum(SEXP x, SEXP narmArg)
       // narm==true and anyNA==true
       #pragma omp parallel for num_threads(getDTthreads())
       for (int h=0; h<highSize; h++) {
-        double complex *restrict _ans = ansp + (h<<shift);
+        Rcomplex *restrict _ans = ansp + (h<<shift);
         for (int b=0; b<nBatch; b++) {
           const int pos = counts[ b*highSize + h ];
           const int howMany = ((h==highSize-1) ? (b==nBatch-1?lastBatchSize:batchSize) : counts[ b*highSize + h + 1 ]) - pos;
-          const double complex *my_gx = gx + b*batchSize + pos;
+          const Rcomplex *my_gx = gx + b*batchSize + pos;
           const uint16_t *my_low = low + b*batchSize + pos;
           for (int i=0; i<howMany; i++) {
-            const double complex elem = my_gx[i];
+            const Rcomplex elem = my_gx[i];
             if (!ISNAN(elem)) _ans[my_low[i]] += elem;
           }
         }
@@ -523,7 +522,7 @@ SEXP gmean(SEXP x, SEXP narm)
       for (int i=0; i<ngrp; i++) *xd++ /= grpsize[i];  // let NA propogate
     } break;
     case CPLXSXP: {
-      double complex *xd = (double complex *)COMPLEX(ans);
+      Rcomplex *xd = COMPLEX(ans);
       for (int i=0; i<ngrp; i++) *xd++ /= grpsize[i];
     } break;
     default :
@@ -564,7 +563,7 @@ SEXP gmean(SEXP x, SEXP narm)
     }
   } break;
   case CPLXSXP: {
-    const double complex *xd = (double complex *)COMPLEX(x);
+    const Rcomplex *xd = COMPLEX(x);
     for (int i=0; i<n; i++) {
       int thisgrp = grp[i];
       int ix = (irowslen == -1) ? i : irows[i]-1;
@@ -591,15 +590,27 @@ SEXP gmean(SEXP x, SEXP narm)
   } break;
   case CPLXSXP: {
   ans = PROTECT(allocVector(CPLXSXP, ngrp));
-    double complex *ansd = (double complex *)COMPLEX(ans);
+    Rcomplex *ansd = COMPLEX(ans);
     for (int i=0; i<ngrp; i++) {
       if (c[i]==0) { ansd[i] = R_NaN + R_NaN*1i; continue; }  // NaN to follow base::mean
       s[i] /= c[i];
-      if (creal(s[i]) > DBL_MAX) ansd[i] = R_PosInf + cimag(s[i])*I;
-      else if (creal(s[i]) < -DBL_MAX) ansd[i] = R_NegInf + cimag(s[i])*I;
-      if (cimag(s[i]) > DBL_MAX) ansd[i] = creal(s[i]) + R_PosInf*I;
-      else if (cimag(s[i]) < -DBL_MAX) ansd[i] = creal(s[i]) + R_NegInf*I;
-      else ansd[i] = (double complex)s[i];
+      if (s[i].r > DBL_MAX) {
+        Rcomplex REAL_TO_POS_INF = {R_PosInf, s[i].i};
+        ansd[i] = REAL_TO_POS_INF;
+      }
+      else if (s[i].r < -DBL_MAX) {
+        Rcomplex REAL_TO_NEG_INF = {R_NegInf, s[i].i};
+        ansd[i] = REAL_TO_NEG_INF;
+      }
+      if (s[i].i > DBL_MAX) {
+        Rcomplex IMAG_TO_POS_INF = {s[i].r, R_PosInf};
+        ansd[i] = IMAG_TO_POS_INF;
+      }
+      else if (s[i].i < -DBL_MAX) {
+        Rcomplex IMAG_TO_NEG_INF = {s[i].r, R_NegInf};
+        ansd[i] = IMAG_TO_NEG_INF;
+      }
+      else ansd[i] = s[i];
     }
   }
   }
@@ -974,9 +985,9 @@ SEXP glast(SEXP x) {
   }
     break;
   case CPLXSXP: {
-    const double complex *dx = (double complex *)COMPLEX(x);
+    const Rcomplex *dx = COMPLEX(x);
     ans = PROTECT(allocVector(CPLXSXP, ngrp));
-    double complex *dans = (double complex *)COMPLEX(ans);
+    Rcomplex *dans = COMPLEX(ans);
     for (i=0; i<ngrp; i++) {
       k = ff[i]+grpsize[i]-2;
       if (isunsorted) k = oo[k]-1;
@@ -1056,9 +1067,9 @@ SEXP gfirst(SEXP x) {
   }
     break;
   case CPLXSXP: {
-    const double complex *dx = (double complex *)COMPLEX(x);
+    const Rcomplex *dx = COMPLEX(x);
     ans = PROTECT(allocVector(CPLXSXP, ngrp));
-    double complex *dans = (double complex *)COMPLEX(ans);
+    Rcomplex *dans = COMPLEX(ans);
     for (i=0; i<ngrp; i++) {
       k = ff[i]-1;
       if (isunsorted) k = oo[k]-1;
@@ -1150,9 +1161,9 @@ SEXP gnthvalue(SEXP x, SEXP valArg) {
   }
     break;
   case CPLXSXP: {
-    const double complex *dx = (double complex *)COMPLEX(x);
+    const Rcomplex *dx = COMPLEX(x);
     ans = PROTECT(allocVector(CPLXSXP, ngrp));
-    double complex *dans = (double complex *)COMPLEX(ans);
+    Rcomplex *dans = COMPLEX(ans);
     for (i=0; i<ngrp; i++) {
       if (val > grpsize[i]) { COMPLEX(ans)[i].r = NA_REAL; COMPLEX(ans)[i].i = NA_REAL; continue; }
       k = ff[i]+val-2;

From c2aa0031bb363d7297cd65de4e05ce10a88da1f2 Mon Sep 17 00:00:00 2001
From: mattdowle <mattjdowle@gmail.com>
Date: Tue, 16 Jul 2019 18:05:42 -0700
Subject: [PATCH 09/20] got it working: use ISNAN not ==NA_REAL, and separate s
 and si in gmean

---
 src/gsumm.c | 55 +++++++++++++++++++++++------------------------------
 1 file changed, 24 insertions(+), 31 deletions(-)

diff --git a/src/gsumm.c b/src/gsumm.c
index 79bef6767f..eb85a1b907 100644
--- a/src/gsumm.c
+++ b/src/gsumm.c
@@ -315,14 +315,14 @@ void *gather(SEXP x, bool *anyNA)
           my_gx[ my_tmpcounts[my_high[i]]++ ] = elem;
           // typically just checking one component would be enough,
           //   but ?complex suggests there may be some edge cases; better to be safe
-          if (elem.r==NA_REAL && elem.i == NA_REAL) my_anyNA = true;
+          if (ISNAN(elem.r) && ISNAN(elem.i)) my_anyNA = true;
         }
       } else {
         const int *my_x = irows + b*batchSize;
         for (int i=0; i<howMany; i++) {
           Rcomplex elem = thisx[ my_x[i]-1 ];
           my_gx[ my_tmpcounts[my_high[i]]++ ] = elem;
-          if (elem.rNA_REAL && elem.i == NA_REAL) my_anyNA = true;
+          if (ISNAN(elem.r) && ISNAN(elem.i)) my_anyNA = true;
         }
       }
       if (my_anyNA) *anyNA = true;  // naked write ok since just bool and always writing true; and no performance issue as maximum nBatch writes
@@ -473,7 +473,8 @@ SEXP gsum(SEXP x, SEXP narmArg)
           const Rcomplex *my_gx = gx + b*batchSize + pos;
           const uint16_t *my_low = low + b*batchSize + pos;
           for (int i=0; i<howMany; i++) {
-            _ans[my_low[i]] += my_gx[i];  // let NA propagate when !narm
+            _ans[my_low[i]].r += my_gx[i].r;  // let NA propagate when !narm
+            _ans[my_low[i]].i += my_gx[i].i;
           }
         }
       }
@@ -489,7 +490,8 @@ SEXP gsum(SEXP x, SEXP narmArg)
           const uint16_t *my_low = low + b*batchSize + pos;
           for (int i=0; i<howMany; i++) {
             const Rcomplex elem = my_gx[i];
-            if (!ISNAN(elem)) _ans[my_low[i]] += elem;
+            if (!ISNAN(elem.r)) _ans[my_low[i]].r += elem.r;
+            if (!ISNAN(elem.i)) _ans[my_low[i]].i += elem.i;
           }
         }
       }
@@ -523,7 +525,11 @@ SEXP gmean(SEXP x, SEXP narm)
     } break;
     case CPLXSXP: {
       Rcomplex *xd = COMPLEX(ans);
-      for (int i=0; i<ngrp; i++) *xd++ /= grpsize[i];
+      for (int i=0; i<ngrp; i++) {
+        xd->i /= grpsize[i];
+        xd->r /= grpsize[i];
+        xd++;
+      }
     } break;
     default :
       error("Internal error: gsum returned type '%s'. typeof(x) is '%s'", type2char(TYPEOF(ans)), type2char(TYPEOF(x))); // # nocov
@@ -535,7 +541,7 @@ SEXP gmean(SEXP x, SEXP narm)
   const int n = (irowslen == -1) ? length(x) : irowslen;
   if (nrow != n) error("nrow [%d] != length(x) [%d] in gsum", nrow, n);
 
-  long double *s = calloc(ngrp, sizeof(long double));
+  long double *s = calloc(ngrp, sizeof(long double)), *si=NULL;  // s = sum; si = sum imaginary just for complex
   if (!s) error("Unable to allocate %d * %d bytes for sum in gmean na.rm=TRUE", ngrp, sizeof(long double));
 
   int *c = calloc(ngrp, sizeof(int));
@@ -564,11 +570,14 @@ SEXP gmean(SEXP x, SEXP narm)
   } break;
   case CPLXSXP: {
     const Rcomplex *xd = COMPLEX(x);
+    long double *si = calloc(ngrp, sizeof(long double));
+    if (!si) error("Unable to allocate %d * %d bytes for si in gmean na.rm=TRUE", ngrp, sizeof(long double));
     for (int i=0; i<n; i++) {
       int thisgrp = grp[i];
       int ix = (irowslen == -1) ? i : irows[i]-1;
-      if (ISNAN(xd[ix])) continue;
-      s[thisgrp] += xd[ix];
+      if (ISNAN(xd[ix].r) || ISNAN(xd[ix].i)) continue;  // || otherwise we'll need two counts in two c's
+      s[thisgrp] += xd[ix].r;
+      si[thisgrp] += xd[ix].i;
       c[thisgrp]++;
     }
   } break;
@@ -583,38 +592,22 @@ SEXP gmean(SEXP x, SEXP narm)
     for (int i=0; i<ngrp; i++) {
       if (c[i]==0) { ansd[i] = R_NaN; continue; }  // NaN to follow base::mean
       s[i] /= c[i];
-      if (s[i] > DBL_MAX) ansd[i] = R_PosInf;
-      else if (s[i] < -DBL_MAX) ansd[i] = R_NegInf;
-      else ansd[i] = (double)s[i];
+      ansd[i] = s[i]>DBL_MAX ? R_PosInf : (s[i] < -DBL_MAX ? R_NegInf : (double)s[i]);
     }
   } break;
   case CPLXSXP: {
-  ans = PROTECT(allocVector(CPLXSXP, ngrp));
+    ans = PROTECT(allocVector(CPLXSXP, ngrp));
     Rcomplex *ansd = COMPLEX(ans);
     for (int i=0; i<ngrp; i++) {
-      if (c[i]==0) { ansd[i] = R_NaN + R_NaN*1i; continue; }  // NaN to follow base::mean
+      if (c[i]==0) { ansd[i].r = R_NaN; ansd[i].i = R_NaN; continue; }
       s[i] /= c[i];
-      if (s[i].r > DBL_MAX) {
-        Rcomplex REAL_TO_POS_INF = {R_PosInf, s[i].i};
-        ansd[i] = REAL_TO_POS_INF;
-      }
-      else if (s[i].r < -DBL_MAX) {
-        Rcomplex REAL_TO_NEG_INF = {R_NegInf, s[i].i};
-        ansd[i] = REAL_TO_NEG_INF;
-      }
-      if (s[i].i > DBL_MAX) {
-        Rcomplex IMAG_TO_POS_INF = {s[i].r, R_PosInf};
-        ansd[i] = IMAG_TO_POS_INF;
-      }
-      else if (s[i].i < -DBL_MAX) {
-        Rcomplex IMAG_TO_NEG_INF = {s[i].r, R_NegInf};
-        ansd[i] = IMAG_TO_NEG_INF;
-      }
-      else ansd[i] = s[i];
+      si[i] /= c[i];
+      ansd[i].r = s[i] >DBL_MAX ? R_PosInf : (s[i] < -DBL_MAX ? R_NegInf : (double)s[i]);
+      ansd[i].i = si[i]>DBL_MAX ? R_PosInf : (si[i]< -DBL_MAX ? R_NegInf : (double)si[i]);
     }
   }
   }
-  free(s); free(c);
+  free(s); free(si); free(c);
   copyMostAttrib(x, ans);
   UNPROTECT(1);
   // Rprintf("this gmean na.rm=TRUE took %8.3f\n", 1.0*(clock()-start)/CLOCKS_PER_SEC);

From b3704280a91494857956e11bb46b42691dbef38e Mon Sep 17 00:00:00 2001
From: mattdowle <mattjdowle@gmail.com>
Date: Tue, 16 Jul 2019 18:06:35 -0700
Subject: [PATCH 10/20] comment tweak

---
 src/gsumm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gsumm.c b/src/gsumm.c
index eb85a1b907..8670cafa1a 100644
--- a/src/gsumm.c
+++ b/src/gsumm.c
@@ -575,7 +575,7 @@ SEXP gmean(SEXP x, SEXP narm)
     for (int i=0; i<n; i++) {
       int thisgrp = grp[i];
       int ix = (irowslen == -1) ? i : irows[i]-1;
-      if (ISNAN(xd[ix].r) || ISNAN(xd[ix].i)) continue;  // || otherwise we'll need two counts in two c's
+      if (ISNAN(xd[ix].r) || ISNAN(xd[ix].i)) continue;  // || otherwise we'll need two counts in two c's too?
       s[thisgrp] += xd[ix].r;
       si[thisgrp] += xd[ix].i;
       c[thisgrp]++;

From 940246fff4f47b90bcb7f0820bcf0b808cdd20c1 Mon Sep 17 00:00:00 2001
From: mattdowle <mattjdowle@gmail.com>
Date: Tue, 16 Jul 2019 18:31:35 -0700
Subject: [PATCH 11/20] no need for complex.h in dogroups.c

---
 inst/tests/tests.Rraw |  1 +
 src/dogroups.c        | 24 ++++++++----------------
 2 files changed, 9 insertions(+), 16 deletions(-)

diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index b349c887fa..7598b56047 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -15308,6 +15308,7 @@ test(2066.05, DT[, v[2], by=id], data.table(id = 1:2, V1=c(2i, NA)))
 DT = data.table(A=1:5, B=-3i, C=2147483647L)
 test(2066.06, DT[, sum(B), by=A%%2L], data.table(A = 1:0, V1 = c(-9i, -6i)))
 
+test(9999, 3L, 3L) # dummy test
 
 ###################################
 #  Add new tests above this line  #
diff --git a/src/dogroups.c b/src/dogroups.c
index 849a9472af..5f8ee1a61f 100644
--- a/src/dogroups.c
+++ b/src/dogroups.c
@@ -2,19 +2,12 @@
 #include <Rdefines.h>
 #include <fcntl.h>
 #include <time.h>
-#include <complex.h>
-
-// copied from r-source/src/main/Rcomplex.h
-#if defined(__GNUC__) && (defined(__sun__) || defined(__hpux__) || defined(Win32))
-# undef  I
-# define I (__extension__ 1.0iF)
-#endif
 
 SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEXP xjiscols, SEXP grporder, SEXP order, SEXP starts, SEXP lens, SEXP jexp, SEXP env, SEXP lhs, SEXP newnames, SEXP on, SEXP verbose)
 {
   R_len_t rownum, ngrp, nrowgroups, njval=0, ngrpcols, ansloc=0, maxn, estn=-1, thisansloc, grpn, thislen, igrp, origIlen=0, origSDnrow=0;
   int protecti=0;
-  SEXP names, names2, xknames, bynames, dtnames, ans=NULL, jval, thiscol, BY, N, DOTI, GRP, iSD, xSD, rownames, s, RHS, listwrap, target, source, tmp;
+  SEXP names, names2, xknames, bynames, dtnames, ans=NULL, jval, thiscol, BY, N, I, GRP, iSD, xSD, rownames, s, RHS, listwrap, target, source, tmp;
   Rboolean wasvector, firstalloc=FALSE, NullWarnDone=FALSE;
   clock_t tstart=0, tblock[10]={0}; int nblock[10]={0};
 
@@ -59,8 +52,7 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX
   for (R_len_t i=0; i<n; ++i) {
     if (ilens[i] > maxGrpSize) maxGrpSize = ilens[i];
   }
-  // #3639 introduced complex.h, which defines I, so use DOTI to differentiate
-  defineVar(install(".I"), DOTI = PROTECT(allocVector(INTSXP, maxGrpSize)), env); protecti++;
+  defineVar(install(".I"), I = PROTECT(allocVector(INTSXP, maxGrpSize)), env); protecti++;
   R_LockBinding(install(".I"), env);
 
   dtnames = PROTECT(getAttrib(dt, R_NamesSymbol)); protecti++; // added here to fix #4990 - `:=` did not issue recycling warning during "by"
@@ -84,7 +76,7 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX
     copyMostAttrib(VECTOR_ELT(dt,INTEGER(dtcols)[i]-1), VECTOR_ELT(SDall,i));  // not names, otherwise test 778 would fail
   }
 
-  origIlen = length(DOTI);  // test 762 has length(DOTI)==1 but nrow(SD)==0
+  origIlen = length(I);  // test 762 has length(I)==1 but nrow(SD)==0
   if (length(SDall)) origSDnrow = length(VECTOR_ELT(SDall, 0));
 
   xknames = getAttrib(xSD, R_NamesSymbol);
@@ -165,8 +157,8 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX
         }
       }
       grpn = 1;  // it may not be 1 e.g. test 722. TODO: revisit.
-      SETLENGTH(DOTI, grpn);
-      INTEGER(DOTI)[0] = 0;
+      SETLENGTH(I, grpn);
+      INTEGER(I)[0] = 0;
       for (int j=0; j<length(xSD); ++j) {
         switch (TYPEOF(VECTOR_ELT(xSD, j))) {
         case LGLSXP :
@@ -193,8 +185,8 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX
       }
     } else {
       if (LOGICAL(verbose)[0]) tstart = clock();
-      SETLENGTH(DOTI, grpn);
-      int *iI = INTEGER(DOTI);
+      SETLENGTH(I, grpn);
+      int *iI = INTEGER(I);
       if (LENGTH(order)==0) {
         if (grpn) rownum = istarts[i]-1; else rownum = -1;  // not ternary to pass strict-barrier
         for (int j=0; j<grpn; ++j) iI[j] = rownum+j+1;
@@ -484,7 +476,7 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX
   } else ans = R_NilValue;
   // Now reset length of .SD columns and .I to length of largest group, otherwise leak if the last group is smaller (often is).
   for (int j=0; j<length(SDall); ++j) SETLENGTH(VECTOR_ELT(SDall,j), origSDnrow);
-  SETLENGTH(DOTI, origIlen);
+  SETLENGTH(I, origIlen);
   if (LOGICAL(verbose)[0]) {
     if (nblock[0] && nblock[1]) error("Internal error: block 0 [%d] and block 1 [%d] have both run", nblock[0], nblock[1]); // # nocov
     int w = nblock[1]>0;

From 8f6225c2c8103e9c12390fbf5f5eed891aa28c21 Mon Sep 17 00:00:00 2001
From: mattdowle <mattjdowle@gmail.com>
Date: Tue, 16 Jul 2019 18:43:43 -0700
Subject: [PATCH 12/20] turned on verbose in last new test 2066.06 to trace
 crash on 32bit i386 Windows

---
 inst/tests/tests.Rraw | 2 +-
 src/gsumm.c           | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index 7598b56047..42cc7d87b1 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -15306,7 +15306,7 @@ test(2066.04, DT[, tail(v, 1), by=id], data.table(id=1:2, V1=(2:3)*1i))
 test(2066.05, DT[, v[2], by=id], data.table(id = 1:2, V1=c(2i, NA)))
 ## former test 1968.1
 DT = data.table(A=1:5, B=-3i, C=2147483647L)
-test(2066.06, DT[, sum(B), by=A%%2L], data.table(A = 1:0, V1 = c(-9i, -6i)))
+test(2066.06, DT[, sum(B), by=A%%2L, verbose=TRUE], data.table(A = 1:0, V1 = c(-9i, -6i)))
 
 test(9999, 3L, 3L) # dummy test
 
diff --git a/src/gsumm.c b/src/gsumm.c
index 8670cafa1a..84ccd83a72 100644
--- a/src/gsumm.c
+++ b/src/gsumm.c
@@ -508,7 +508,7 @@ SEXP gsum(SEXP x, SEXP narmArg)
 
 SEXP gmean(SEXP x, SEXP narm)
 {
-  SEXP ans;
+  SEXP ans=R_NilValue;
   int protecti=0;
   //clock_t start = clock();
   if (!isLogical(narm) || LENGTH(narm)!=1 || LOGICAL(narm)[0]==NA_LOGICAL) error("na.rm must be TRUE or FALSE");

From a53f159badb689d92dc928f88fd5c5ad2fa657d8 Mon Sep 17 00:00:00 2001
From: mattdowle <mattjdowle@gmail.com>
Date: Tue, 16 Jul 2019 19:08:24 -0700
Subject: [PATCH 13/20] more tracing

---
 src/gsumm.c | 21 +++++++++++++--------
 1 file changed, 13 insertions(+), 8 deletions(-)

diff --git a/src/gsumm.c b/src/gsumm.c
index 84ccd83a72..9fd721fabf 100644
--- a/src/gsumm.c
+++ b/src/gsumm.c
@@ -38,7 +38,8 @@ static int nbit(int n)
 }
 
 SEXP gforce(SEXP env, SEXP jsub, SEXP o, SEXP f, SEXP l, SEXP irowsArg) {
-  // double started = wallclock();
+  double started = wallclock();
+  const bool verbose = GetVerbose();
   if (TYPEOF(env) != ENVSXP) error("env is not an environment");
   // The type of jsub is pretty flexbile in R, so leave checking to eval() below.
   if (!isInteger(o)) error("o is not an integer vector");
@@ -94,7 +95,7 @@ SEXP gforce(SEXP env, SEXP jsub, SEXP o, SEXP f, SEXP l, SEXP irowsArg) {
     int *elem = grp + fp[g]-1;
     for (int j=0; j<grpsize[g]; j++)  elem[j] = g;
   }
-  //Rprintf("gforce initial population of grp took %.3f\n", wallclock()-started); started=wallclock();
+  if (verbose) { Rprintf("gforce initial population of grp took %.3f\n", wallclock()-started); started=wallclock(); }
   if (LENGTH(o)) {
     isunsorted = 1; // for gmedian
 
@@ -189,13 +190,13 @@ SEXP gforce(SEXP env, SEXP jsub, SEXP o, SEXP f, SEXP l, SEXP irowsArg) {
     // counts is now cumulated within batch (with ending values) and we leave it that way
     // memcpy(counts + b*256, myCounts, 256*sizeof(int));  // save cumulate for later, first bucket contains position of next. For ease later in the very last batch.
   }
-  //Rprintf("gforce assign high and low took %.3f\n", wallclock()-started); started=wallclock();
+  if (verbose) { Rprintf("gforce assign high and low took %.3f\n", wallclock()-started); started=wallclock(); }
 
   oo = INTEGER(o);
   ff = INTEGER(f);
 
   SEXP ans = PROTECT( eval(jsub, env) );
-  //Rprintf("gforce eval took %.3f\n", wallclock()-started);
+  if (verbose) { Rprintf("gforce eval took %.3f\n", wallclock()-started); started=wallclock(); }
   // if this eval() fails with R error, R will release grp for us. Which is why we use R_alloc above.
   if (isVectorAtomic(ans)) {
     SEXP tt = ans;
@@ -211,7 +212,9 @@ SEXP gforce(SEXP env, SEXP jsub, SEXP o, SEXP f, SEXP l, SEXP irowsArg) {
 
 void *gather(SEXP x, bool *anyNA)
 {
-  //double started = wallclock();
+  double started=wallclock();
+  const bool verbose = GetVerbose();
+  if (verbose) Rprintf("gather took ... ");
   switch (TYPEOF(x)) {
   case LGLSXP: case INTSXP: {
     const int *restrict thisx = INTEGER(x);
@@ -331,7 +334,7 @@ void *gather(SEXP x, bool *anyNA)
   default :
     error("gather implemented for INTSXP, REALSXP, and CPLXSXP but not '%s'", type2char(TYPEOF(x)));   // # nocov
   }
-  //Rprintf("gather took %.3fs\n", wallclock()-started);
+  if (verbose) { Rprintf("%.3fs\n", wallclock()-started); }
   return gx;
 }
 
@@ -341,7 +344,9 @@ SEXP gsum(SEXP x, SEXP narmArg)
   const bool narm = LOGICAL(narmArg)[0];
   if (inherits(x, "factor")) error("sum is not meaningful for factors.");
   const int n = (irowslen == -1) ? length(x) : irowslen;
-  //clock_t start = clock();
+  double started = wallclock();
+  const bool verbose=GetVerbose();
+  if (verbose) Rprintf("This gsum took (narm=%s) ... ", narm?"TRUE":"FALSE");
   if (nrow != n) error("nrow [%d] != length(x) [%d] in gsum", nrow, n);
   bool anyNA=false;
   SEXP ans;
@@ -502,7 +507,7 @@ SEXP gsum(SEXP x, SEXP narmArg)
   }
   copyMostAttrib(x, ans);
   UNPROTECT(1);
-  // Rprintf("this gsum took %8.3f\n", 1.0*(clock()-start)/CLOCKS_PER_SEC);
+  if (verbose) { Rprintf("%.3fs\n", wallclock()-started); }
   return(ans);
 }
 

From c22cd362199340e0a43ac0724711ea253d2afad6 Mon Sep 17 00:00:00 2001
From: mattdowle <mattjdowle@gmail.com>
Date: Tue, 16 Jul 2019 19:26:09 -0700
Subject: [PATCH 14/20] more tracing

---
 R/data.table.R | 1 +
 src/gsumm.c    | 1 +
 2 files changed, 2 insertions(+)

diff --git a/R/data.table.R b/R/data.table.R
index fea514aeef..2bcf03b9b0 100644
--- a/R/data.table.R
+++ b/R/data.table.R
@@ -1725,6 +1725,7 @@ replace_order = function(isub, verbose, env) {
     #fix for #1683
     if (use.I) assign(".I", seq_len(nrow(x)), thisEnv)
     ans = gforce(thisEnv, jsub, o__, f__, len__, irows) # irows needed for #971.
+    if (verbose) { cat("Returned from gforce()\n"); flush.console(); }
     gi = if (length(o__)) o__[f__] else f__
     g = lapply(grpcols, function(i) groups[[i]][gi])
     ans = c(g, ans)
diff --git a/src/gsumm.c b/src/gsumm.c
index 9fd721fabf..2abcda51dd 100644
--- a/src/gsumm.c
+++ b/src/gsumm.c
@@ -199,6 +199,7 @@ SEXP gforce(SEXP env, SEXP jsub, SEXP o, SEXP f, SEXP l, SEXP irowsArg) {
   if (verbose) { Rprintf("gforce eval took %.3f\n", wallclock()-started); started=wallclock(); }
   // if this eval() fails with R error, R will release grp for us. Which is why we use R_alloc above.
   if (isVectorAtomic(ans)) {
+    if (verbose) Rprintf("isVectorAtomic(ans)==TRUE\n");
     SEXP tt = ans;
     ans = PROTECT(allocVector(VECSXP, 1));
     SET_VECTOR_ELT(ans, 0, tt);

From 8e761bb0ff0ce3ef52c72d9275c28b909b6fedb2 Mon Sep 17 00:00:00 2001
From: mattdowle <mattjdowle@gmail.com>
Date: Tue, 16 Jul 2019 19:41:14 -0700
Subject: [PATCH 15/20] more tracing on i386

---
 R/data.table.R | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/R/data.table.R b/R/data.table.R
index 2bcf03b9b0..d0fcc02af8 100644
--- a/R/data.table.R
+++ b/R/data.table.R
@@ -1725,10 +1725,13 @@ replace_order = function(isub, verbose, env) {
     #fix for #1683
     if (use.I) assign(".I", seq_len(nrow(x)), thisEnv)
     ans = gforce(thisEnv, jsub, o__, f__, len__, irows) # irows needed for #971.
-    if (verbose) { cat("Returned from gforce()\n"); flush.console(); }
+    if (verbose) { cat("Returned from gforce()\n"); flush.console(); print(ans); flush.console(); }
     gi = if (length(o__)) o__[f__] else f__
+    if (verbose) { cat("point 1\n"); flush.console(); }
     g = lapply(grpcols, function(i) groups[[i]][gi])
+    if (verbose) { cat("point 2\n"); flush.console(); }
     ans = c(g, ans)
+    if (verbose) { cat("point 3\n"); flush.console(); }
   } else {
     ans = .Call(Cdogroups, x, xcols, groups, grpcols, jiscols, xjiscols, grporder, o__, f__, len__, jsub, SDenv, cols, newnames, !missing(on), verbose)
   }

From 07c393ab0c93ca7d2f722260b7762124b0572ba2 Mon Sep 17 00:00:00 2001
From: mattdowle <mattjdowle@gmail.com>
Date: Tue, 16 Jul 2019 20:44:19 -0700
Subject: [PATCH 16/20] more tracing

---
 R/data.table.R | 2 ++
 src/gsumm.c    | 4 ++--
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/R/data.table.R b/R/data.table.R
index d0fcc02af8..9a2989bd20 100644
--- a/R/data.table.R
+++ b/R/data.table.R
@@ -1735,7 +1735,9 @@ replace_order = function(isub, verbose, env) {
   } else {
     ans = .Call(Cdogroups, x, xcols, groups, grpcols, jiscols, xjiscols, grporder, o__, f__, len__, jsub, SDenv, cols, newnames, !missing(on), verbose)
   }
+  if (verbose) { cat("point 4\n"); flush.console(); }
   if (verbose) {cat(timetaken(last.started.at),"\n"); flush.console()}
+  if (verbose) { cat("point 5\n"); flush.console(); }
   # TO DO: xrows would be a better name for irows: irows means the rows of x that i joins to
   # Grouping by i: icols the joins columns (might not need), isdcols (the non join i and used by j), all __ are length x
   # Grouping by by: i is by val, icols NULL, o__ may be subset of x, f__ points to o__ (or x if !length o__)
diff --git a/src/gsumm.c b/src/gsumm.c
index 2abcda51dd..b663335891 100644
--- a/src/gsumm.c
+++ b/src/gsumm.c
@@ -576,7 +576,7 @@ SEXP gmean(SEXP x, SEXP narm)
   } break;
   case CPLXSXP: {
     const Rcomplex *xd = COMPLEX(x);
-    long double *si = calloc(ngrp, sizeof(long double));
+    si = calloc(ngrp, sizeof(long double));
     if (!si) error("Unable to allocate %d * %d bytes for si in gmean na.rm=TRUE", ngrp, sizeof(long double));
     for (int i=0; i<n; i++) {
       int thisgrp = grp[i];
@@ -1164,7 +1164,7 @@ SEXP gnthvalue(SEXP x, SEXP valArg) {
     ans = PROTECT(allocVector(CPLXSXP, ngrp));
     Rcomplex *dans = COMPLEX(ans);
     for (i=0; i<ngrp; i++) {
-      if (val > grpsize[i]) { COMPLEX(ans)[i].r = NA_REAL; COMPLEX(ans)[i].i = NA_REAL; continue; }
+      if (val > grpsize[i]) { dans[i].r = NA_REAL; dans[i].i = NA_REAL; continue; }
       k = ff[i]+val-2;
       if (isunsorted) k = oo[k]-1;
       k = (irowslen == -1) ? k : irows[k]-1;

From 802a42a628f67b3b8304d5907e5b14196224721b Mon Sep 17 00:00:00 2001
From: mattdowle <mattjdowle@gmail.com>
Date: Tue, 16 Jul 2019 21:41:59 -0700
Subject: [PATCH 17/20] cleared a few rchk messages

---
 src/coalesce.c | 11 +++++++----
 src/gsumm.c    | 10 ++++++----
 2 files changed, 13 insertions(+), 8 deletions(-)

diff --git a/src/coalesce.c b/src/coalesce.c
index d1cb2bc0aa..e57d63eb4c 100644
--- a/src/coalesce.c
+++ b/src/coalesce.c
@@ -7,6 +7,7 @@ SEXP coalesce(SEXP x, SEXP inplaceArg) {
     error("Internal error in coalesce.c: argument 'inplaceArg' must be TRUE or FALSE"); // # nocov
   const bool inplace = LOGICAL(inplaceArg)[0];
   const bool verbose = GetVerbose();
+  int nprotect = 0;
   if (length(x)==0) return R_NilValue;
   SEXP first;  // the first vector (it might be the first argument, or it might be the first column of a data.table|frame
   int off = 1; // when x has been pointed to the list of replacement candidates, is the first candidate in position 0 or 1 in the list
@@ -29,21 +30,23 @@ SEXP coalesce(SEXP x, SEXP inplaceArg) {
     if (factor) {
       if (!isFactor(item))
         error("Item 1 is a factor but item %d is not a factor. When factors are involved, all items must be factor.", i+2);
-      if (!R_compute_identical(getAttrib(first, R_LevelsSymbol), getAttrib(item, R_LevelsSymbol), 0))
+      if (!R_compute_identical(PROTECT(getAttrib(first, R_LevelsSymbol)), PROTECT(getAttrib(item, R_LevelsSymbol)), 0))
         error("Item %d is a factor but its levels are not identical to the first item's levels.", i+2);
+      UNPROTECT(2);
     } else {
       if (isFactor(item))
         error("Item %d is a factor but item 1 is not a factor. When factors are involved, all items must be factor.", i+2);
     }
     if (TYPEOF(first) != TYPEOF(item))
       error("Item %d is type %s but the first item is type %s. Please coerce before coalescing.", i+2, type2char(TYPEOF(item)), type2char(TYPEOF(first)));
-    if (!R_compute_identical(getAttrib(first, R_ClassSymbol), getAttrib(item, R_ClassSymbol), 0))
+    if (!R_compute_identical(PROTECT(getAttrib(first, R_ClassSymbol)), PROTECT(getAttrib(item, R_ClassSymbol)), 0))
       error("Item %d has a different class than item 1.", i+2);
+    UNPROTECT(2);
     if (length(item)!=1 && length(item)!=nrow)
       error("Item %d is length %d but the first item is length %d. Only singletons are recycled.", i+2, length(item), nrow);
   }
   if (!inplace) {
-    first = PROTECT(duplicate(first));
+    first = PROTECT(duplicate(first)); nprotect++;
     if (verbose) Rprintf("coalesce copied first item (inplace=FALSE)\n");
   }
   void **valP = (void **)R_alloc(nval, sizeof(void *));
@@ -140,7 +143,7 @@ SEXP coalesce(SEXP x, SEXP inplaceArg) {
   default:
     error("Unsupported type: %s", type2char(TYPEOF(first))); // e.g. raw is tested
   }
-  if (!inplace) UNPROTECT(1);
+  UNPROTECT(nprotect);
   return first;
 }
 
diff --git a/src/gsumm.c b/src/gsumm.c
index b663335891..208fc61678 100644
--- a/src/gsumm.c
+++ b/src/gsumm.c
@@ -507,20 +507,20 @@ SEXP gsum(SEXP x, SEXP narmArg)
     error("Type '%s' not supported by GForce sum (gsum). Either add the prefix base::sum(.) or turn off GForce optimization using options(datatable.optimize=1)", type2char(TYPEOF(x)));
   }
   copyMostAttrib(x, ans);
-  UNPROTECT(1);
   if (verbose) { Rprintf("%.3fs\n", wallclock()-started); }
+  UNPROTECT(1);
   return(ans);
 }
 
 SEXP gmean(SEXP x, SEXP narm)
 {
   SEXP ans=R_NilValue;
-  int protecti=0;
   //clock_t start = clock();
   if (!isLogical(narm) || LENGTH(narm)!=1 || LOGICAL(narm)[0]==NA_LOGICAL) error("na.rm must be TRUE or FALSE");
   if (!isVectorAtomic(x)) error("GForce mean can only be applied to columns, not .SD or similar. Likely you're looking for 'DT[,lapply(.SD,mean),by=,.SDcols=]'. See ?data.table.");
   if (inherits(x, "factor")) error("mean is not meaningful for factors.");
   if (!LOGICAL(narm)[0]) {
+    int protecti=0;
     ans = PROTECT(gsum(x,narm)); protecti++;
     switch(TYPEOF(ans)) {
     case LGLSXP: case INTSXP:
@@ -611,12 +611,14 @@ SEXP gmean(SEXP x, SEXP narm)
       ansd[i].r = s[i] >DBL_MAX ? R_PosInf : (s[i] < -DBL_MAX ? R_NegInf : (double)s[i]);
       ansd[i].i = si[i]>DBL_MAX ? R_PosInf : (si[i]< -DBL_MAX ? R_NegInf : (double)si[i]);
     }
-  }
+  } break;
+  default:
+    error("Internal error: unsupported type at the end of gmean"); // # nocov
   }
   free(s); free(si); free(c);
   copyMostAttrib(x, ans);
-  UNPROTECT(1);
   // Rprintf("this gmean na.rm=TRUE took %8.3f\n", 1.0*(clock()-start)/CLOCKS_PER_SEC);
+  UNPROTECT(1);
   return(ans);
 }
 

From 06c7089bb8c26d2fb8da71f54a174156620b248d Mon Sep 17 00:00:00 2001
From: mattdowle <mattjdowle@gmail.com>
Date: Wed, 17 Jul 2019 11:33:01 -0700
Subject: [PATCH 18/20] torture by Rstrict yielded

---
 src/gsumm.c | 20 +++++++++-----------
 1 file changed, 9 insertions(+), 11 deletions(-)

diff --git a/src/gsumm.c b/src/gsumm.c
index 208fc61678..69839804a3 100644
--- a/src/gsumm.c
+++ b/src/gsumm.c
@@ -96,6 +96,7 @@ SEXP gforce(SEXP env, SEXP jsub, SEXP o, SEXP f, SEXP l, SEXP irowsArg) {
     for (int j=0; j<grpsize[g]; j++)  elem[j] = g;
   }
   if (verbose) { Rprintf("gforce initial population of grp took %.3f\n", wallclock()-started); started=wallclock(); }
+  isunsorted = 0;
   if (LENGTH(o)) {
     isunsorted = 1; // for gmedian
 
@@ -157,8 +158,8 @@ SEXP gforce(SEXP env, SEXP jsub, SEXP o, SEXP f, SEXP l, SEXP irowsArg) {
   low  = (uint16_t *)R_alloc(nrow, sizeof(uint16_t));
   // global ghigh and glow because the g* functions (inside jsub) share this common memory
 
-  gx = (char *)R_alloc(nrow, sizeof(double));  // enough for a copy of one column (or length(irows) if supplied)
-
+  gx = (char *)R_alloc(nrow, sizeof(Rcomplex));  // enough for a copy of one column (or length(irows) if supplied)
+  // TODO: reduce to the largest type present; won't be faster (untouched RAM won't be fetched) but it will increase the largest size that works.
 
   counts = (int *)S_alloc(nBatch*highSize, sizeof(int));  // (S_ zeros) TODO: cache-line align and make highSize a multiple of 64
   tmpcounts = (int *)R_alloc(getDTthreads()*highSize, sizeof(int));
@@ -199,16 +200,13 @@ SEXP gforce(SEXP env, SEXP jsub, SEXP o, SEXP f, SEXP l, SEXP irowsArg) {
   if (verbose) { Rprintf("gforce eval took %.3f\n", wallclock()-started); started=wallclock(); }
   // if this eval() fails with R error, R will release grp for us. Which is why we use R_alloc above.
   if (isVectorAtomic(ans)) {
-    if (verbose) Rprintf("isVectorAtomic(ans)==TRUE\n");
-    SEXP tt = ans;
-    ans = PROTECT(allocVector(VECSXP, 1));
-    SET_VECTOR_ELT(ans, 0, tt);
-    UNPROTECT(1);
+    SEXP tt = PROTECT(allocVector(VECSXP, 1));
+    SET_VECTOR_ELT(tt, 0, ans);
+    UNPROTECT(2);
+    return tt;
   }
-  ngrp = 0; maxgrpn=0; irowslen = -1; isunsorted = 0;
-
   UNPROTECT(1);
-  return(ans);
+  return ans;
 }
 
 void *gather(SEXP x, bool *anyNA)
@@ -307,7 +305,7 @@ void *gather(SEXP x, bool *anyNA)
     #pragma omp parallel for num_threads(getDTthreads())
     for (int b=0; b<nBatch; b++) {
       int *restrict my_tmpcounts = tmpcounts + omp_get_thread_num()*highSize;
-      memcpy(my_tmpcounts, counts + b*highSize, highSize*sizeof(Rcomplex));   // original cumulated   // already cumulated for this batch
+      memcpy(my_tmpcounts, counts + b*highSize, highSize*sizeof(int));
       Rcomplex *restrict my_gx = (Rcomplex *)gx + b*batchSize;
       const uint16_t *my_high = high + b*batchSize;
       const int howMany = b==nBatch-1 ? lastBatchSize : batchSize;

From 01f1d9cc1660e9078c870f7fde53e93b6f8b72fb Mon Sep 17 00:00:00 2001
From: mattdowle <mattjdowle@gmail.com>
Date: Wed, 17 Jul 2019 12:21:31 -0700
Subject: [PATCH 19/20] remove tracing

---
 R/data.table.R        | 6 ------
 inst/tests/tests.Rraw | 3 +--
 2 files changed, 1 insertion(+), 8 deletions(-)

diff --git a/R/data.table.R b/R/data.table.R
index 9a2989bd20..fea514aeef 100644
--- a/R/data.table.R
+++ b/R/data.table.R
@@ -1725,19 +1725,13 @@ replace_order = function(isub, verbose, env) {
     #fix for #1683
     if (use.I) assign(".I", seq_len(nrow(x)), thisEnv)
     ans = gforce(thisEnv, jsub, o__, f__, len__, irows) # irows needed for #971.
-    if (verbose) { cat("Returned from gforce()\n"); flush.console(); print(ans); flush.console(); }
     gi = if (length(o__)) o__[f__] else f__
-    if (verbose) { cat("point 1\n"); flush.console(); }
     g = lapply(grpcols, function(i) groups[[i]][gi])
-    if (verbose) { cat("point 2\n"); flush.console(); }
     ans = c(g, ans)
-    if (verbose) { cat("point 3\n"); flush.console(); }
   } else {
     ans = .Call(Cdogroups, x, xcols, groups, grpcols, jiscols, xjiscols, grporder, o__, f__, len__, jsub, SDenv, cols, newnames, !missing(on), verbose)
   }
-  if (verbose) { cat("point 4\n"); flush.console(); }
   if (verbose) {cat(timetaken(last.started.at),"\n"); flush.console()}
-  if (verbose) { cat("point 5\n"); flush.console(); }
   # TO DO: xrows would be a better name for irows: irows means the rows of x that i joins to
   # Grouping by i: icols the joins columns (might not need), isdcols (the non join i and used by j), all __ are length x
   # Grouping by by: i is by val, icols NULL, o__ may be subset of x, f__ points to o__ (or x if !length o__)
diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index 42cc7d87b1..b349c887fa 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -15306,9 +15306,8 @@ test(2066.04, DT[, tail(v, 1), by=id], data.table(id=1:2, V1=(2:3)*1i))
 test(2066.05, DT[, v[2], by=id], data.table(id = 1:2, V1=c(2i, NA)))
 ## former test 1968.1
 DT = data.table(A=1:5, B=-3i, C=2147483647L)
-test(2066.06, DT[, sum(B), by=A%%2L, verbose=TRUE], data.table(A = 1:0, V1 = c(-9i, -6i)))
+test(2066.06, DT[, sum(B), by=A%%2L], data.table(A = 1:0, V1 = c(-9i, -6i)))
 
-test(9999, 3L, 3L) # dummy test
 
 ###################################
 #  Add new tests above this line  #

From 4f2a97d5869b17ccd4528c2d1e6c6d4ce0a67692 Mon Sep 17 00:00:00 2001
From: mattdowle <mattjdowle@gmail.com>
Date: Wed, 17 Jul 2019 12:56:53 -0700
Subject: [PATCH 20/20] coverage

---
 inst/tests/tests.Rraw | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index b349c887fa..6fb289dd54 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -15306,7 +15306,13 @@ test(2066.04, DT[, tail(v, 1), by=id], data.table(id=1:2, V1=(2:3)*1i))
 test(2066.05, DT[, v[2], by=id], data.table(id = 1:2, V1=c(2i, NA)))
 ## former test 1968.1
 DT = data.table(A=1:5, B=-3i, C=2147483647L)
-test(2066.06, DT[, sum(B), by=A%%2L], data.table(A = 1:0, V1 = c(-9i, -6i)))
+test(2066.06, DT[,    .(sum(B), mean(B)), by=A%%2L], data.table(A=1:0, V1=c(-9i, -6i), V2=-3i))
+test(2066.07, DT[2:4, .(sum(B), mean(B)), by=A%%2L], data.table(A=0:1, V1=c(-6i, -3i), V2=-3i))
+DT[4, B:=NA]
+test(2066.08, DT[,    .(sum(B), mean(B)), by=A%%2L], data.table(A=1:0, V1=c(-9i, NA), V2=c(-3i, NA)))
+test(2066.09, DT[2:4, .(sum(B), mean(B)), by=A%%2L], data.table(A=0:1, V1=c(NA, -3i), V2=c(NA, -3i)))
+test(2066.10, DT[,    .(sum(B, na.rm=TRUE), mean(B, na.rm=TRUE)), by=A%%2L], data.table(A=1:0, V1=c(-9i, -3i), V2=-3i))
+test(2066.11, DT[2:4, .(sum(B, na.rm=TRUE), mean(B, na.rm=TRUE)), by=A%%2L], data.table(A=0:1, V1=c(-3i, -3i), V2=-3i))
 
 
 ###################################