From 73a63ca5fe9911c57e9ab6212c3f39911cc8ec33 Mon Sep 17 00:00:00 2001
From: Michael Chirico <michael.chirico@grabtaxi.com>
Date: Wed, 10 Jul 2019 13:21:15 +0800
Subject: [PATCH 01/25] Closes #1444 -- setkey works on tables with complex
 columns

---
 NEWS.md               |  2 ++
 inst/tests/tests.Rraw |  3 +++
 src/reorder.c         | 21 ++++++++++++++++-----
 3 files changed, 21 insertions(+), 5 deletions(-)

diff --git a/NEWS.md b/NEWS.md
index 10e3d5d5ea..81a50403ae 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -128,6 +128,8 @@
     identical(y1,y2) && identical(y1,y3)
     # TRUE
     ```
+    
+19. Support for `setkey` on `data.table`s with complex-valued columns, [#1444](https://github.com/Rdatatable/data.table/issues/1444). Note that complex columns themselves are not well-ordered (and thus not supported as keys). Thanks @atalikami for the report.
 
 #### BUG FIXES
 
diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index 1c720d4de9..838bf70306 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -15200,6 +15200,9 @@ d2 = data.table(r = 1:5, s = seq(0L, 20L, 5L))
 test(2062.1, d1[d2, on = .(a <= s, b >= s), j = .SD],   ans<-data.table(a=INT(0,5,10,10,15,20), b=INT(0,5,10,10,15,20)))
 test(2062.2, d1[d2, on = .(a <= s, b >= s)][, .(a, b)], ans)
 
+# #1444 -- support for ordering tables with complex columns
+DT = data.table(a = 2:1, z = complex(0, 0:1))
+test(2063, setkey(DT, a), data.table(a = 1:2, z = complex(0, 1:0), key='a'))
 
 ###################################
 #  Add new tests above this line  #
diff --git a/src/reorder.c b/src/reorder.c
index 337d09b70f..62116def88 100644
--- a/src/reorder.c
+++ b/src/reorder.c
@@ -1,4 +1,5 @@
 #include "data.table.h"
+#include <complex.h>
 
 SEXP reorder(SEXP x, SEXP order)
 {
@@ -13,8 +14,8 @@ SEXP reorder(SEXP x, SEXP order)
     ncol = length(x);
     for (int i=0; i<ncol; i++) {
       SEXP v = VECTOR_ELT(x,i);
-      if (SIZEOF(v)!=4 && SIZEOF(v)!=8)
-        error("Item %d of list is type '%s' which isn't yet supported", i+1, type2char(TYPEOF(v)));
+      if (SIZEOF(v)!=4 && SIZEOF(v)!=8 && SIZEOF(v)!=16)
+        error("Item %d of list is type '%s' which isn't yet supported (SIZEOF=%d)", i+1, type2char(TYPEOF(v)), SIZEOF(v));
       if (length(v)!=nrow)
         error("Column %d is length %d which differs from length of column 1 (%d). Invalid data.table.", i+1, length(v), nrow);
       if (SIZEOF(v) > maxSize)
@@ -22,8 +23,8 @@ SEXP reorder(SEXP x, SEXP order)
       if (ALTREP(v)) SET_VECTOR_ELT(x,i,duplicate(v));  // expand compact vector in place ready for reordering by reference
     }
   } else {
-    if (SIZEOF(x)!=4 && SIZEOF(x)!=8)
-      error("reorder accepts vectors but this non-VECSXP is type '%s' which isn't yet supported", type2char(TYPEOF(x)));
+    if (SIZEOF(x)!=4 && SIZEOF(x)!=8 && SIZEOF(x)!=16)
+      error("reorder accepts vectors but this non-VECSXP is type '%s' which isn't yet supported (SIZEOF=%d)", type2char(TYPEOF(x)), SIZEOF(x));
     if (ALTREP(x)) error("Internal error in reorder.c: cannot reorder an ALTREP vector. Please see NEWS item 2 in v1.11.4 and report this as a bug."); // # nocov
     maxSize = SIZEOF(x);
     nrow = length(x);
@@ -68,7 +69,7 @@ SEXP reorder(SEXP x, SEXP order)
         tmp[i] = cvd[idx[i]-1];  // copies 4 bytes; including pointers on 32bit (STRSXP and VECSXP)
       }
       memcpy(vd+start, tmp+start, (end-start+1)*size);
-    } else {
+    } else if (size==8) {
       double *vd = (double *)DATAPTR(v);
       const double *restrict cvd = vd;
       double *restrict tmp = (double *)TMP;
@@ -77,6 +78,16 @@ SEXP reorder(SEXP x, SEXP order)
         tmp[i] = cvd[idx[i]-1];  // copies 8 bytes; including pointers on 64bit (STRSXP and VECSXP)
       }
       memcpy(vd+start, tmp+start, (end-start+1)*size);
+    } else {
+      // #1444 -- support for copying CPLXSXP (which have size 16)
+      double complex *vd = (double complex *)DATAPTR(v);
+      const double complex *restrict cvd = vd;
+      double complex *restrict tmp = (double complex *)TMP;
+      #pragma omp parallel for num_threads(getDTthreads())
+      for (int i=start; i<=end; i++) {
+        tmp[i] = cvd[idx[i]-1];  // copies 16 bytes; including pointers on 64bit (STRSXP and VECSXP)
+      }
+      memcpy(vd+start, tmp+start, (end-start+1)*size);
     }
   }
   // It's ok to ignore write barrier, and in parallel too, only because this reorder() function accepts and checks

From bdce4a5ff5f28ce5ef4d2c6fd8694a4aa156866f Mon Sep 17 00:00:00 2001
From: Michael Chirico <michael.chirico@grabtaxi.com>
Date: Wed, 10 Jul 2019 13:33:13 +0800
Subject: [PATCH 02/25] extra test for group operation mentioned in issue

---
 inst/tests/tests.Rraw | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index 838bf70306..ccee85a383 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -15202,7 +15202,8 @@ test(2062.2, d1[d2, on = .(a <= s, b >= s)][, .(a, b)], ans)
 
 # #1444 -- support for ordering tables with complex columns
 DT = data.table(a = 2:1, z = complex(0, 0:1))
-test(2063, setkey(DT, a), data.table(a = 1:2, z = complex(0, 1:0), key='a'))
+test(2063.1, setkey(copy(DT), a), data.table(a = 1:2, z = complex(0, 1:0), key='a'))
+test(2063.2, DT[ , abs(z), by = a], data.table(a = 2:1, V1 = c(0, 1)))
 
 ###################################
 #  Add new tests above this line  #

From e6533dca399cc0fcb77ee776f0eb8166d404f7c9 Mon Sep 17 00:00:00 2001
From: Michael Chirico <michael.chirico@grabtaxi.com>
Date: Fri, 12 Jul 2019 00:05:07 +0800
Subject: [PATCH 03/25] new tests for coverage

---
 inst/tests/tests.Rraw | 9 +++++++--
 src/reorder.c         | 2 +-
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index ccee85a383..c86b55b67f 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -15202,8 +15202,13 @@ test(2062.2, d1[d2, on = .(a <= s, b >= s)][, .(a, b)], ans)
 
 # #1444 -- support for ordering tables with complex columns
 DT = data.table(a = 2:1, z = complex(0, 0:1))
-test(2063.1, setkey(copy(DT), a), data.table(a = 1:2, z = complex(0, 1:0), key='a'))
-test(2063.2, DT[ , abs(z), by = a], data.table(a = 2:1, V1 = c(0, 1)))
+test(2063.1, setkey(copy(DT), a), data.table(a=1:2, z=complex(0, 1:0), key='a'))
+test(2063.2, DT[ , abs(z), by=a], data.table(a=2:1, V1=c(0, 1)))
+# raw continues not to be supported
+DT = data.table(ID=2:1, r=as.raw(0:1))
+test(2063.3, setkey(DT, ID), error="Item 2 of list is type 'raw'")
+# setreordervec triggers !isNewList branch for coverage
+test(2063.4, setreordervec(DT$r), error="reorder accepts vectors but this non-VECSXP")
 
 ###################################
 #  Add new tests above this line  #
diff --git a/src/reorder.c b/src/reorder.c
index 62116def88..d55d147283 100644
--- a/src/reorder.c
+++ b/src/reorder.c
@@ -124,7 +124,7 @@ SEXP setrev(SEXP x) {
       ((int *)xt)[len-1-j] = *(int *)tmp;
     }
   } else {
-    if (size!=8) error("Size of x isn't 4 or 8");
+    if (size!=8) error("Size of x isn't 4, 8 or 16");
     tmp = (char *)Calloc(1, double);
     if (!tmp) error("unable to allocate temporary working memory for reordering x");
     for (j=0;j<n;j++) {

From c210ede929803bc22f16c7d599c39567fe254a94 Mon Sep 17 00:00:00 2001
From: Michael Chirico <michael.chirico@grabtaxi.com>
Date: Fri, 12 Jul 2019 00:20:24 +0800
Subject: [PATCH 04/25] missing arg

---
 inst/tests/tests.Rraw | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index c86b55b67f..777bed1706 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -15208,7 +15208,7 @@ test(2063.2, DT[ , abs(z), by=a], data.table(a=2:1, V1=c(0, 1)))
 DT = data.table(ID=2:1, r=as.raw(0:1))
 test(2063.3, setkey(DT, ID), error="Item 2 of list is type 'raw'")
 # setreordervec triggers !isNewList branch for coverage
-test(2063.4, setreordervec(DT$r), error="reorder accepts vectors but this non-VECSXP")
+test(2063.4, setreordervec(DT$r, order(DT$ID)), error="reorder accepts vectors but this non-VECSXP")
 
 ###################################
 #  Add new tests above this line  #

From 55bd50182e1578c5f9e428fa62a5d11ec2d0abfe Mon Sep 17 00:00:00 2001
From: Michael Chirico <michael.chirico@grabtaxi.com>
Date: Sat, 13 Jul 2019 05:22:07 +0800
Subject: [PATCH 05/25] Closes #1703 -- forderv handles complex input

---
 NEWS.md               |  2 ++
 R/setkey.R            | 34 +++++++++++++++++++++++++++++++++-
 inst/tests/tests.Rraw | 11 +++++++++++
 3 files changed, 46 insertions(+), 1 deletion(-)

diff --git a/NEWS.md b/NEWS.md
index 691647cba1..c350ac0393 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -128,6 +128,8 @@
     identical(y1,y2) && identical(y1,y3)
     # TRUE
     ```
+    
+19. Sorting now extended to complex vectors, [#1703](https://github.com/Rdatatable/data.table/issues/1703). Consistent with `base::order`, sorting is done lexicographically (`z1<z2` means `Re(z1) < Re(z2) | (Re(z1) == Re(z2) & Im(z1) < Im(z2))`).
 
 #### BUG FIXES
 
diff --git a/R/setkey.R b/R/setkey.R
index f4f18cbc39..6f80f80349 100644
--- a/R/setkey.R
+++ b/R/setkey.R
@@ -196,6 +196,17 @@ forderv = function(x, by=seq_along(x), retGrp=FALSE, sort=TRUE, order=1L, na.las
     by = NULL
     if ( !missing(order) && (length(order) != 1L || !(order %in% c(1L, -1L))) )
       stop("x is a single vector, length(order) must be =1 and it's value should be 1 (ascending) or -1 (descending).")
+    # we don't expect users to need complex sorting extensively
+    #   or on massive data sets, so we take the approach of
+    #   splitting a complex vector into its real & imaginary parts
+    #   and using the regular forderv machinery to sort; a baremetal
+    #   implementation would at root do the same, but the approach
+    #   here is a bit more slapdash with respect to memory efficiency
+    if (is.complex(x)) {
+      x = list(Re(x), Im(x))
+      by = 1:2
+      order = rep(order, 2L)
+    }
   } else {
     if (!length(x)) return(integer(0L)) # to be consistent with base::order. this'll make sure forderv(NULL) will result in error
                        # (as base does) but forderv(data.table(NULL)) and forderv(list()) will return integer(0L))
@@ -208,9 +219,30 @@ forderv = function(x, by=seq_along(x), retGrp=FALSE, sort=TRUE, order=1L, na.las
       stop("'by' is type 'double' and one or more items in it are not whole integers")
     }
     by = as.integer(by)
-    if ( (length(order) != 1L && length(order) != length(by)) || any(!order %in% c(1L, -1L)) )
+    if ( (length(order) != 1L && length(order) != length(by)) || !all(order %in% c(1L, -1L)) )
       stop("x is a list, length(order) must be either =1 or =length(by) and each value should be 1 or -1 for each column in 'by', corresponding to ascending or descending order, respectively. If length(order) == 1, it will be recycled to length(by).")
     if (length(order) == 1L) order = rep(order, length(by))
+    is_cplx = vapply_1b(x, is.complex)
+    if (any(is_cplx)) {
+      by_order = order(by)
+      out_x = vector('list', length(x) + sum(is_cplx))
+      # order of order matches by, but is_cplx doesn't
+      order = rep(order, is_cplx[by_order] + 1L)
+      j = 1L
+      for (i in seq_along(is_cplx)) {
+        by_i = by_order[i]
+        if (is_cplx[by_i]) {
+          out_x[[j]] = Re(x[[by_i]])
+          out_x[[j + 1L]] = Im(x[[by_i]])
+          j = j + 2L
+        } else {
+          out_x[[j]] = x[[by_i]]
+          j = j + 1L
+        }
+      }
+      x = out_x; rm(out_x)
+      by = seq_along(x)
+    }
   }
   order = as.integer(order)
   .Call(Cforder, x, by, retGrp, sort, order, na.last)  # returns integer() if already sorted, regardless of sort=TRUE|FALSE
diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index 0c498e9f76..de4106d582 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -15208,6 +15208,17 @@ d2 = data.table(r = 1:5, s = seq(0L, 20L, 5L))
 test(2062.1, d1[d2, on = .(a <= s, b >= s), j = .SD],   ans<-data.table(a=INT(0,5,10,10,15,20), b=INT(0,5,10,10,15,20)))
 test(2062.2, d1[d2, on = .(a <= s, b >= s)][, .(a, b)], ans)
 
+# forderv (and downstream functions) handles complex vector input, part of #3690
+DT = data.table(
+  a = c(1L, 1L, 8L, 2L, 1L, 9L, 3L, 2L, 6L, 6L),
+  b = c(3+9i, 10+5i, 8+2i, 10+4i, 3+3i, 1+2i, 5+1i, 8+1i, 8+2i, 10+6i)
+)
+test(2063.1, DT[order(a, b)], DT[base::order(a, b)])
+test(2063.2, DT[order(a, -b)], DT[base::order(a, -b)])
+test(2063.3, forderv(DT$b, order = 1L), base::order(DT$b))
+test(2063.4, forderv(DT$b, order = -1L), base::order(-DT$b))
+test(2063.5, forderv(DT, by = 2:1), forderv(DT[ , 2:1]))
+test(2063.6, forderv(DT, by = 2:1, order = c(1L, -1L)), DT[order(b, -a), which = TRUE])
 
 ###################################
 #  Add new tests above this line  #

From 2e8c9966c7e2177771672ece974c31e9527a9d4f Mon Sep 17 00:00:00 2001
From: Michael Chirico <michael.chirico@grabtaxi.com>
Date: Sat, 13 Jul 2019 18:15:14 +0800
Subject: [PATCH 06/25] slight re-tooling, now passing tests

---
 R/setkey.R            | 24 +++++++++++++-----------
 inst/tests/tests.Rraw | 15 ++++++++-------
 src/forder.c          |  2 +-
 3 files changed, 22 insertions(+), 19 deletions(-)

diff --git a/R/setkey.R b/R/setkey.R
index 6f80f80349..f46580cbde 100644
--- a/R/setkey.R
+++ b/R/setkey.R
@@ -222,21 +222,23 @@ forderv = function(x, by=seq_along(x), retGrp=FALSE, sort=TRUE, order=1L, na.las
     if ( (length(order) != 1L && length(order) != length(by)) || !all(order %in% c(1L, -1L)) )
       stop("x is a list, length(order) must be either =1 or =length(by) and each value should be 1 or -1 for each column in 'by', corresponding to ascending or descending order, respectively. If length(order) == 1, it will be recycled to length(by).")
     if (length(order) == 1L) order = rep(order, length(by))
-    is_cplx = vapply_1b(x, is.complex)
+    nn = length(x)
+    is_cplx = vapply_1b(by, function(i) {
+      if (i < 1L) stop("'by' must be positive, received ", i, call. = FALSE)
+      if (i > nn) stop("'by' must be in [1,", nn, "], received ", i, call. = FALSE)
+      is.complex(x[[i]])
+    })
     if (any(is_cplx)) {
-      by_order = order(by)
-      out_x = vector('list', length(x) + sum(is_cplx))
-      # order of order matches by, but is_cplx doesn't
-      order = rep(order, is_cplx[by_order] + 1L)
+      out_x = vector('list', length(by) + sum(is_cplx))
+      order = rep(order, is_cplx + 1L)
       j = 1L
-      for (i in seq_along(is_cplx)) {
-        by_i = by_order[i]
-        if (is_cplx[by_i]) {
-          out_x[[j]] = Re(x[[by_i]])
-          out_x[[j + 1L]] = Im(x[[by_i]])
+      for (i in seq_along(by)) {
+        if (is_cplx[i]) {
+          out_x[[j]] = Re(x[[by[i]]])
+          out_x[[j + 1L]] = Im(x[[by[i]]])
           j = j + 2L
         } else {
-          out_x[[j]] = x[[by_i]]
+          out_x[[j]] = x[[by[i]]]
           j = j + 1L
         }
       }
diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index de4106d582..32b1196283 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -2776,7 +2776,7 @@ test(988, unique(dt, by='B'), dt[!duplicated(df[, 'B'])])
 test(989, unique(dt, by='C'), dt[!duplicated(df[, 'C'])])
 test(990, unique(dt, by=c('B', 'C')), dt[!duplicated(df[, c('B', 'C')])])
 test(991, unique(dt, by=NULL), dt[!duplicated(df)])
-test(991.1, unique(dt, by=4), error="'by' value 4 out of range.*1,3")
+test(991.1, unique(dt, by=4), error="'by' must be in [1,3], received 4")
 test(991.2, unique(dt, by=c(1,3.1)), error="'by' is type 'double' and one or more items in it are not whole integers")
 test(991.3, unique(dt, by=2:3), dt[!duplicated(df[,c('B','C')])])
 test(991.4, unique(dt, by=c('C','D','E')), error="'by' contains 'D' which is not a column name")
@@ -11692,11 +11692,11 @@ test(1844.2, forder(DT,V1,V2,na.last=NA), INT(2,1,3,0,4))  # prior to v1.12.0 th
 # now with two NAs in that 2-group covers forder.c:forder line 1269 starting: else if (nalast == 0 && tmp==-2) {
 DT = data.table(c("a","a","a","b","b"),c(2,1,3,NA,NA))
 test(1844.3, forder(DT,V1,V2,na.last=NA), INT(2,1,3,0,0))
-DT = data.table((0+0i)^(-3:3), 7:1)
-test(1844.4, forder(DT,V1,V2), error="Column 1 of by= (1) is type 'complex', not yet supported")
-test(1844.5, forder(DT,V2,V1), error="Column 2 of by= (2) is type 'complex', not yet supported")
-DT = data.table((0+0i)^(-3:3), c(5L,5L,1L,2L,2L,2L,2L))
-test(1844.6, forder(DT,V2,V1), error="Column 2 of by= (2) is type 'complex', not yet supported")
+DT = data.table(as.raw(0:6), 7:1)
+test(1844.4, forder(DT,V1,V2), error="Column 1 of by= (1) is type 'raw', not yet supported")
+test(1844.5, forder(DT,V2,V1), error="Column 2 of by= (2) is type 'raw', not yet supported")
+DT = data.table(as.raw(0:6), c(5L,5L,1L,2L,2L,2L,2L))
+test(1844.6, forder(DT,V2,V1), error="Column 2 of by= (2) is type 'raw', not yet supported")
 
 # fix for non-equi joins issue #1991. Thanks to Henrik for the nice minimal example.
 d1 <- data.table(x = c(rep(c("b", "a", "c"), each = 3), c("a", "b")), y = c(rep(c(1, 3, 6), 3), 6, 6), id = 1:11)
@@ -15211,7 +15211,8 @@ test(2062.2, d1[d2, on = .(a <= s, b >= s)][, .(a, b)], ans)
 # forderv (and downstream functions) handles complex vector input, part of #3690
 DT = data.table(
   a = c(1L, 1L, 8L, 2L, 1L, 9L, 3L, 2L, 6L, 6L),
-  b = c(3+9i, 10+5i, 8+2i, 10+4i, 3+3i, 1+2i, 5+1i, 8+1i, 8+2i, 10+6i)
+  b = c(3+9i, 10+5i, 8+2i, 10+4i, 3+3i, 1+2i, 5+1i, 8+1i, 8+2i, 10+6i),
+  c = 6
 )
 test(2063.1, DT[order(a, b)], DT[base::order(a, b)])
 test(2063.2, DT[order(a, -b)], DT[base::order(a, -b)])
diff --git a/src/forder.c b/src/forder.c
index 6135260986..d810bf6536 100644
--- a/src/forder.c
+++ b/src/forder.c
@@ -442,7 +442,7 @@ SEXP forder(SEXP DT, SEXP by, SEXP retGrpArg, SEXP sortGroupsArg, SEXP ascArg, S
   nrow = length(VECTOR_ELT(DT,0));
   for (int i=0; i<LENGTH(by); i++) {
     if (INTEGER(by)[i] < 1 || INTEGER(by)[i] > length(DT))
-      error("'by' value %d out of range [1,%d]", INTEGER(by)[i], length(DT));
+      error("Internal error: 'by' out of range should have been caught earlier"); // #nocov
     if ( nrow != length(VECTOR_ELT(DT, INTEGER(by)[i]-1)) )
       error("Column %d is length %d which differs from length of column 1 (%d)\n", INTEGER(by)[i], length(VECTOR_ELT(DT, INTEGER(by)[i]-1)), nrow);
   }

From e3a6aa6eead80dec8805bd53716cc6e30b100a02 Mon Sep 17 00:00:00 2001
From: Michael Chirico <michael.chirico@grabtaxi.com>
Date: Sat, 13 Jul 2019 22:22:39 +0800
Subject: [PATCH 07/25] more progress; but stonewalled by bmerge

---
 R/bmerge.R            |  2 +-
 R/data.table.R        |  2 +-
 R/setkey.R            |  5 +++--
 inst/tests/tests.Rraw | 38 ++++++++++++++++++++++++++++++++------
 4 files changed, 37 insertions(+), 10 deletions(-)

diff --git a/R/bmerge.R b/R/bmerge.R
index 039ec127a5..b3aef83c2f 100644
--- a/R/bmerge.R
+++ b/R/bmerge.R
@@ -14,7 +14,7 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos
   # careful to only plonk syntax (full column) on i/x from now on otherwise user's i and x would change;
   #   this is why shallow() is very importantly internal only, currently.
 
-  supported = c("logical", "integer", "double", "character", "factor", "integer64")
+  supported = c(ORDERING_TYPES, "factor", "integer64")
 
   getClass = function(x) {
     ans = typeof(x)
diff --git a/R/data.table.R b/R/data.table.R
index a13597926b..7436729314 100644
--- a/R/data.table.R
+++ b/R/data.table.R
@@ -829,7 +829,7 @@ replace_order = function(isub, verbose, env) {
         if (!is.list(byval)) stop("'by' or 'keyby' must evaluate to a vector or a list of vectors (where 'list' includes data.table and data.frame which are lists, too)")
         if (length(byval)==1L && is.null(byval[[1L]])) bynull=TRUE #3530 when by=(function()NULL)()
         if (!bynull) for (jj in seq_len(length(byval))) {
-          if (!typeof(byval[[jj]]) %chin% c("integer","logical","character","double")) stop("column or expression ",jj," of 'by' or 'keyby' is type ",typeof(byval[[jj]]),". Do not quote column names. Usage: DT[,sum(colC),by=list(colA,month(colB))]")
+          if (!typeof(byval[[jj]]) %chin% c("integer","logical","character","double","complex")) stop("column or expression ",jj," of 'by' or 'keyby' is type ",typeof(byval[[jj]]),". Do not quote column names. Usage: DT[,sum(colC),by=list(colA,month(colB))]")
         }
         tt = vapply_1i(byval,length)
         if (any(tt!=xnrow)) stop("The items in the 'by' or 'keyby' list are length (",paste(tt,collapse=","),"). Each must be length ", xnrow, "; the same length as there are rows in x (after subsetting if i is provided).")
diff --git a/R/setkey.R b/R/setkey.R
index f46580cbde..9da7d1bca5 100644
--- a/R/setkey.R
+++ b/R/setkey.R
@@ -83,7 +83,7 @@ setkeyv = function(x, cols, verbose=getOption("datatable.verbose"), physical=TRU
   if (".xi" %chin% names(x)) stop("x contains a column called '.xi'. Conflicts with internal use by data.table.")
   for (i in cols) {
     .xi = x[[i]]  # [[ is copy on write, otherwise checking type would be copying each column
-    if (!typeof(.xi) %chin% c("integer","logical","character","double")) stop("Column '",i,"' is type '",typeof(.xi),"' which is not supported as a key column type, currently.")
+    if (!typeof(.xi) %chin% ORDERING_TYPES) stop("Column '",i,"' is type '",typeof(.xi),"' which is not supported as a key column type, currently.")
   }
   if (!is.character(cols) || length(cols)<1L) stop("Internal error. 'cols' should be character at this point in setkey; please report.") # nocov
 
@@ -181,6 +181,7 @@ is.sorted = function(x, by=seq_along(x)) {
   # Important to call forder.c::fsorted here, for consistent character ordering and numeric/integer64 twiddling.
 }
 
+ORDERING_TYPES = c('logical', 'integer', 'double', 'complex', 'character')
 forderv = function(x, by=seq_along(x), retGrp=FALSE, sort=TRUE, order=1L, na.last=FALSE)
 {
   if (!(sort || retGrp)) stop("At least one of retGrp or sort must be TRUE")
@@ -364,7 +365,7 @@ setorderv = function(x, cols = colnames(x), order=1L, na.last=FALSE)
   if (".xi" %chin% colnames(x)) stop("x contains a column called '.xi'. Conflicts with internal use by data.table.")
   for (i in cols) {
     .xi = x[[i]]  # [[ is copy on write, otherwise checking type would be copying each column
-    if (!typeof(.xi) %chin% c("integer","logical","character","double")) stop("Column '",i,"' is type '",typeof(.xi),"' which is not supported for ordering currently.")
+    if (!typeof(.xi) %chin% ORDERING_TYPES) stop("Column '",i,"' is type '",typeof(.xi),"' which is not supported for ordering currently.")
   }
   if (!is.character(cols) || length(cols)<1L) stop("Internal error. 'cols' should be character at this point in setkey; please report.") # nocov
 
diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index 32b1196283..c845f4b87c 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -15214,12 +15214,38 @@ DT = data.table(
   b = c(3+9i, 10+5i, 8+2i, 10+4i, 3+3i, 1+2i, 5+1i, 8+1i, 8+2i, 10+6i),
   c = 6
 )
-test(2063.1, DT[order(a, b)], DT[base::order(a, b)])
-test(2063.2, DT[order(a, -b)], DT[base::order(a, -b)])
-test(2063.3, forderv(DT$b, order = 1L), base::order(DT$b))
-test(2063.4, forderv(DT$b, order = -1L), base::order(-DT$b))
-test(2063.5, forderv(DT, by = 2:1), forderv(DT[ , 2:1]))
-test(2063.6, forderv(DT, by = 2:1, order = c(1L, -1L)), DT[order(b, -a), which = TRUE])
+test(2063.01, DT[order(a, b)], DT[base::order(a, b)])
+test(2063.02, DT[order(a, -b)], DT[base::order(a, -b)])
+test(2063.03, forderv(DT$b, order = 1L), base::order(DT$b))
+test(2063.04, forderv(DT$b, order = -1L), base::order(-DT$b))
+test(2063.05, forderv(DT, by = 2:1), forderv(DT[ , 2:1]))
+test(2063.06, forderv(DT, by = 2:1, order = c(1L, -1L)), DT[order(b, -a), which = TRUE])
+
+# downstreams of forder
+DT = data.table(
+  z = c(0, 0, 1, 1, 2, 3) + c(1, 1, 2, 2, 3, 4)*1i,
+  grp = rep(1:2, 3L),
+  v = c(3, 1, 4, 1, 5, 9)
+)
+unq_z = 0:3 + (1:4)*1i
+test(2063.07, DT[ , .N, by=z], data.table(z=unq_z, N=c(2L, 2L, 1L, 1L)))
+# uniqlist.c needs work
+# test(2063.08, DT[ , .N, keyby = z],
+# DT = setkey(copy(DT[.N:1]), z)
+# test(2963.09, key(DT), 'z')
+# test(2963.10, DT
+test(2963.11, dcast(DT, z ~ grp, value.var='v', fill=0),
+     data.table(z=unq_z, `1`=c(3, 4, 5, 0), `2`=c(1, 1, 0, 9), key='z'))
+test(2963.12, frank(DT$z), c(1.5, 1.5, 3.5, 5, 6))
+test(2963.13, frank(DT$z, ties.method='max'), c(2L, 2L, 4L, 5L, 6L))
+test(2963.14, frank(-DT$z, ties.method='min'), c(5L, 5L, 3L, 3L, 2L, 1L))
+test(2963.15, DT[ , rowid(z, grp)], rep(1L, 6L))
+test(1963.16, DT[ , rowid(z)], c(1:2, 1:2, 1L, 1L))
+
+DT1 = data.table(z = c(0+1i, 2-3i, 4+1i))
+DT2 = data.table(z = c(2-3i, 0+1i, 0+0i))
+DT1[DT2, on = 'z']
+
 
 ###################################
 #  Add new tests above this line  #

From 52bcac04aba8ad76873f48a226fb4ac0ac98c954 Mon Sep 17 00:00:00 2001
From: Michael Chirico <michael.chirico@grabtaxi.com>
Date: Sun, 14 Jul 2019 00:28:17 +0800
Subject: [PATCH 08/25] moved new logic to C so e.g. bmerge can call it from
 there

---
 R/setkey.R            | 34 ------------------------------
 inst/tests/tests.Rraw | 28 +++++++++++--------------
 src/forder.c          | 49 ++++++++++++++++++++++++++++++++++++++++++-
 3 files changed, 60 insertions(+), 51 deletions(-)

diff --git a/R/setkey.R b/R/setkey.R
index 94132a6af0..bed50cba50 100644
--- a/R/setkey.R
+++ b/R/setkey.R
@@ -197,17 +197,6 @@ forderv = function(x, by=seq_along(x), retGrp=FALSE, sort=TRUE, order=1L, na.las
     by = NULL
     if ( !missing(order) && (length(order) != 1L || !(order %in% c(1L, -1L))) )
       stop("x is a single vector, length(order) must be =1 and it's value should be 1 (ascending) or -1 (descending).")
-    # we don't expect users to need complex sorting extensively
-    #   or on massive data sets, so we take the approach of
-    #   splitting a complex vector into its real & imaginary parts
-    #   and using the regular forderv machinery to sort; a baremetal
-    #   implementation would at root do the same, but the approach
-    #   here is a bit more slapdash with respect to memory efficiency
-    if (is.complex(x)) {
-      x = list(Re(x), Im(x))
-      by = 1:2
-      order = rep(order, 2L)
-    }
   } else {
     if (!length(x)) return(integer(0L)) # to be consistent with base::order. this'll make sure forderv(NULL) will result in error
                        # (as base does) but forderv(data.table(NULL)) and forderv(list()) will return integer(0L))
@@ -223,29 +212,6 @@ forderv = function(x, by=seq_along(x), retGrp=FALSE, sort=TRUE, order=1L, na.las
     if ( (length(order) != 1L && length(order) != length(by)) || !all(order %in% c(1L, -1L)) )
       stop("x is a list, length(order) must be either =1 or =length(by) and each value should be 1 or -1 for each column in 'by', corresponding to ascending or descending order, respectively. If length(order) == 1, it will be recycled to length(by).")
     if (length(order) == 1L) order = rep(order, length(by))
-    nn = length(x)
-    is_cplx = vapply_1b(by, function(i) {
-      if (i < 1L) stop("'by' must be positive, received ", i, call. = FALSE)
-      if (i > nn) stop("'by' must be in [1,", nn, "], received ", i, call. = FALSE)
-      is.complex(x[[i]])
-    })
-    if (any(is_cplx)) {
-      out_x = vector('list', length(by) + sum(is_cplx))
-      order = rep(order, is_cplx + 1L)
-      j = 1L
-      for (i in seq_along(by)) {
-        if (is_cplx[i]) {
-          out_x[[j]] = Re(x[[by[i]]])
-          out_x[[j + 1L]] = Im(x[[by[i]]])
-          j = j + 2L
-        } else {
-          out_x[[j]] = x[[by[i]]]
-          j = j + 1L
-        }
-      }
-      x = out_x; rm(out_x)
-      by = seq_along(x)
-    }
   }
   order = as.integer(order)
   .Call(Cforder, x, by, retGrp, sort, order, na.last)  # returns integer() if already sorted, regardless of sort=TRUE|FALSE
diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index e919fef605..e335d9a092 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -2783,7 +2783,7 @@ test(988, unique(dt, by='B'), dt[!duplicated(df[, 'B'])])
 test(989, unique(dt, by='C'), dt[!duplicated(df[, 'C'])])
 test(990, unique(dt, by=c('B', 'C')), dt[!duplicated(df[, c('B', 'C')])])
 test(991, unique(dt, by=NULL), dt[!duplicated(df)])
-test(991.1, unique(dt, by=4), error="'by' must be in [1,3], received 4")
+test(991.1, unique(dt, by=4), error="'by' value 4 out of range.*1,3")
 test(991.2, unique(dt, by=c(1,3.1)), error="'by' is type 'double' and one or more items in it are not whole integers")
 test(991.3, unique(dt, by=2:3), dt[!duplicated(df[,c('B','C')])])
 test(991.4, unique(dt, by=c('C','D','E')), error="'by' contains 'D' which is not a column name")
@@ -13158,9 +13158,9 @@ setnames(DT, '.xi')
 setkey(DT, NULL)
 test(1962.037, setkey(DT, .xi),
      error = "x contains a column called '.xi'")
-DT = data.table(a = 1+3i)
+DT = data.table(a = as.raw(0))
 test(1962.038, setkey(DT, a),
-     error = "Column 'a' is type 'complex'")
+     error = "Column 'a' is type 'raw'")
 
 test(1962.039, is.sorted(3:1, by = 'x'),
      error = 'x is vector but')
@@ -13216,8 +13216,8 @@ test(1962.064, setorderv(copy(DT)),
 test(1962.065, setorderv(DT, 'c'), error = 'some columns are not in the data.table')
 setnames(DT, 1L, '.xi')
 test(1962.066, setorderv(DT, 'b'), error = "x contains a column called '.xi'")
-test(1962.067, setorderv(data.table(a = 1+3i), 'a'),
-     error = "Column 'a' is type 'complex'")
+test(1962.067, setorderv(data.table(a = as.raw(0)), 'a'),
+     error = "Column 'a' is type 'raw'")
 
 DT = data.table(
   color = c("yellow", "red", "green", "red", "green", "red",
@@ -13743,7 +13743,7 @@ test(1984.05, DT[ , sum(b), keyby = c, verbose = TRUE],
 ### hitting byval = eval(bysub, setattr(as.list(seq_along(xss)), ...)
 test(1984.06, DT[1:3, sum(a), by=b:c], data.table(b=10:8, c=1:3, V1=1:3))
 test(1984.07, DT[, sum(a), by=call('sin',pi)], error='must evaluate to a vector or a list of vectors')
-test(1984.08, DT[, sum(a), by=1+3i],           error='column or expression.*type complex')
+test(1984.08, DT[, sum(a), by=as.raw(0)],           error='column or expression.*type raw')
 test(1984.09, DT[, sum(a), by=.(1,1:2)],       error='The items.*list are length [(]1,2[)].*Each must be length 10; .*rows in x.*after subsetting')
 options('datatable.optimize' = Inf)
 test(1984.10, DT[ , 1, by = .(a %% 2), verbose = TRUE],
@@ -14755,14 +14755,14 @@ dt1 <- data.table(int = 1L:10L,
                   bool = c(rep(FALSE, 9), TRUE),
                   char = letters[1L:10L],
                   fact = factor(letters[1L:10L]),
-                  complex = as.complex(1:5))
+                  raw = as.raw(1:5))
 dt2 <- data.table(int = 1L:5L,
                   doubleInt = as.numeric(1:5),
                   realDouble = seq(0.5, 2.5, by = 0.5),
                   bool = TRUE,
                   char = letters[1L:5L],
                   fact = factor(letters[1L:5L]),
-                  complex = as.complex(1:5))
+                  raw = as.raw(1:5))
 if (test_bit64) {
   dt1[, int64 := as.integer64(c(1:9, 3e10))]
   dt2[, int64 := as.integer64(c(1:4, 3e9))]
@@ -14779,8 +14779,8 @@ test(2044.08, nrow(dt1[dt2, on="fact==fact",             verbose=TRUE]), nrow(dt
 if (test_bit64) {
   test(2044.09, nrow(dt1[dt2, on = "int64==int64",       verbose=TRUE]), nrow(dt2), output="No coercion needed")
 }
-test(2044.10, dt1[dt2, on = "int==complex"],   error = "i.complex is type complex which is not supported by data.table join")
-test(2044.11, dt1[dt2, on = "complex==int"],   error = "x.complex is type complex which is not supported by data.table join")
+test(2044.10, dt1[dt2, on = "int==raw"],   error = "i.raw is type raw which is not supported by data.table join")
+test(2044.11, dt1[dt2, on = "raw==int"],   error = "x.raw is type raw which is not supported by data.table join")
 # incompatible types
 test(2044.20, dt1[dt2, on="bool==int"],        error="Incompatible join types: x.bool (logical) and i.int (integer)")
 test(2044.21, dt1[dt2, on="bool==doubleInt"],  error="Incompatible join types: x.bool (logical) and i.doubleInt (double)")
@@ -15270,16 +15270,12 @@ test(2064.07, DT[ , .N, by=z], data.table(z=unq_z, N=c(2L, 2L, 1L, 1L)))
 # test(2964.10, DT
 test(2964.11, dcast(DT, z ~ grp, value.var='v', fill=0),
      data.table(z=unq_z, `1`=c(3, 4, 5, 0), `2`=c(1, 1, 0, 9), key='z'))
-test(2964.12, frank(DT$z), c(1.5, 1.5, 3.5, 5, 6))
-test(2964.13, frank(DT$z, ties.method='max'), c(2L, 2L, 4L, 5L, 6L))
+test(2964.12, frank(DT$z), c(1.5, 1.5, 3.5, 3.5, 5, 6))
+test(2964.13, frank(DT$z, ties.method='max'), c(2L, 2L, 4L, 4L, 5L, 6L))
 test(2964.14, frank(-DT$z, ties.method='min'), c(5L, 5L, 3L, 3L, 2L, 1L))
 test(2964.15, DT[ , rowid(z, grp)], rep(1L, 6L))
 test(2964.16, DT[ , rowid(z)], c(1:2, 1:2, 1L, 1L))
 
-DT1 = data.table(z = c(0+1i, 2-3i, 4+1i))
-DT2 = data.table(z = c(2-3i, 0+1i, 0+0i))
-DT1[DT2, on = 'z']
-
 ###################################
 #  Add new tests above this line  #
 ###################################
diff --git a/src/forder.c b/src/forder.c
index d810bf6536..9e8630b744 100644
--- a/src/forder.c
+++ b/src/forder.c
@@ -440,11 +440,58 @@ SEXP forder(SEXP DT, SEXP by, SEXP retGrpArg, SEXP sortGroupsArg, SEXP ascArg, S
   if (!isInteger(by) || !LENGTH(by)) error("DT has %d columns but 'by' is either not integer or is length 0", length(DT));  // seq_along(x) at R level
   if (!isInteger(ascArg) || LENGTH(ascArg)!=LENGTH(by)) error("Either 'ascArg' is not integer or its length (%d) is different to 'by's length (%d)", LENGTH(ascArg), LENGTH(by));
   nrow = length(VECTOR_ELT(DT,0));
+  int n_cplx = 0;
   for (int i=0; i<LENGTH(by); i++) {
     if (INTEGER(by)[i] < 1 || INTEGER(by)[i] > length(DT))
-      error("Internal error: 'by' out of range should have been caught earlier"); // #nocov
+      error("'by' value %d out of range [1,%d]", INTEGER(by)[i], length(DT));
     if ( nrow != length(VECTOR_ELT(DT, INTEGER(by)[i]-1)) )
       error("Column %d is length %d which differs from length of column 1 (%d)\n", INTEGER(by)[i], length(VECTOR_ELT(DT, INTEGER(by)[i]-1)), nrow);
+    if (TYPEOF(VECTOR_ELT(DT, i)) == CPLXSXP) {
+      n_cplx++;
+    }
+  }
+  if (n_cplx) {
+    // we don't expect users to need complex sorting extensively
+    //   or on massive data sets, so we take the approach of
+    //  splitting a complex vector into its real & imaginary parts
+    //  and using the regular forderv machinery to sort; a baremetal
+    //  implementation would at root do the same, but the approach
+    //  here is a bit more slapdash with respect to memory efficiency
+    //  (seen clearly here at C from the 3+2*n_cplx PROTECT() calls)
+    int n_out = length(by) + n_cplx;
+    SEXP new_dt = PROTECT(allocVector(VECSXP, n_out)); n_protect++;
+    SEXP new_asc = PROTECT(allocVector(INTSXP, n_out)); n_protect++;
+    SEXP new_by = PROTECT(allocVector(INTSXP, n_out)); n_protect++;
+    int j = 0;
+    for (int i=0; i<length(by); i++) {
+      int by_idx = INTEGER(by)[i]-1;
+      if (TYPEOF(VECTOR_ELT(DT, by_idx)) == CPLXSXP) {
+        SEXP realPart = PROTECT(allocVector(REALSXP, nrow)); n_protect++;
+        SEXP cplxPart = PROTECT(allocVector(REALSXP, nrow)); n_protect++;
+        double *pre = REAL(realPart);
+        double *pim = REAL(cplxPart);
+        Rcomplex *pz = COMPLEX(VECTOR_ELT(DT, by_idx));
+        for (int i = 0; i < nrow; i++) {
+          pre[i] = pz[i].r;
+          pim[i] = pz[i].i;
+        }
+        SET_VECTOR_ELT(new_dt, j, realPart);
+        SET_VECTOR_ELT(new_dt, j+1, cplxPart);
+        INTEGER(new_asc)[j] = INTEGER(ascArg)[i];
+        INTEGER(new_asc)[j+1] = INTEGER(ascArg)[i];
+        INTEGER(new_by)[j] = j+1;
+        INTEGER(new_by)[j+1] = j+2;
+        j += 2;
+      } else {
+        SET_VECTOR_ELT(new_dt, j, VECTOR_ELT(DT, by_idx));
+        INTEGER(new_asc)[j] = INTEGER(ascArg)[i];
+        INTEGER(new_by)[j] = j+1;
+        j += 1;
+      }
+    }
+    DT = new_dt;
+    ascArg = new_asc;
+    by = new_by;
   }
 
   if (!isLogical(retGrpArg) || LENGTH(retGrpArg)!=1 || INTEGER(retGrpArg)[0]==NA_LOGICAL) error("retGrp must be TRUE or FALSE");

From bf59da2ae8686f192a39230258f3a2bf2f8d7661 Mon Sep 17 00:00:00 2001
From: Michael Chirico <michael.chirico@grabtaxi.com>
Date: Sun, 14 Jul 2019 01:55:22 +0800
Subject: [PATCH 09/25] some coverage tests, extension to rleid()

---
 R/data.table.R        |  2 +-
 R/setkey.R            | 11 +++--------
 inst/tests/tests.Rraw | 16 ++++++++++++++++
 src/forder.c          |  9 ++++++---
 src/uniqlist.c        | 13 +++++++++++++
 5 files changed, 39 insertions(+), 12 deletions(-)

diff --git a/R/data.table.R b/R/data.table.R
index 4eab866fe3..829d07b83c 100644
--- a/R/data.table.R
+++ b/R/data.table.R
@@ -829,7 +829,7 @@ replace_order = function(isub, verbose, env) {
         if (!is.list(byval)) stop("'by' or 'keyby' must evaluate to a vector or a list of vectors (where 'list' includes data.table and data.frame which are lists, too)")
         if (length(byval)==1L && is.null(byval[[1L]])) bynull=TRUE #3530 when by=(function()NULL)()
         if (!bynull) for (jj in seq_len(length(byval))) {
-          if (!typeof(byval[[jj]]) %chin% c("integer","logical","character","double","complex")) stop("column or expression ",jj," of 'by' or 'keyby' is type ",typeof(byval[[jj]]),". Do not quote column names. Usage: DT[,sum(colC),by=list(colA,month(colB))]")
+          if (!typeof(byval[[jj]]) %chin% ORDERING_TYPES) stop("column or expression ",jj," of 'by' or 'keyby' is type ",typeof(byval[[jj]]),". Do not quote column names. Usage: DT[,sum(colC),by=list(colA,month(colB))]")
         }
         tt = vapply_1i(byval,length)
         if (any(tt!=xnrow)) stop("The items in the 'by' or 'keyby' list are length (",paste(tt,collapse=","),"). Each must be length ", xnrow, "; the same length as there are rows in x (after subsetting if i is provided).")
diff --git a/R/setkey.R b/R/setkey.R
index bed50cba50..810a4526f7 100644
--- a/R/setkey.R
+++ b/R/setkey.R
@@ -51,14 +51,9 @@ setkeyv = function(x, cols, verbose=getOption("datatable.verbose"), physical=TRU
   }
   if (identical(cols,"")) stop("cols is the empty string. Use NULL to remove the key.")
   if (!all(nzchar(cols))) stop("cols contains some blanks.")
-  if (!length(cols)) {
-    cols = colnames(x)   # All columns in the data.table, usually a few when used in this form
-  } else {
-    # remove backticks from cols
-    cols = gsub("`", "", cols, fixed = TRUE)
-    miss = !(cols %chin% colnames(x))
-    if (any(miss)) stop("some columns are not in the data.table: ", paste(cols[miss], collapse=","))
-  }
+  cols = gsub("`", "", cols, fixed = TRUE)
+  miss = !(cols %chin% colnames(x))
+  if (any(miss)) stop("some columns are not in the data.table: ", paste(cols[miss], collapse=","))
 
   ## determine, whether key is already present:
   if (identical(key(x),cols)) {
diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index e335d9a092..e593374b7e 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -15275,6 +15275,22 @@ test(2964.13, frank(DT$z, ties.method='max'), c(2L, 2L, 4L, 4L, 5L, 6L))
 test(2964.14, frank(-DT$z, ties.method='min'), c(5L, 5L, 3L, 3L, 2L, 1L))
 test(2964.15, DT[ , rowid(z, grp)], rep(1L, 6L))
 test(2964.16, DT[ , rowid(z)], c(1:2, 1:2, 1L, 1L))
+test(2964.17, rleid(c(1i, 1i, 1i, 0, 0, 1-1i, 2+3i, 2+3i)), rep(1:4, c(3:1, 2L)))
+
+## assorted coverage tests from along the way
+if (test_bit64) {
+  test(2964.50, is.sorted(as.integer64(10:1)), FALSE)
+  test(2964.51, is.sorted(as.integer64(1:10)))
+}
+# sort by vector outside of table
+ord = 3:1
+test(2964.52, forder(data.table(a = 3:1), ord), 3:1)
+
+# DT1 = data.table(z = c(0+1i, 2-3i, 4+1i))
+# DT2 = data.table(z = c(2-3i, 0+1i, 0+0i))
+# DT1[DT2, on = 'z']
+
+
 
 ###################################
 #  Add new tests above this line  #
diff --git a/src/forder.c b/src/forder.c
index 9e8630b744..edc8eab1bf 100644
--- a/src/forder.c
+++ b/src/forder.c
@@ -446,9 +446,7 @@ SEXP forder(SEXP DT, SEXP by, SEXP retGrpArg, SEXP sortGroupsArg, SEXP ascArg, S
       error("'by' value %d out of range [1,%d]", INTEGER(by)[i], length(DT));
     if ( nrow != length(VECTOR_ELT(DT, INTEGER(by)[i]-1)) )
       error("Column %d is length %d which differs from length of column 1 (%d)\n", INTEGER(by)[i], length(VECTOR_ELT(DT, INTEGER(by)[i]-1)), nrow);
-    if (TYPEOF(VECTOR_ELT(DT, i)) == CPLXSXP) {
-      n_cplx++;
-    }
+    if (TYPEOF(VECTOR_ELT(DT, i)) == CPLXSXP) n_cplx++;
   }
   if (n_cplx) {
     // we don't expect users to need complex sorting extensively
@@ -461,11 +459,16 @@ SEXP forder(SEXP DT, SEXP by, SEXP retGrpArg, SEXP sortGroupsArg, SEXP ascArg, S
     int n_out = length(by) + n_cplx;
     SEXP new_dt = PROTECT(allocVector(VECSXP, n_out)); n_protect++;
     SEXP new_asc = PROTECT(allocVector(INTSXP, n_out)); n_protect++;
+    // will be simply 1:n_out
     SEXP new_by = PROTECT(allocVector(INTSXP, n_out)); n_protect++;
     int j = 0;
     for (int i=0; i<length(by); i++) {
       int by_idx = INTEGER(by)[i]-1;
       if (TYPEOF(VECTOR_ELT(DT, by_idx)) == CPLXSXP) {
+        // I don't see any shorthand way of splitting of the real&imaginary components,
+        //   i.e., a shorthand way of doing Re(z), Im(z). That includes searching all of
+        //   the r-source code & all of the r-devel archives. So just reproduce Re(), Im()
+        //   as done in do_cmathfuns in complex.c
         SEXP realPart = PROTECT(allocVector(REALSXP, nrow)); n_protect++;
         SEXP cplxPart = PROTECT(allocVector(REALSXP, nrow)); n_protect++;
         double *pre = REAL(realPart);
diff --git a/src/uniqlist.c b/src/uniqlist.c
index d9ac35300c..ecf1f3e80a 100644
--- a/src/uniqlist.c
+++ b/src/uniqlist.c
@@ -1,4 +1,5 @@
 #include "data.table.h"
+#include <complex.h>
 
 // DONE: return 'uniqlist' as a vector (same as duplist) and write a separate function to get group sizes
 // Also improvements for numeric type with a hack of checking unsigned int (to overcome NA/NaN/Inf/-Inf comparisons) (> 2x speed-up)
@@ -200,6 +201,11 @@ SEXP rleid(SEXP l, SEXP cols) {
           // 8 bytes of bits are identical. For real (no rounding currently) and integer64
           // long long == 8 bytes checked in init.c
           break;
+        case CPLXSXP: {
+          // tried to make long long complex * but got a warning that it's a GNU extension
+          double complex *pz = (double complex *)COMPLEX(jcol);
+          same = (long long)creal(pz[i]) == (long long)creal(pz[i-1]) && (long long)cimag(pz[i]) == (long long)cimag(pz[i-1]);
+        } break;
         default :
           error("Type '%s' not supported", type2char(TYPEOF(jcol)));  // # nocov
         }
@@ -232,6 +238,13 @@ SEXP rleid(SEXP l, SEXP cols) {
         }
       }
       break;
+    case CPLXSXP: {
+      double complex *pzjcol = (double complex *)COMPLEX(jcol);
+      for (R_xlen_t i=1; i<nrow; i++) {
+        bool same = (long long)creal(pzjcol[i]) == (long long)creal(pzjcol[i-1]) && (long long)cimag(pzjcol[i]) == (long long)cimag(pzjcol[i-1]);
+        ians[i] = (grp += !same);
+      }
+    } break;
     default :
       error("Type '%s' not supported", type2char(TYPEOF(jcol)));
     }

From 09c8b192ba78c9b9152605a29c146b736256637b Mon Sep 17 00:00:00 2001
From: Michael Chirico <michael.chirico@grabtaxi.com>
Date: Sun, 14 Jul 2019 03:34:53 +0800
Subject: [PATCH 10/25] progress making ctwiddle (dtwiddle for cplx)

---
 inst/tests/tests.Rraw |  2 +-
 src/data.table.h      |  6 ++++++
 src/forder.c          | 32 ++++++++++++++++++++++++++++++++
 src/uniqlist.c        | 15 ++++++++++++++-
 4 files changed, 53 insertions(+), 2 deletions(-)

diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index e593374b7e..b4f765cc94 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -6474,7 +6474,7 @@ test(1464.03, rleidv(DT, "b"), c(1L, 1L, 2L, 2L, 3L, 3L, 3L, 4L, 5L, 5L))
 test(1464.04, rleid(DT$b), c(1L, 1L, 2L, 2L, 3L, 3L, 3L, 4L, 5L, 5L))
 test(1464.05, rleidv(DT, "c"), c(1L, 1L, 2L, 2L, 3L, 3L, 3L, 4L, 5L, 5L))
 test(1464.06, rleid(DT$c), c(1L, 1L, 2L, 2L, 3L, 3L, 3L, 4L, 5L, 5L))
-test(1464.07, rleid(as.complex(c(1,0+5i,0+5i,1))), error="Type 'complex' not supported")
+test(1464.07, rleid(as.raw(c(3L, 1L, 2L)), error="Type 'raw' not supported")
 test(1464.08, rleidv(DT, 0), error="outside range")
 test(1464.09, rleidv(DT, 5), error="outside range")
 test(1464.10, rleidv(DT, 1:4), 1:nrow(DT))
diff --git a/src/data.table.h b/src/data.table.h
index 55c458d077..a84809c292 100644
--- a/src/data.table.h
+++ b/src/data.table.h
@@ -113,6 +113,12 @@ SEXP setcolorder(SEXP x, SEXP o);
 // forder.c
 int StrCmp(SEXP x, SEXP y);
 uint64_t dtwiddle(void *p, int i);
+// for sorting of complex values
+typedef struct cplxTwiddled {
+  uint64_t re;
+  uint64_t im;
+} cplxTwiddled;
+cplxTwiddled ctwiddle(Rcomplex *p, int i);
 SEXP forder(SEXP DT, SEXP by, SEXP retGrp, SEXP sortStrArg, SEXP orderArg, SEXP naArg);
 bool need2utf8(SEXP x, int n);
 SEXP isReallyReal(SEXP);
diff --git a/src/forder.c b/src/forder.c
index edc8eab1bf..d47647d11e 100644
--- a/src/forder.c
+++ b/src/forder.c
@@ -409,6 +409,38 @@ uint64_t dtwiddle(void *p, int i)
   Error("Unknown non-finite value; not NA, NaN, -Inf or +Inf");  // # nocov
 }
 
+cplxTwiddled ctwiddle(Rcomplex *p, int i) {
+  union {
+    double d;
+    uint64_t u64;
+  } u_re, u_im;
+  cplxTwiddled out;
+  Rcomplex z = p[i];
+  u_re.d = z.r;
+  u_im.d = z.i;
+  if (R_FINITE(u_re.d)) {
+    if (u_re.d==0) u_re.d=0;
+    u_re.u64 ^= (u_re.u64 & 0x8000000000000000) ? 0xffffffffffffffff : 0x8000000000000000;
+    u_re.u64 += (u_re.u64 & dmask) << 1;
+    out.re = u_re.u64 >> (dround*8);
+  } else if (ISNAN(u_re.d)) {
+    out.re = ISNA(u_re.d) ? 0 : 1;
+  } else if (isinf(u_re.d)) {
+    out.re = signbit(u_re.d) ? 2: (0xffffffffffffffff>>(dround*8));
+  } else Error("Unknown non-finite value; not NA, NaN, -Inf or +Inf");  // # nocov
+  if (R_FINITE(u_im.d)) {
+    if (u_im.d==0) u_im.d=0;
+    u_im.u64 ^= (u_im.u64 & 0x8000000000000000) ? 0xffffffffffffffff : 0x8000000000000000;
+    u_im.u64 += (u_im.u64 & dmask) << 1;
+    out.re = u_im.u64 >> (dround*8);
+  } else if (ISNAN(u_im.d)) {
+    out.re = ISNA(u_im.d) ? 0 : 1;
+  } else if (isinf(u_im.d)) {
+    out.re = signbit(u_im.d) ? 2: (0xffffffffffffffff>>(dround*8));
+  } else Error("Unknown non-finite value; not NA, NaN, -Inf or +Inf");  // # nocov
+  return out;
+}
+
 void radix_r(const int from, const int to, const int radix);
 
 SEXP forder(SEXP DT, SEXP by, SEXP retGrpArg, SEXP sortGroupsArg, SEXP ascArg, SEXP naArg)
diff --git a/src/uniqlist.c b/src/uniqlist.c
index ecf1f3e80a..370c11a75d 100644
--- a/src/uniqlist.c
+++ b/src/uniqlist.c
@@ -1,5 +1,4 @@
 #include "data.table.h"
-#include <complex.h>
 
 // DONE: return 'uniqlist' as a vector (same as duplist) and write a separate function to get group sizes
 // Also improvements for numeric type with a hack of checking unsigned int (to overcome NA/NaN/Inf/-Inf comparisons) (> 2x speed-up)
@@ -124,6 +123,15 @@ SEXP uniqlist(SEXP l, SEXP order)
             // to be stored, ii) many short-circuit early before the if (!b) anyway (negating benefit) and iii) we may not have needed LHS this time so logic would be complex.
           }
           break;
+        case CPLXSXP: {
+          Rcomplex *pz = COMPLEX(v);
+          b = (unsigned long long)pz[thisi].r == (unsigned long long)pz[previ].r &&
+            (unsigned long long)pz[thisi].i == (unsigned long long)pz[previ].i;
+          if (!b) {
+            cplxTwiddled thisz=ctwiddle(pz, thisi), prevz=ctwiddle(pz, previ);
+            b = thisz.re == prevz.re && thisz.im == prevz.im;
+          }
+        } break;
         default :
           error("Type '%s' not supported", type2char(TYPEOF(v)));  // # nocov
         }
@@ -318,6 +326,11 @@ SEXP nestedid(SEXP l, SEXP cols, SEXP order, SEXP grps, SEXP resetvals, SEXP mul
           b = i64[j] ? ((int64_t *)xd)[thisi] >= ((int64_t *)xd)[previ] :
                        dtwiddle(xd, thisi) >= dtwiddle(xd, previ);
         } break;
+        case CPLXSXP: {
+          Rcomplex *pz = COMPLEX(v);
+          cplxTwiddled thisz=ctwiddle(pz, thisi), prevz=ctwiddle(pz, previ);
+          b = thisz.re >= prevz.re || (thisz.re == thisz.re && thisz.im >= thisz.im);
+        } break;
         default:
           error("Type '%s' not supported", type2char(TYPEOF(v)));  // # nocov
         }

From e5d1d1de137a6c5c850e415faa6fa9d719777824 Mon Sep 17 00:00:00 2001
From: Michael Chirico <michael.chirico@grabtaxi.com>
Date: Sun, 14 Jul 2019 03:39:19 +0800
Subject: [PATCH 11/25] start preferring Rcomplex type

---
 inst/tests/tests.Rraw | 2 +-
 src/uniqlist.c        | 9 +++++----
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index b4f765cc94..6cddff5046 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -6474,7 +6474,7 @@ test(1464.03, rleidv(DT, "b"), c(1L, 1L, 2L, 2L, 3L, 3L, 3L, 4L, 5L, 5L))
 test(1464.04, rleid(DT$b), c(1L, 1L, 2L, 2L, 3L, 3L, 3L, 4L, 5L, 5L))
 test(1464.05, rleidv(DT, "c"), c(1L, 1L, 2L, 2L, 3L, 3L, 3L, 4L, 5L, 5L))
 test(1464.06, rleid(DT$c), c(1L, 1L, 2L, 2L, 3L, 3L, 3L, 4L, 5L, 5L))
-test(1464.07, rleid(as.raw(c(3L, 1L, 2L)), error="Type 'raw' not supported")
+test(1464.07, rleid(as.raw(c(3L, 1L, 2L))), error="Type 'raw' not supported")
 test(1464.08, rleidv(DT, 0), error="outside range")
 test(1464.09, rleidv(DT, 5), error="outside range")
 test(1464.10, rleidv(DT, 1:4), 1:nrow(DT))
diff --git a/src/uniqlist.c b/src/uniqlist.c
index 370c11a75d..8951eb716d 100644
--- a/src/uniqlist.c
+++ b/src/uniqlist.c
@@ -211,8 +211,8 @@ SEXP rleid(SEXP l, SEXP cols) {
           break;
         case CPLXSXP: {
           // tried to make long long complex * but got a warning that it's a GNU extension
-          double complex *pz = (double complex *)COMPLEX(jcol);
-          same = (long long)creal(pz[i]) == (long long)creal(pz[i-1]) && (long long)cimag(pz[i]) == (long long)cimag(pz[i-1]);
+          Rcomplex *pz = COMPLEX(jcol);
+          same = (long long)pz[i].r == (long long)pz[i-1].r && (long long)pz[i].i == (long long)pz[i-1].i;
         } break;
         default :
           error("Type '%s' not supported", type2char(TYPEOF(jcol)));  // # nocov
@@ -247,9 +247,10 @@ SEXP rleid(SEXP l, SEXP cols) {
       }
       break;
     case CPLXSXP: {
-      double complex *pzjcol = (double complex *)COMPLEX(jcol);
+      Rcomplex *pzjcol = COMPLEX(jcol);
       for (R_xlen_t i=1; i<nrow; i++) {
-        bool same = (long long)creal(pzjcol[i]) == (long long)creal(pzjcol[i-1]) && (long long)cimag(pzjcol[i]) == (long long)cimag(pzjcol[i-1]);
+        bool same = (long long)pzjcol[i].r == (long long)pzjcol[i-1].r &&
+                    (long long)pzjcol[i].i == (long long)pzjcol[i-1].i;
         ians[i] = (grp += !same);
       }
     } break;

From 9a4cef49a6efc5f9f7d2f2fe2b5f84cf40cd0dbb Mon Sep 17 00:00:00 2001
From: Michael Chirico <michael.chirico@grabtaxi.com>
Date: Sun, 14 Jul 2019 03:43:24 +0800
Subject: [PATCH 12/25] switch to Rcomplex API

---
 NEWS.md       | 2 +-
 src/reorder.c | 7 +++----
 2 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/NEWS.md b/NEWS.md
index 81a50403ae..c28317ee24 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -129,7 +129,7 @@
     # TRUE
     ```
     
-19. Support for `setkey` on `data.table`s with complex-valued columns, [#1444](https://github.com/Rdatatable/data.table/issues/1444). Note that complex columns themselves are not well-ordered (and thus not supported as keys). Thanks @atalikami for the report.
+19. Support for `setkey` on `data.table`s with complex-valued columns, [#1444](https://github.com/Rdatatable/data.table/issues/1444). Note that complex columns themselves are not yet supported as keys). Thanks @atalikami for the report.
 
 #### BUG FIXES
 
diff --git a/src/reorder.c b/src/reorder.c
index d55d147283..b61286ff23 100644
--- a/src/reorder.c
+++ b/src/reorder.c
@@ -1,5 +1,4 @@
 #include "data.table.h"
-#include <complex.h>
 
 SEXP reorder(SEXP x, SEXP order)
 {
@@ -80,9 +79,9 @@ SEXP reorder(SEXP x, SEXP order)
       memcpy(vd+start, tmp+start, (end-start+1)*size);
     } else {
       // #1444 -- support for copying CPLXSXP (which have size 16)
-      double complex *vd = (double complex *)DATAPTR(v);
-      const double complex *restrict cvd = vd;
-      double complex *restrict tmp = (double complex *)TMP;
+      Rcomplex *vd = (Rcomplex *)DATAPTR(v);
+      const Rcomplex *restrict cvd = vd;
+      Rcomplex *restrict tmp = (Rcomplex *)TMP;
       #pragma omp parallel for num_threads(getDTthreads())
       for (int i=start; i<=end; i++) {
         tmp[i] = cvd[idx[i]-1];  // copies 16 bytes; including pointers on 64bit (STRSXP and VECSXP)

From 761fe90d5a78a106a2d7ff4dcc4f52b7b311f517 Mon Sep 17 00:00:00 2001
From: Michael Chirico <michael.chirico@grabtaxi.com>
Date: Sun, 14 Jul 2019 20:16:10 +0800
Subject: [PATCH 13/25] setkey now works on complex columns

---
 inst/tests/tests.Rraw | 25 +++++++++++++------------
 src/forder.c          |  9 +++++----
 2 files changed, 18 insertions(+), 16 deletions(-)

diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index c2777d3f9b..05831f8a26 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -15242,7 +15242,6 @@ ll = list(1:2, NULL, 3:4)
 test(2063.4, transpose(ll, ignore=TRUE), list(c(1L, 3L), c(2L, 4L)))
 options(old)
 
-<<<<<<< HEAD
 # forderv (and downstream functions) handles complex vector input, part of #3690
 DT = data.table(
   a = c(1L, 1L, 8L, 2L, 1L, 9L, 3L, 2L, 6L, 6L),
@@ -15265,24 +15264,26 @@ DT = data.table(
 unq_z = 0:3 + (1:4)*1i
 test(2064.07, DT[ , .N, by=z], data.table(z=unq_z, N=c(2L, 2L, 1L, 1L)))
 test(2064.08, DT[ , .N, keyby = z], data.table(z=unq_z, N=c(2L, 2L, 1L, 1L), key='z'))
-test(2964.09, dcast(DT, z ~ grp, value.var='v', fill=0),
+test(2064.09, dcast(DT, z ~ grp, value.var='v', fill=0),
      data.table(z=unq_z, `1`=c(3, 4, 5, 0), `2`=c(1, 1, 0, 9), key='z'))
-test(2964.10, frank(DT$z), c(1.5, 1.5, 3.5, 3.5, 5, 6))
-test(2964.12, frank(DT$z, ties.method='max'), c(2L, 2L, 4L, 4L, 5L, 6L))
-test(2964.12, frank(-DT$z, ties.method='min'), c(5L, 5L, 3L, 3L, 2L, 1L))
-test(2964.13, DT[ , rowid(z, grp)], rep(1L, 6L))
-test(2964.14, DT[ , rowid(z)], c(1:2, 1:2, 1L, 1L))
-test(2964.15, rleid(c(1i, 1i, 1i, 0, 0, 1-1i, 2+3i, 2+3i)), rep(1:4, c(3:1, 2L)))
+test(2064.10, frank(DT$z), c(1.5, 1.5, 3.5, 3.5, 5, 6))
+test(2064.12, frank(DT$z, ties.method='max'), c(2L, 2L, 4L, 4L, 5L, 6L))
+test(2064.12, frank(-DT$z, ties.method='min'), c(5L, 5L, 3L, 3L, 2L, 1L))
+test(2064.13, DT[ , rowid(z, grp)], rep(1L, 6L))
+test(2064.14, DT[ , rowid(z)], c(1:2, 1:2, 1L, 1L))
+test(2064.15, rleid(c(1i, 1i, 1i, 0, 0, 1-1i, 2+3i, 2+3i)), rep(1:4, c(3:1, 2L)))
 
 # #1444 -- support for ordering tables with complex columns
 DT = data.table(a = 2:1, z = 0 + (1:0)*1i)
-test(2064.1, setkey(copy(DT), a), data.table(a=1:2, z=complex(0, 1:0), key='a'))
-test(2064.2, DT[ , abs(z), by=a], data.table(a=2:1, V1=c(0, 1)))
+test(2064.16, setkey(copy(DT), a), data.table(a=1:2, z=0+ (0:1)*1i, key='a'))
+test(2064.17, DT[ , abs(z), by=a], data.table(a=2:1, V1=c(1, 0)))
+test(2064.18, setkey(copy(DT), z), data.table(a=1:2, z=0+ (0:1)*1i, key='z'))
+
 # raw continues not to be supported
 DT = data.table(ID=2:1, r=as.raw(0:1))
-test(2064.3, setkey(DT, ID), error="Item 2 of list is type 'raw'")
+test(2064.18, setkey(DT, ID), error="Item 2 of list is type 'raw'")
 # setreordervec triggers !isNewList branch for coverage
-test(2064.4, setreordervec(DT$r, order(DT$ID)), error="reorder accepts vectors but this non-VECSXP")
+test(2064.19, setreordervec(DT$r, order(DT$ID)), error="reorder accepts vectors but this non-VECSXP")
 
 # DT1 = data.table(z = c(0+1i, 2-3i, 4+1i))
 # DT2 = data.table(z = c(2-3i, 0+1i, 0+0i))
diff --git a/src/forder.c b/src/forder.c
index d47647d11e..5f31bc1b2f 100644
--- a/src/forder.c
+++ b/src/forder.c
@@ -474,11 +474,12 @@ SEXP forder(SEXP DT, SEXP by, SEXP retGrpArg, SEXP sortGroupsArg, SEXP ascArg, S
   nrow = length(VECTOR_ELT(DT,0));
   int n_cplx = 0;
   for (int i=0; i<LENGTH(by); i++) {
-    if (INTEGER(by)[i] < 1 || INTEGER(by)[i] > length(DT))
-      error("'by' value %d out of range [1,%d]", INTEGER(by)[i], length(DT));
-    if ( nrow != length(VECTOR_ELT(DT, INTEGER(by)[i]-1)) )
+    int by_i = INTEGER(by)[i];
+    if (by_i < 1 || by_i > length(DT))
+      error("'by' value %d out of range [1,%d]", by_i, length(DT));
+    if ( nrow != length(VECTOR_ELT(DT, by_i-1)) )
       error("Column %d is length %d which differs from length of column 1 (%d)\n", INTEGER(by)[i], length(VECTOR_ELT(DT, INTEGER(by)[i]-1)), nrow);
-    if (TYPEOF(VECTOR_ELT(DT, i)) == CPLXSXP) n_cplx++;
+    if (TYPEOF(VECTOR_ELT(DT, by_i-1)) == CPLXSXP) n_cplx++;
   }
   if (n_cplx) {
     // we don't expect users to need complex sorting extensively

From 7261011f7bd18d48a2c88f0ba8ebe2cfe8f9d620 Mon Sep 17 00:00:00 2001
From: Michael Chirico <michael.chirico@grabtaxi.com>
Date: Mon, 15 Jul 2019 01:42:39 +0800
Subject: [PATCH 14/25] ostensibly done uniqlist; progress on bmerge

---
 inst/tests/tests.Rraw |  7 +++--
 src/bmerge.c          | 70 +++++++++++++++++++++++++++++++++++++++++--
 src/uniqlist.c        | 44 +++++++++++++++++++--------
 3 files changed, 104 insertions(+), 17 deletions(-)

diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index 05831f8a26..147316ba46 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -15267,7 +15267,7 @@ test(2064.08, DT[ , .N, keyby = z], data.table(z=unq_z, N=c(2L, 2L, 1L, 1L), key
 test(2064.09, dcast(DT, z ~ grp, value.var='v', fill=0),
      data.table(z=unq_z, `1`=c(3, 4, 5, 0), `2`=c(1, 1, 0, 9), key='z'))
 test(2064.10, frank(DT$z), c(1.5, 1.5, 3.5, 3.5, 5, 6))
-test(2064.12, frank(DT$z, ties.method='max'), c(2L, 2L, 4L, 4L, 5L, 6L))
+test(2064.11, frank(DT$z, ties.method='max'), c(2L, 2L, 4L, 4L, 5L, 6L))
 test(2064.12, frank(-DT$z, ties.method='min'), c(5L, 5L, 3L, 3L, 2L, 1L))
 test(2064.13, DT[ , rowid(z, grp)], rep(1L, 6L))
 test(2064.14, DT[ , rowid(z)], c(1:2, 1:2, 1L, 1L))
@@ -15278,12 +15278,13 @@ DT = data.table(a = 2:1, z = 0 + (1:0)*1i)
 test(2064.16, setkey(copy(DT), a), data.table(a=1:2, z=0+ (0:1)*1i, key='a'))
 test(2064.17, DT[ , abs(z), by=a], data.table(a=2:1, V1=c(1, 0)))
 test(2064.18, setkey(copy(DT), z), data.table(a=1:2, z=0+ (0:1)*1i, key='z'))
+test(2064.19, setorder(DT, z), data.table(a=1:2, z=0+ (0:1)*1i))
 
 # raw continues not to be supported
 DT = data.table(ID=2:1, r=as.raw(0:1))
-test(2064.18, setkey(DT, ID), error="Item 2 of list is type 'raw'")
+test(2064.20, setkey(DT, ID), error="Item 2 of list is type 'raw'")
 # setreordervec triggers !isNewList branch for coverage
-test(2064.19, setreordervec(DT$r, order(DT$ID)), error="reorder accepts vectors but this non-VECSXP")
+test(2064.21, setreordervec(DT$r, order(DT$ID)), error="reorder accepts vectors but this non-VECSXP")
 
 # DT1 = data.table(z = c(0+1i, 2-3i, 4+1i))
 # DT2 = data.table(z = c(2-3i, 0+1i, 0+0i))
diff --git a/src/bmerge.c b/src/bmerge.c
index eb55569b00..090cc58f1a 100644
--- a/src/bmerge.c
+++ b/src/bmerge.c
@@ -204,6 +204,8 @@ static union {
   unsigned long long ull;
   long long ll;
   SEXP s;
+  Rcomplex z;
+  cplxTwiddled zull;
 } ival, xval;
 
 static uint64_t i64twiddle(void *p, int i)
@@ -299,7 +301,7 @@ void bmerge_r(int xlowIn, int xuppIn, int ilowIn, int iuppIn, int col, int thisg
     // ilow and iupp now surround the group in ic, too
   }
     break;
-  case STRSXP :
+  case STRSXP : {
     if (op[col] != EQ) error("Only '==' operator is supported for columns of type %s.", type2char(TYPEOF(xc)));
     ival.s = ENC2UTF8(STRING_ELT(ic,ir));
     while(xlow < xupp-1) {
@@ -338,7 +340,7 @@ void bmerge_r(int xlowIn, int xuppIn, int ilowIn, int iuppIn, int col, int thisg
       xval.s = ENC2UTF8(STRING_ELT(ic, o ? o[mid]-1 : mid));
       if (xval.s == ival.s) tmpupp=mid; else ilow=mid;   // see above re ==
     }
-    break;
+  }  break;
   case REALSXP : {
     double *dic = REAL(ic);
     double *dxc = REAL(xc);
@@ -405,6 +407,70 @@ void bmerge_r(int xlowIn, int xuppIn, int ilowIn, int iuppIn, int col, int thisg
     // ilow and iupp now surround the group in ic, too
   }
     break;
+  case CPLXSXP : {
+    Rcomplex *dic = COMPLEX(ic);
+    Rcomplex *dxc = COMPLEX(xc);
+    ival.zull = ctwiddle(dic, ir);
+    while(xlow < xupp-1) {
+      int mid = xlow + (xupp-xlow)/2;
+      xval.zull = ctwiddle(dxc, XIND(mid));
+      if (xval.zull.re<ival.zull.re ||
+          (xval.zull.re == ival.zull.re && xval.zull.im < ival.zull.im)) {
+        xlow=mid;
+      } else if (xval.zull.re>ival.zull.re ||
+                 (xval.zull.re = ival.zull.re && xval.zull.im > ival.zull.im)) {
+        xupp=mid;
+      } else { // xval.zull == ival.zull
+        tmplow = mid;
+        tmpupp = mid;
+        while(tmplow<xupp-1) {
+          int mid = tmplow + (xupp-tmplow)/2;
+          xval.zull = ctwiddle(dxc, XIND(mid));
+          if (xval.zull.re == ival.zull.re && xval.zull.im == ival.zull.im) tmplow=mid; else xupp=mid;
+        }
+        while(xlow<tmpupp-1) {
+          int mid = xlow + (tmpupp-xlow)/2;
+          xval.zull = ctwiddle(dxc, XIND(mid));
+          if (xval.zull.re == ival.zull.re && xval.zull.im == ival.zull.im) tmpupp=mid; else xlow=mid;
+        }
+        break;
+      }
+    }
+    if (col>-1 && op[col] != EQ) {
+      Rboolean isivalNA = (Rboolean)(ISNAN(dic[ir].r) || ISNAN(dic[ir].i));
+      switch (op[col]) {
+      case LE : if (!isivalNA) xlow = xlowIn; break;
+      case LT : xupp = xlow + 1; if (!isivalNA) xlow = xlowIn; break;
+      case GE : if (!isivalNA) xupp = xuppIn; break;
+      case GT : xlow = xupp - 1; if (!isivalNA) xupp = xuppIn; break;
+      }
+      // for LE/LT cases, we need to ensure xlow excludes NA indices, != EQ is checked above already
+      if (op[col] <= 3 && xlow<xupp-1 && !isivalNA && (ISNAN(dxc[XIND(xlow+1)].r) || ISNAN(dxc[XIND(xlow+1)].i))) {
+        tmplow = xlow; tmpupp = xupp;
+        while (tmplow < tmpupp-1) {
+          int mid = tmplow + (tmpupp-tmplow)/2;
+          xval.z = COMPLEX(xc)[XIND(mid)];
+          if (ISNAN(xval.z.r) || ISNAN(xval.z.i)) tmplow = mid; else tmpupp = mid;
+        }
+        xlow = tmplow; // tmplow is the index of last NA value
+      }
+    }
+    tmplow = lir;
+    tmpupp = lir;
+    if (col>-1) {
+      while(tmplow<iupp-1) {
+        int mid = tmplow + (iupp-tmplow)/2;
+        xval.zull = ctwiddle(dic, o ? o[mid]-1 : mid);
+        if (xval.zull.re == ival.zull.re && xval.zull.im == ival.zull.im) tmplow=mid; else iupp=mid;
+      }
+      while(ilow<tmpupp-1) {
+        int mid = ilow + (tmpupp-ilow)/2;
+        xval.zull = ctwiddle(dic, o ? o[mid]-1 : mid);
+        if (xval.zull.re == ival.zull.re && xval.zull.im == ival.zull.im) tmpupp=mid; else ilow=mid;
+      }
+    }
+    // ilow and iupp now surround the group in ic, too
+  }  break;
   default:
     error("Type '%s' not supported as key column", type2char(TYPEOF(xc)));
   }
diff --git a/src/uniqlist.c b/src/uniqlist.c
index 8951eb716d..0a29ffeab5 100644
--- a/src/uniqlist.c
+++ b/src/uniqlist.c
@@ -28,17 +28,19 @@ SEXP uniqlist(SEXP l, SEXP order)
 
   if (ncol==1) {
 
+// unfortunately no equality method overloaded for Rcomplex, compare by components
+#define TEST_ELEM_NEQ if(elem != prev
+#define TEST_ELEM_NEQ_C if((elem.r != prev.r || elem.i != prev.i)
+
 #define COMPARE1                                                                 \
       prev = *vd;                                                                \
       for (int i=1; i<nrow; i++) {                                               \
-        elem = *++vd;                                                            \
-        if (elem!=prev
+        elem = *++vd;
 
 #define COMPARE1_VIA_ORDER                                                       \
       prev = vd[*o -1];                                                          \
       for (int i=1; i<nrow; i++) {                                               \
-        elem = vd[*++o -1];                                                      \
-        if (elem!=prev
+        elem = vd[*++o -1];
 
 #define COMPARE2                                                                 \
                         ) {                                                      \
@@ -59,19 +61,19 @@ SEXP uniqlist(SEXP l, SEXP order)
       int prev, elem;
       if (via_order) {
         // ad hoc by (order passed in)
-        COMPARE1_VIA_ORDER COMPARE2
+        COMPARE1_VIA_ORDER TEST_ELEM_NEQ COMPARE2
       } else {
         // e.g. by=key(DT)[1]
-        COMPARE1           COMPARE2
+        COMPARE1           TEST_ELEM_NEQ COMPARE2
       }
     } break;
     case STRSXP : {
       const SEXP *vd=STRING_PTR(v);
       SEXP prev, elem;
       if (via_order) {
-        COMPARE1_VIA_ORDER && ENC2UTF8(elem)!=ENC2UTF8(prev) COMPARE2   // but most of the time they are equal, so ENC2UTF8 doesn't need to be called
+        COMPARE1_VIA_ORDER TEST_ELEM_NEQ && ENC2UTF8(elem)!=ENC2UTF8(prev) COMPARE2   // but most of the time they are equal, so ENC2UTF8 doesn't need to be called
       } else {
-        COMPARE1           && ENC2UTF8(elem)!=ENC2UTF8(prev) COMPARE2
+        COMPARE1           TEST_ELEM_NEQ && ENC2UTF8(elem)!=ENC2UTF8(prev) COMPARE2
       }
     } break;
     case REALSXP : {
@@ -80,15 +82,33 @@ SEXP uniqlist(SEXP l, SEXP order)
       // grouping by integer64 makes sense (ids). grouping by float supported but a good use-case for that is harder to imagine
       if (getNumericRounding_C()==0 /*default*/ || inherits(v, "integer64")) {
         if (via_order) {
-          COMPARE1_VIA_ORDER COMPARE2
+          COMPARE1_VIA_ORDER TEST_ELEM_NEQ COMPARE2
+        } else {
+          COMPARE1           TEST_ELEM_NEQ COMPARE2
+        }
+      } else {
+        if (via_order) {
+          COMPARE1_VIA_ORDER TEST_ELEM_NEQ && dtwiddle(&elem, 0)!=dtwiddle(&prev, 0) COMPARE2
+        } else {
+          COMPARE1           TEST_ELEM_NEQ && dtwiddle(&elem, 0)!=dtwiddle(&prev, 0) COMPARE2
+        }
+      }
+    } break;
+    case CPLXSXP : {
+      const Rcomplex *vd=(const Rcomplex *)COMPLEX(v);
+      Rcomplex prev, elem;
+      if (getNumericRounding_C()==0) {
+        if (via_order) {
+          COMPARE1_VIA_ORDER TEST_ELEM_NEQ_C COMPARE2
         } else {
-          COMPARE1           COMPARE2
+          COMPARE1           TEST_ELEM_NEQ_C COMPARE2
         }
       } else {
+        cplxTwiddled elemz=ctwiddle(&elem, 0), prevz=ctwiddle(&prev, 0);
         if (via_order) {
-          COMPARE1_VIA_ORDER && dtwiddle(&elem, 0)!=dtwiddle(&prev, 0) COMPARE2
+          COMPARE1_VIA_ORDER TEST_ELEM_NEQ_C && (elemz.re != prevz.re || elemz.im != prevz.im) COMPARE2
         } else {
-          COMPARE1           && dtwiddle(&elem, 0)!=dtwiddle(&prev, 0) COMPARE2
+          COMPARE1           TEST_ELEM_NEQ_C && (elemz.re != prevz.re || elemz.im != prevz.im) COMPARE2
         }
       }
     } break;

From 5f443cb9c0247ae20cb1801434b1c4b669a06f6a Mon Sep 17 00:00:00 2001
From: Michael Chirico <michael.chirico@grabtaxi.com>
Date: Thu, 18 Jul 2019 09:51:57 +0800
Subject: [PATCH 15/25] scale back attempts at bmerge, all of uniqlist

---
 NEWS.md               |  5 ++--
 inst/tests/tests.Rraw |  1 -
 src/bmerge.c          | 68 +------------------------------------------
 src/data.table.h      |  6 ----
 src/forder.c          | 32 --------------------
 src/uniqlist.c        | 59 ++++++++-----------------------------
 6 files changed, 16 insertions(+), 155 deletions(-)

diff --git a/NEWS.md b/NEWS.md
index 1321c92ef7..704711026d 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -131,12 +131,13 @@
     # TRUE
     ```
     
-19. Sorting now extended to complex vectors, [#1703](https://github.com/Rdatatable/data.table/issues/1703). Consistent with `base::order`, sorting is done lexicographically (`z1<z2` means `Re(z1) < Re(z2) | (Re(z1) == Re(z2) & Im(z1) < Im(z2))`). By extension, this means that complex columns can now be used in sorting keys; a related bug ([#1444](https://github.com/Rdatatable/data.table/issues/1444); thanks to @atalikami for the report) was preventing keys to be set on even the non-complex columns of a table.
-
 19. `shift` now supports type `complex`, part of [#3690](https://github.com/Rdatatable/data.table/issues/3690).
 
 20. `setkey` now supports type `complex` as value columns (not as key columns), [#1444](https://github.com/Rdatatable/data.table/issues/1444). Thanks Gareth Ward for the report.
 
+21. Sorting now extended to complex vectors, [#1703](https://github.com/Rdatatable/data.table/issues/1703). Consistent with `base::order`, sorting is done lexicographically (`z1<z2` means `Re(z1) < Re(z2) | (Re(z1) == Re(z2) & Im(z1) < Im(z2))`).
+
+
 #### BUG FIXES
 
 1. `first`, `last`, `head` and `tail` by group no longer error in some cases, [#2030](https://github.com/Rdatatable/data.table/issues/2030) [#3462](https://github.com/Rdatatable/data.table/issues/3462). Thanks to @franknarf1 for reporting.
diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index a15b5e27c9..b0a1d91f13 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -15360,7 +15360,6 @@ test(2069.11, frank(DT$z, ties.method='max'), c(2L, 2L, 4L, 4L, 5L, 6L))
 test(2069.12, frank(-DT$z, ties.method='min'), c(5L, 5L, 3L, 3L, 2L, 1L))
 test(2069.13, DT[ , rowid(z, grp)], rep(1L, 6L))
 test(2069.14, DT[ , rowid(z)], c(1:2, 1:2, 1L, 1L))
-test(2069.15, rleid(c(1i, 1i, 1i, 0, 0, 1-1i, 2+3i, 2+3i)), rep(1:4, c(3:1, 2L)))
 
 # #1444 -- support for ordering tables with complex columns
 DT = data.table(a = 2:1, z = 0 + (1:0)*1i)
diff --git a/src/bmerge.c b/src/bmerge.c
index 090cc58f1a..14b774c4b3 100644
--- a/src/bmerge.c
+++ b/src/bmerge.c
@@ -204,8 +204,6 @@ static union {
   unsigned long long ull;
   long long ll;
   SEXP s;
-  Rcomplex z;
-  cplxTwiddled zull;
 } ival, xval;
 
 static uint64_t i64twiddle(void *p, int i)
@@ -407,72 +405,8 @@ void bmerge_r(int xlowIn, int xuppIn, int ilowIn, int iuppIn, int col, int thisg
     // ilow and iupp now surround the group in ic, too
   }
     break;
-  case CPLXSXP : {
-    Rcomplex *dic = COMPLEX(ic);
-    Rcomplex *dxc = COMPLEX(xc);
-    ival.zull = ctwiddle(dic, ir);
-    while(xlow < xupp-1) {
-      int mid = xlow + (xupp-xlow)/2;
-      xval.zull = ctwiddle(dxc, XIND(mid));
-      if (xval.zull.re<ival.zull.re ||
-          (xval.zull.re == ival.zull.re && xval.zull.im < ival.zull.im)) {
-        xlow=mid;
-      } else if (xval.zull.re>ival.zull.re ||
-                 (xval.zull.re = ival.zull.re && xval.zull.im > ival.zull.im)) {
-        xupp=mid;
-      } else { // xval.zull == ival.zull
-        tmplow = mid;
-        tmpupp = mid;
-        while(tmplow<xupp-1) {
-          int mid = tmplow + (xupp-tmplow)/2;
-          xval.zull = ctwiddle(dxc, XIND(mid));
-          if (xval.zull.re == ival.zull.re && xval.zull.im == ival.zull.im) tmplow=mid; else xupp=mid;
-        }
-        while(xlow<tmpupp-1) {
-          int mid = xlow + (tmpupp-xlow)/2;
-          xval.zull = ctwiddle(dxc, XIND(mid));
-          if (xval.zull.re == ival.zull.re && xval.zull.im == ival.zull.im) tmpupp=mid; else xlow=mid;
-        }
-        break;
-      }
-    }
-    if (col>-1 && op[col] != EQ) {
-      Rboolean isivalNA = (Rboolean)(ISNAN(dic[ir].r) || ISNAN(dic[ir].i));
-      switch (op[col]) {
-      case LE : if (!isivalNA) xlow = xlowIn; break;
-      case LT : xupp = xlow + 1; if (!isivalNA) xlow = xlowIn; break;
-      case GE : if (!isivalNA) xupp = xuppIn; break;
-      case GT : xlow = xupp - 1; if (!isivalNA) xupp = xuppIn; break;
-      }
-      // for LE/LT cases, we need to ensure xlow excludes NA indices, != EQ is checked above already
-      if (op[col] <= 3 && xlow<xupp-1 && !isivalNA && (ISNAN(dxc[XIND(xlow+1)].r) || ISNAN(dxc[XIND(xlow+1)].i))) {
-        tmplow = xlow; tmpupp = xupp;
-        while (tmplow < tmpupp-1) {
-          int mid = tmplow + (tmpupp-tmplow)/2;
-          xval.z = COMPLEX(xc)[XIND(mid)];
-          if (ISNAN(xval.z.r) || ISNAN(xval.z.i)) tmplow = mid; else tmpupp = mid;
-        }
-        xlow = tmplow; // tmplow is the index of last NA value
-      }
-    }
-    tmplow = lir;
-    tmpupp = lir;
-    if (col>-1) {
-      while(tmplow<iupp-1) {
-        int mid = tmplow + (iupp-tmplow)/2;
-        xval.zull = ctwiddle(dic, o ? o[mid]-1 : mid);
-        if (xval.zull.re == ival.zull.re && xval.zull.im == ival.zull.im) tmplow=mid; else iupp=mid;
-      }
-      while(ilow<tmpupp-1) {
-        int mid = ilow + (tmpupp-ilow)/2;
-        xval.zull = ctwiddle(dic, o ? o[mid]-1 : mid);
-        if (xval.zull.re == ival.zull.re && xval.zull.im == ival.zull.im) tmpupp=mid; else ilow=mid;
-      }
-    }
-    // ilow and iupp now surround the group in ic, too
-  }  break;
   default:
-    error("Type '%s' not supported as key column", type2char(TYPEOF(xc)));
+    error("Type '%s' not supported for merging", type2char(TYPEOF(xc)));
   }
   if (xlow<xupp-1) { // if value found, low and upp surround it, unlike standard binary search where low falls on it
     if (col<ncol-1) {
diff --git a/src/data.table.h b/src/data.table.h
index 9c652374cc..35e17e4875 100644
--- a/src/data.table.h
+++ b/src/data.table.h
@@ -114,12 +114,6 @@ SEXP setcolorder(SEXP x, SEXP o);
 // forder.c
 int StrCmp(SEXP x, SEXP y);
 uint64_t dtwiddle(void *p, int i);
-// for sorting of complex values
-typedef struct cplxTwiddled {
-  uint64_t re;
-  uint64_t im;
-} cplxTwiddled;
-cplxTwiddled ctwiddle(Rcomplex *p, int i);
 SEXP forder(SEXP DT, SEXP by, SEXP retGrp, SEXP sortStrArg, SEXP orderArg, SEXP naArg);
 bool need2utf8(SEXP x, int n);
 SEXP isReallyReal(SEXP);
diff --git a/src/forder.c b/src/forder.c
index 5f31bc1b2f..9a390c8d9b 100644
--- a/src/forder.c
+++ b/src/forder.c
@@ -409,38 +409,6 @@ uint64_t dtwiddle(void *p, int i)
   Error("Unknown non-finite value; not NA, NaN, -Inf or +Inf");  // # nocov
 }
 
-cplxTwiddled ctwiddle(Rcomplex *p, int i) {
-  union {
-    double d;
-    uint64_t u64;
-  } u_re, u_im;
-  cplxTwiddled out;
-  Rcomplex z = p[i];
-  u_re.d = z.r;
-  u_im.d = z.i;
-  if (R_FINITE(u_re.d)) {
-    if (u_re.d==0) u_re.d=0;
-    u_re.u64 ^= (u_re.u64 & 0x8000000000000000) ? 0xffffffffffffffff : 0x8000000000000000;
-    u_re.u64 += (u_re.u64 & dmask) << 1;
-    out.re = u_re.u64 >> (dround*8);
-  } else if (ISNAN(u_re.d)) {
-    out.re = ISNA(u_re.d) ? 0 : 1;
-  } else if (isinf(u_re.d)) {
-    out.re = signbit(u_re.d) ? 2: (0xffffffffffffffff>>(dround*8));
-  } else Error("Unknown non-finite value; not NA, NaN, -Inf or +Inf");  // # nocov
-  if (R_FINITE(u_im.d)) {
-    if (u_im.d==0) u_im.d=0;
-    u_im.u64 ^= (u_im.u64 & 0x8000000000000000) ? 0xffffffffffffffff : 0x8000000000000000;
-    u_im.u64 += (u_im.u64 & dmask) << 1;
-    out.re = u_im.u64 >> (dround*8);
-  } else if (ISNAN(u_im.d)) {
-    out.re = ISNA(u_im.d) ? 0 : 1;
-  } else if (isinf(u_im.d)) {
-    out.re = signbit(u_im.d) ? 2: (0xffffffffffffffff>>(dround*8));
-  } else Error("Unknown non-finite value; not NA, NaN, -Inf or +Inf");  // # nocov
-  return out;
-}
-
 void radix_r(const int from, const int to, const int radix);
 
 SEXP forder(SEXP DT, SEXP by, SEXP retGrpArg, SEXP sortGroupsArg, SEXP ascArg, SEXP naArg)
diff --git a/src/uniqlist.c b/src/uniqlist.c
index 0a29ffeab5..10f9455c93 100644
--- a/src/uniqlist.c
+++ b/src/uniqlist.c
@@ -28,19 +28,17 @@ SEXP uniqlist(SEXP l, SEXP order)
 
   if (ncol==1) {
 
-// unfortunately no equality method overloaded for Rcomplex, compare by components
-#define TEST_ELEM_NEQ if(elem != prev
-#define TEST_ELEM_NEQ_C if((elem.r != prev.r || elem.i != prev.i)
-
 #define COMPARE1                                                                 \
       prev = *vd;                                                                \
       for (int i=1; i<nrow; i++) {                                               \
-        elem = *++vd;
+        elem = *++vd;                                                            \
+        if (elem!=prev
 
 #define COMPARE1_VIA_ORDER                                                       \
       prev = vd[*o -1];                                                          \
       for (int i=1; i<nrow; i++) {                                               \
-        elem = vd[*++o -1];
+        elem = vd[*++o -1];                                                      \
+        if (elem!=prev
 
 #define COMPARE2                                                                 \
                         ) {                                                      \
@@ -61,19 +59,19 @@ SEXP uniqlist(SEXP l, SEXP order)
       int prev, elem;
       if (via_order) {
         // ad hoc by (order passed in)
-        COMPARE1_VIA_ORDER TEST_ELEM_NEQ COMPARE2
+        COMPARE1_VIA_ORDER COMPARE2
       } else {
         // e.g. by=key(DT)[1]
-        COMPARE1           TEST_ELEM_NEQ COMPARE2
+        COMPARE1           COMPARE2
       }
     } break;
     case STRSXP : {
       const SEXP *vd=STRING_PTR(v);
       SEXP prev, elem;
       if (via_order) {
-        COMPARE1_VIA_ORDER TEST_ELEM_NEQ && ENC2UTF8(elem)!=ENC2UTF8(prev) COMPARE2   // but most of the time they are equal, so ENC2UTF8 doesn't need to be called
+        COMPARE1_VIA_ORDER && ENC2UTF8(elem)!=ENC2UTF8(prev) COMPARE2   // but most of the time they are equal, so ENC2UTF8 doesn't need to be called
       } else {
-        COMPARE1           TEST_ELEM_NEQ && ENC2UTF8(elem)!=ENC2UTF8(prev) COMPARE2
+        COMPARE1           && ENC2UTF8(elem)!=ENC2UTF8(prev) COMPARE2
       }
     } break;
     case REALSXP : {
@@ -82,33 +80,15 @@ SEXP uniqlist(SEXP l, SEXP order)
       // grouping by integer64 makes sense (ids). grouping by float supported but a good use-case for that is harder to imagine
       if (getNumericRounding_C()==0 /*default*/ || inherits(v, "integer64")) {
         if (via_order) {
-          COMPARE1_VIA_ORDER TEST_ELEM_NEQ COMPARE2
-        } else {
-          COMPARE1           TEST_ELEM_NEQ COMPARE2
-        }
-      } else {
-        if (via_order) {
-          COMPARE1_VIA_ORDER TEST_ELEM_NEQ && dtwiddle(&elem, 0)!=dtwiddle(&prev, 0) COMPARE2
-        } else {
-          COMPARE1           TEST_ELEM_NEQ && dtwiddle(&elem, 0)!=dtwiddle(&prev, 0) COMPARE2
-        }
-      }
-    } break;
-    case CPLXSXP : {
-      const Rcomplex *vd=(const Rcomplex *)COMPLEX(v);
-      Rcomplex prev, elem;
-      if (getNumericRounding_C()==0) {
-        if (via_order) {
-          COMPARE1_VIA_ORDER TEST_ELEM_NEQ_C COMPARE2
+          COMPARE1_VIA_ORDER COMPARE2
         } else {
-          COMPARE1           TEST_ELEM_NEQ_C COMPARE2
+          COMPARE1           COMPARE2
         }
       } else {
-        cplxTwiddled elemz=ctwiddle(&elem, 0), prevz=ctwiddle(&prev, 0);
         if (via_order) {
-          COMPARE1_VIA_ORDER TEST_ELEM_NEQ_C && (elemz.re != prevz.re || elemz.im != prevz.im) COMPARE2
+          COMPARE1_VIA_ORDER && dtwiddle(&elem, 0)!=dtwiddle(&prev, 0) COMPARE2
         } else {
-          COMPARE1           TEST_ELEM_NEQ_C && (elemz.re != prevz.re || elemz.im != prevz.im) COMPARE2
+          COMPARE1           && dtwiddle(&elem, 0)!=dtwiddle(&prev, 0) COMPARE2
         }
       }
     } break;
@@ -143,15 +123,6 @@ SEXP uniqlist(SEXP l, SEXP order)
             // to be stored, ii) many short-circuit early before the if (!b) anyway (negating benefit) and iii) we may not have needed LHS this time so logic would be complex.
           }
           break;
-        case CPLXSXP: {
-          Rcomplex *pz = COMPLEX(v);
-          b = (unsigned long long)pz[thisi].r == (unsigned long long)pz[previ].r &&
-            (unsigned long long)pz[thisi].i == (unsigned long long)pz[previ].i;
-          if (!b) {
-            cplxTwiddled thisz=ctwiddle(pz, thisi), prevz=ctwiddle(pz, previ);
-            b = thisz.re == prevz.re && thisz.im == prevz.im;
-          }
-        } break;
         default :
           error("Type '%s' not supported", type2char(TYPEOF(v)));  // # nocov
         }
@@ -347,11 +318,6 @@ SEXP nestedid(SEXP l, SEXP cols, SEXP order, SEXP grps, SEXP resetvals, SEXP mul
           b = i64[j] ? ((int64_t *)xd)[thisi] >= ((int64_t *)xd)[previ] :
                        dtwiddle(xd, thisi) >= dtwiddle(xd, previ);
         } break;
-        case CPLXSXP: {
-          Rcomplex *pz = COMPLEX(v);
-          cplxTwiddled thisz=ctwiddle(pz, thisi), prevz=ctwiddle(pz, previ);
-          b = thisz.re >= prevz.re || (thisz.re == thisz.re && thisz.im >= thisz.im);
-        } break;
         default:
           error("Type '%s' not supported", type2char(TYPEOF(v)));  // # nocov
         }
@@ -406,4 +372,3 @@ SEXP uniqueNlogical(SEXP x, SEXP narmArg) {
     return ScalarInteger(3-narm);
   return ScalarInteger(2-(narm && third!=NA_LOGICAL));
 }
-

From b922ee9d674d79a64b089b4973c1809e5a59c3fc Mon Sep 17 00:00:00 2001
From: Michael Chirico <michael.chirico@grabtaxi.com>
Date: Thu, 18 Jul 2019 09:58:53 +0800
Subject: [PATCH 16/25] tidy up tests

---
 inst/tests/tests.Rraw | 25 +++++++------------------
 1 file changed, 7 insertions(+), 18 deletions(-)

diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index b0a1d91f13..ce5ee7e356 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -15360,32 +15360,21 @@ test(2069.11, frank(DT$z, ties.method='max'), c(2L, 2L, 4L, 4L, 5L, 6L))
 test(2069.12, frank(-DT$z, ties.method='min'), c(5L, 5L, 3L, 3L, 2L, 1L))
 test(2069.13, DT[ , rowid(z, grp)], rep(1L, 6L))
 test(2069.14, DT[ , rowid(z)], c(1:2, 1:2, 1L, 1L))
+test(2069.15, rleid(c(1i, 1i, 1i, 0, 0, 1-1i, 2+3i, 2+3i)), rep(1:4, c(3:1, 2L)))
 
-# #1444 -- support for ordering tables with complex columns
+# setkey, setorder work
 DT = data.table(a = 2:1, z = 0 + (1:0)*1i)
-test(2069.16, setkey(copy(DT), a), data.table(a=1:2, z=0+ (0:1)*1i, key='a'))
-test(2069.17, DT[ , abs(z), by=a], data.table(a=2:1, V1=c(1, 0)))
-test(2069.18, setkey(copy(DT), z), data.table(a=1:2, z=0+ (0:1)*1i, key='z'))
-test(2069.19, setorder(DT, z), data.table(a=1:2, z=0+ (0:1)*1i))
-
-# raw continues not to be supported
-DT = data.table(ID=2:1, r=as.raw(0:1))
-test(2069.20, setkey(DT, ID), error="Item 2 of list is type 'raw'")
-# setreordervec triggers !isNewList branch for coverage
-test(2069.21, setreordervec(DT$r, order(DT$ID)), error="reorder accepts vectors but this non-VECSXP")
-
-# DT1 = data.table(z = c(0+1i, 2-3i, 4+1i))
-# DT2 = data.table(z = c(2-3i, 0+1i, 0+0i))
-# DT1[DT2, on = 'z']
+test(2069.16, setkey(copy(DT), z), data.table(a=1:2, z=0+ (0:1)*1i, key='z'))
+test(2069.17, setorder(DT, z), data.table(a=1:2, z=0+ (0:1)*1i))
 
 ## assorted coverage tests from along the way
 if (test_bit64) {
-  test(2069.50, is.sorted(as.integer64(10:1)), FALSE)
-  test(2069.51, is.sorted(as.integer64(1:10)))
+  test(2069.18, is.sorted(as.integer64(10:1)), FALSE)
+  test(2069.19, is.sorted(as.integer64(1:10)))
 }
 # sort by vector outside of table
 ord = 3:1
-test(2069.52, forder(data.table(a = 3:1), ord), 3:1)
+test(2069.20, forder(data.table(a = 3:1), ord), 3:1)
 
 
 ###################################

From b6565415cf623499ac88f920f0fc8997ee02bf2b Mon Sep 17 00:00:00 2001
From: Michael Chirico <michael.chirico@grabtaxi.com>
Date: Thu, 18 Jul 2019 10:07:04 +0800
Subject: [PATCH 17/25] unique also works

---
 NEWS.md               |  2 +-
 inst/tests/tests.Rraw | 13 ++++++++-----
 2 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/NEWS.md b/NEWS.md
index 704711026d..b08e965ec5 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -130,7 +130,7 @@
     identical(y1,y2) && identical(y1,y3)
     # TRUE
     ```
-    
+
 19. `shift` now supports type `complex`, part of [#3690](https://github.com/Rdatatable/data.table/issues/3690).
 
 20. `setkey` now supports type `complex` as value columns (not as key columns), [#1444](https://github.com/Rdatatable/data.table/issues/1444). Thanks Gareth Ward for the report.
diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index ce5ee7e356..1751be76a3 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -15361,20 +15361,23 @@ test(2069.12, frank(-DT$z, ties.method='min'), c(5L, 5L, 3L, 3L, 2L, 1L))
 test(2069.13, DT[ , rowid(z, grp)], rep(1L, 6L))
 test(2069.14, DT[ , rowid(z)], c(1:2, 1:2, 1L, 1L))
 test(2069.15, rleid(c(1i, 1i, 1i, 0, 0, 1-1i, 2+3i, 2+3i)), rep(1:4, c(3:1, 2L)))
+test(2069.16, unique(DT, by = 'z'), data.table(z = unq_z, grp = c(1L, 1L, 1L, 2L), v = c(3, 4, 5, 9)))
+test(2069.17, unique(DT, by = 'z', fromLast = TRUE), data.table(z = unq_z, grp = c(2L, 2L, 1L, 2L), v = c(1, 1, 5, 9)))
+test(2069.18, uniqueN(DT$z), 4L)
 
 # setkey, setorder work
 DT = data.table(a = 2:1, z = 0 + (1:0)*1i)
-test(2069.16, setkey(copy(DT), z), data.table(a=1:2, z=0+ (0:1)*1i, key='z'))
-test(2069.17, setorder(DT, z), data.table(a=1:2, z=0+ (0:1)*1i))
+test(2069.19, setkey(copy(DT), z), data.table(a=1:2, z=0+ (0:1)*1i, key='z'))
+test(2069.20, setorder(DT, z), data.table(a=1:2, z=0+ (0:1)*1i))
 
 ## assorted coverage tests from along the way
 if (test_bit64) {
-  test(2069.18, is.sorted(as.integer64(10:1)), FALSE)
-  test(2069.19, is.sorted(as.integer64(1:10)))
+  test(2069.21, is.sorted(as.integer64(10:1)), FALSE)
+  test(2069.22, is.sorted(as.integer64(1:10)))
 }
 # sort by vector outside of table
 ord = 3:1
-test(2069.20, forder(data.table(a = 3:1), ord), 3:1)
+test(2069.23, forder(data.table(a = 3:1), ord), 3:1)
 
 
 ###################################

From 3c3228b4fb7eeb20d2cb7308a63871baa2fd8910 Mon Sep 17 00:00:00 2001
From: Michael Chirico <michael.chirico@grabtaxi.com>
Date: Thu, 18 Jul 2019 11:53:49 +0800
Subject: [PATCH 18/25] updated NEWS item & added coverage tests

---
 NEWS.md               |  2 +-
 inst/tests/tests.Rraw | 12 +++++++++++-
 src/dogroups.c        | 18 ++++++++++--------
 3 files changed, 22 insertions(+), 10 deletions(-)

diff --git a/NEWS.md b/NEWS.md
index b08e965ec5..85ec154838 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -135,7 +135,7 @@
 
 20. `setkey` now supports type `complex` as value columns (not as key columns), [#1444](https://github.com/Rdatatable/data.table/issues/1444). Thanks Gareth Ward for the report.
 
-21. Sorting now extended to complex vectors, [#1703](https://github.com/Rdatatable/data.table/issues/1703). Consistent with `base::order`, sorting is done lexicographically (`z1<z2` means `Re(z1) < Re(z2) | (Re(z1) == Re(z2) & Im(z1) < Im(z2))`).
+21. Sorting now extended to complex vectors, [#1703](https://github.com/Rdatatable/data.table/issues/1703). Consistent with `base::order`, sorting is done lexicographically (`z1<z2` means `Re(z1) < Re(z2) | (Re(z1) == Re(z2) & Im(z1) < Im(z2))`). By extension, several functions that rely on our internal `forderv` to work now also accept complex vectors -- `setkey`, `setorder`, grouping `by` complex, `dcast`, `frank`, `rowid`, `rleid`, `unique` by complex.
 
 
 #### BUG FIXES
diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index 1751be76a3..18f8dd2cad 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -15377,7 +15377,17 @@ if (test_bit64) {
 }
 # sort by vector outside of table
 ord = 3:1
-test(2069.23, forder(data.table(a = 3:1), ord), 3:1)
+test(2069.23, forder(data.table(a=3:1), ord), 3:1)
+# dogroups.c coverage
+test(2069.24, data.table(c='1')[ , expression(1), by=c], error="j evaluates to type 'expression'")
+test(2069.25, data.table(c='1', d=2)[ , d := .(NULL), by=c], error='RHS is NULL when grouping :=')
+test(2069.26, data.table(c='1', d=2)[ , c(a='b'), by=c, verbose=TRUE], output='j appears to be a named vector')
+test(2069.27, data.table(c = '1', d = 2)[ , .(a = c(nm='b')), by = c, verbose = TRUE], output = 'Column 1 of j is a named vector')
+DT <- data.table(a = rep(1:3, each = 4), b = LETTERS[1:4], z = 0:3 + (4:1)*1i)
+test(2069.28, DT[, .SD[3,], by=b], DT[9:12, .(b, a, z)])
+DT = data.table(x=1:4,y=1:2,lgl=TRUE,key="x,y")
+test(2069.29, DT[CJ(1:4,1:4), any(lgl), by=.EACHI]$V1,
+     c(TRUE, NA, NA, NA, NA, TRUE, NA, NA, TRUE, NA, NA, NA, NA, TRUE, NA, NA))
 
 
 ###################################
diff --git a/src/dogroups.c b/src/dogroups.c
index 5f8ee1a61f..191c6ef54e 100644
--- a/src/dogroups.c
+++ b/src/dogroups.c
@@ -36,7 +36,7 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX
     SET_STRING_ELT(bynames, i, STRING_ELT(getAttrib(groups,R_NamesSymbol), j));
     defineVar(install(CHAR(STRING_ELT(bynames,i))), VECTOR_ELT(BY,i), env);      // by vars can be used by name in j as well as via .BY
     if (SIZEOF(VECTOR_ELT(BY,i))==0)
-      error("Unsupported type '%s' in column %d of 'by'", type2char(TYPEOF(VECTOR_ELT(BY, i))), i+1);
+      error("Internal error: unsupported size-0 type '%s' in column %d of 'by' should have been caught earlier", type2char(TYPEOF(VECTOR_ELT(BY, i))), i+1); // #nocov
   }
   setAttrib(BY, R_NamesSymbol, bynames); // Fix for #5415 - BY doesn't retain names anymore
   R_LockBinding(sym_BY, env);
@@ -70,7 +70,7 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX
   SEXP *nameSyms = (SEXP *)R_alloc(length(names), sizeof(SEXP));
   for(int i=0; i<length(SDall); ++i) {
     if (SIZEOF(VECTOR_ELT(SDall, i))==0)
-      error("Type %d in .SD column %d", TYPEOF(VECTOR_ELT(SDall, i)), i);
+      error("Internal error: size-0 type %d in .SD column %d should have been caught earlier", TYPEOF(VECTOR_ELT(SDall, i)), i); // #nocov
     nameSyms[i] = install(CHAR(STRING_ELT(names, i)));
     // fixes http://stackoverflow.com/questions/14753411/why-does-data-table-lose-class-definition-in-sd-after-group-by
     copyMostAttrib(VECTOR_ELT(dt,INTEGER(dtcols)[i]-1), VECTOR_ELT(SDall,i));  // not names, otherwise test 778 would fail
@@ -153,7 +153,7 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX
           SET_VECTOR_ELT(VECTOR_ELT(SDall,j),0,R_NilValue);
           break;
         default:
-          error("Logical error. Type of column should have been checked by now");
+          error("Internal error. Type of column should have been checked by now"); // #nocov
         }
       }
       grpn = 1;  // it may not be 1 e.g. test 722. TODO: revisit.
@@ -161,9 +161,9 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX
       INTEGER(I)[0] = 0;
       for (int j=0; j<length(xSD); ++j) {
         switch (TYPEOF(VECTOR_ELT(xSD, j))) {
-        case LGLSXP :
-          LOGICAL(VECTOR_ELT(xSD,j))[0] = NA_LOGICAL;
-          break;
+        case LGLSXP : // #nocov
+          LOGICAL(VECTOR_ELT(xSD,j))[0] = NA_LOGICAL; // #nocov
+          break; // #nocov
         case INTSXP :
           INTEGER(VECTOR_ELT(xSD,j))[0] = NA_INTEGER;
           break;
@@ -171,16 +171,18 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX
           REAL(VECTOR_ELT(xSD,j))[0] = NA_REAL;
           break;
         case CPLXSXP : {
+          // TODO: test; requires bmerge.c accomodation for CPLXSXP
           COMPLEX(VECTOR_ELT(xSD, j))[0] = NA_CPLX;
         }  break;
         case STRSXP :
           SET_STRING_ELT(VECTOR_ELT(xSD,j),0,NA_STRING);
           break;
         case VECSXP :
+          // TODO: test; requires ability to merge on list columns
           SET_VECTOR_ELT(VECTOR_ELT(xSD,j),0,R_NilValue);
           break;
         default:
-          error("Logical error. Type of column should have been checked by now"); // #nocov
+          error("Internal error. Type of column should have been checked by now"); // #nocov
         }
       }
     } else {
@@ -446,7 +448,7 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX
           for (int r=0; r<maxn; ++r) SET_VECTOR_ELT(target,thisansloc+r,R_NilValue);
           break;
         default:
-          error("Logical error. Type of column should have been checked by now");
+          error("Internal error. Type of column should have been checked by now"); // #nocov
         }
       } else {
         // thislen>0

From 009935cfaad6b7bb9cc72a4e7b4d4a9894fa6e6b Mon Sep 17 00:00:00 2001
From: Michael Chirico <michael.chirico@grabtaxi.com>
Date: Thu, 18 Jul 2019 12:28:59 +0800
Subject: [PATCH 19/25] one more nocov

---
 src/dogroups.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/dogroups.c b/src/dogroups.c
index 191c6ef54e..3ea0b0ba9b 100644
--- a/src/dogroups.c
+++ b/src/dogroups.c
@@ -84,7 +84,7 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX
   SEXP *xknameSyms = (SEXP *)R_alloc(length(xknames), sizeof(SEXP));
   for(int i=0; i<length(xSD); ++i) {
     if (SIZEOF(VECTOR_ELT(xSD, i))==0)
-      error("Type %d in .xSD column %d", TYPEOF(VECTOR_ELT(xSD, i)), i);
+      error("Internal error: type %d in .xSD column %d should have been caught by now", TYPEOF(VECTOR_ELT(xSD, i)), i); // #nocov
     xknameSyms[i] = install(CHAR(STRING_ELT(xknames, i)));
   }
 

From f59dc57bab7a517a13c3befcabf8ade6c24d6ec4 Mon Sep 17 00:00:00 2001
From: Michael Chirico <michael.chirico@grabtaxi.com>
Date: Thu, 18 Jul 2019 13:53:56 +0800
Subject: [PATCH 20/25] actually hit LGLSXP branch!

---
 inst/tests/tests.Rraw | 4 ++++
 src/dogroups.c        | 6 +++---
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index 18f8dd2cad..0227e05cca 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -15388,6 +15388,10 @@ test(2069.28, DT[, .SD[3,], by=b], DT[9:12, .(b, a, z)])
 DT = data.table(x=1:4,y=1:2,lgl=TRUE,key="x,y")
 test(2069.29, DT[CJ(1:4,1:4), any(lgl), by=.EACHI]$V1,
      c(TRUE, NA, NA, NA, NA, TRUE, NA, NA, TRUE, NA, NA, NA, NA, TRUE, NA, NA))
+set.seed(45L)
+DT1 = data.table(a = sample(3L, 15L, TRUE) + .1, b=sample(c(TRUE, FALSE, NA), 15L, TRUE))
+DT2 = data.table(a = sample(3L, 6L, TRUE) + .1, b=sample(c(TRUE, FALSE, NA), 6L, TRUE))
+test(2069.30, DT1[DT2, .(y = sum(b, na.rm=TRUE)), by=.EACHI, on=c(a = 'a', b="b")]$y, rep(0L, 6L))
 
 
 ###################################
diff --git a/src/dogroups.c b/src/dogroups.c
index 3ea0b0ba9b..ea036d0bff 100644
--- a/src/dogroups.c
+++ b/src/dogroups.c
@@ -161,9 +161,9 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX
       INTEGER(I)[0] = 0;
       for (int j=0; j<length(xSD); ++j) {
         switch (TYPEOF(VECTOR_ELT(xSD, j))) {
-        case LGLSXP : // #nocov
-          LOGICAL(VECTOR_ELT(xSD,j))[0] = NA_LOGICAL; // #nocov
-          break; // #nocov
+        case LGLSXP :
+          LOGICAL(VECTOR_ELT(xSD,j))[0] = NA_LOGICAL;
+          break;
         case INTSXP :
           INTEGER(VECTOR_ELT(xSD,j))[0] = NA_INTEGER;
           break;

From 0338226fea1c7e9406933b75d2f9e4c144d81d4a Mon Sep 17 00:00:00 2001
From: Michael Chirico <michael.chirico@grabtaxi.com>
Date: Thu, 18 Jul 2019 14:15:57 +0800
Subject: [PATCH 21/25] more coverage

---
 inst/tests/tests.Rraw | 34 ++++++++++++++++++----------------
 1 file changed, 18 insertions(+), 16 deletions(-)

diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index 0227e05cca..88a04b0eef 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -15361,38 +15361,40 @@ test(2069.12, frank(-DT$z, ties.method='min'), c(5L, 5L, 3L, 3L, 2L, 1L))
 test(2069.13, DT[ , rowid(z, grp)], rep(1L, 6L))
 test(2069.14, DT[ , rowid(z)], c(1:2, 1:2, 1L, 1L))
 test(2069.15, rleid(c(1i, 1i, 1i, 0, 0, 1-1i, 2+3i, 2+3i)), rep(1:4, c(3:1, 2L)))
-test(2069.16, unique(DT, by = 'z'), data.table(z = unq_z, grp = c(1L, 1L, 1L, 2L), v = c(3, 4, 5, 9)))
-test(2069.17, unique(DT, by = 'z', fromLast = TRUE), data.table(z = unq_z, grp = c(2L, 2L, 1L, 2L), v = c(1, 1, 5, 9)))
-test(2069.18, uniqueN(DT$z), 4L)
+test(2069.16, rleidv(DT, "z"), c(1L, 1L, 2L, 2L, 3L, 4L))
+test(2069.17, unique(DT, by = 'z'), data.table(z = unq_z, grp = c(1L, 1L, 1L, 2L), v = c(3, 4, 5, 9)))
+test(2069.18, unique(DT, by = 'z', fromLast = TRUE), data.table(z = unq_z, grp = c(2L, 2L, 1L, 2L), v = c(1, 1, 5, 9)))
+test(2069.19, uniqueN(DT$z), 4L)
 
 # setkey, setorder work
 DT = data.table(a = 2:1, z = 0 + (1:0)*1i)
-test(2069.19, setkey(copy(DT), z), data.table(a=1:2, z=0+ (0:1)*1i, key='z'))
-test(2069.20, setorder(DT, z), data.table(a=1:2, z=0+ (0:1)*1i))
+test(2069.20, setkey(copy(DT), z), data.table(a=1:2, z=0+ (0:1)*1i, key='z'))
+test(2069.21, setorder(DT, z), data.table(a=1:2, z=0+ (0:1)*1i))
 
 ## assorted coverage tests from along the way
 if (test_bit64) {
-  test(2069.21, is.sorted(as.integer64(10:1)), FALSE)
-  test(2069.22, is.sorted(as.integer64(1:10)))
+  test(2069.22, is.sorted(as.integer64(10:1)), FALSE)
+  test(2069.23, is.sorted(as.integer64(1:10)))
 }
 # sort by vector outside of table
 ord = 3:1
-test(2069.23, forder(data.table(a=3:1), ord), 3:1)
+test(2069.24, forder(data.table(a=3:1), ord), 3:1)
 # dogroups.c coverage
-test(2069.24, data.table(c='1')[ , expression(1), by=c], error="j evaluates to type 'expression'")
-test(2069.25, data.table(c='1', d=2)[ , d := .(NULL), by=c], error='RHS is NULL when grouping :=')
-test(2069.26, data.table(c='1', d=2)[ , c(a='b'), by=c, verbose=TRUE], output='j appears to be a named vector')
-test(2069.27, data.table(c = '1', d = 2)[ , .(a = c(nm='b')), by = c, verbose = TRUE], output = 'Column 1 of j is a named vector')
+test(2069.25, data.table(c='1')[ , expression(1), by=c], error="j evaluates to type 'expression'")
+test(2069.26, data.table(c='1', d=2)[ , d := .(NULL), by=c], error='RHS is NULL when grouping :=')
+test(2069.27, data.table(c='1', d=2)[ , c(a='b'), by=c, verbose=TRUE], output='j appears to be a named vector')
+test(2069.28, data.table(c = '1', d = 2)[ , .(a = c(nm='b')), by = c, verbose = TRUE], output = 'Column 1 of j is a named vector')
 DT <- data.table(a = rep(1:3, each = 4), b = LETTERS[1:4], z = 0:3 + (4:1)*1i)
-test(2069.28, DT[, .SD[3,], by=b], DT[9:12, .(b, a, z)])
+test(2069.29, DT[, .SD[3,], by=b], DT[9:12, .(b, a, z)])
 DT = data.table(x=1:4,y=1:2,lgl=TRUE,key="x,y")
-test(2069.29, DT[CJ(1:4,1:4), any(lgl), by=.EACHI]$V1,
+test(2069.30, DT[CJ(1:4,1:4), any(lgl), by=.EACHI]$V1,
      c(TRUE, NA, NA, NA, NA, TRUE, NA, NA, TRUE, NA, NA, NA, NA, TRUE, NA, NA))
 set.seed(45L)
 DT1 = data.table(a = sample(3L, 15L, TRUE) + .1, b=sample(c(TRUE, FALSE, NA), 15L, TRUE))
 DT2 = data.table(a = sample(3L, 6L, TRUE) + .1, b=sample(c(TRUE, FALSE, NA), 6L, TRUE))
-test(2069.30, DT1[DT2, .(y = sum(b, na.rm=TRUE)), by=.EACHI, on=c(a = 'a', b="b")]$y, rep(0L, 6L))
-
+test(2069.31, DT1[DT2, .(y = sum(b, na.rm=TRUE)), by=.EACHI, on=c(a = 'a', b="b")]$y, rep(0L, 6L))
+DT = data.table(z = 1i)
+test(2069.32, DT[DT, on = 'z'], error = "Type 'complex' not supported for merging")
 
 ###################################
 #  Add new tests above this line  #

From 598097bb556db1f6bdcff2544f05c11b662a2625 Mon Sep 17 00:00:00 2001
From: Michael Chirico <michael.chirico@grabtaxi.com>
Date: Fri, 19 Jul 2019 08:48:38 +0800
Subject: [PATCH 22/25] use direct double comparison instead of type punning

---
 inst/tests/tests.Rraw | 36 +++++++++++++++++++-----------------
 src/uniqlist.c        |  5 ++---
 2 files changed, 21 insertions(+), 20 deletions(-)

diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index 88a04b0eef..5edb64efbb 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -15361,40 +15361,42 @@ test(2069.12, frank(-DT$z, ties.method='min'), c(5L, 5L, 3L, 3L, 2L, 1L))
 test(2069.13, DT[ , rowid(z, grp)], rep(1L, 6L))
 test(2069.14, DT[ , rowid(z)], c(1:2, 1:2, 1L, 1L))
 test(2069.15, rleid(c(1i, 1i, 1i, 0, 0, 1-1i, 2+3i, 2+3i)), rep(1:4, c(3:1, 2L)))
-test(2069.16, rleidv(DT, "z"), c(1L, 1L, 2L, 2L, 3L, 4L))
-test(2069.17, unique(DT, by = 'z'), data.table(z = unq_z, grp = c(1L, 1L, 1L, 2L), v = c(3, 4, 5, 9)))
-test(2069.18, unique(DT, by = 'z', fromLast = TRUE), data.table(z = unq_z, grp = c(2L, 2L, 1L, 2L), v = c(1, 1, 5, 9)))
-test(2069.19, uniqueN(DT$z), 4L)
+# handling doubles properly
+test(2069.16, rleid(c(1i, 1.1i)), 1:2)
+test(2069.17, rleidv(DT, "z"), c(1L, 1L, 2L, 2L, 3L, 4L))
+test(2069.18, unique(DT, by = 'z'), data.table(z = unq_z, grp = c(1L, 1L, 1L, 2L), v = c(3, 4, 5, 9)))
+test(2069.19, unique(DT, by = 'z', fromLast = TRUE), data.table(z = unq_z, grp = c(2L, 2L, 1L, 2L), v = c(1, 1, 5, 9)))
+test(2069.20, uniqueN(DT$z), 4L)
 
 # setkey, setorder work
 DT = data.table(a = 2:1, z = 0 + (1:0)*1i)
-test(2069.20, setkey(copy(DT), z), data.table(a=1:2, z=0+ (0:1)*1i, key='z'))
-test(2069.21, setorder(DT, z), data.table(a=1:2, z=0+ (0:1)*1i))
+test(2069.21, setkey(copy(DT), z), data.table(a=1:2, z=0+ (0:1)*1i, key='z'))
+test(2069.22, setorder(DT, z), data.table(a=1:2, z=0+ (0:1)*1i))
 
 ## assorted coverage tests from along the way
 if (test_bit64) {
-  test(2069.22, is.sorted(as.integer64(10:1)), FALSE)
-  test(2069.23, is.sorted(as.integer64(1:10)))
+  test(2069.23, is.sorted(as.integer64(10:1)), FALSE)
+  test(2069.24, is.sorted(as.integer64(1:10)))
 }
 # sort by vector outside of table
 ord = 3:1
-test(2069.24, forder(data.table(a=3:1), ord), 3:1)
+test(2069.25, forder(data.table(a=3:1), ord), 3:1)
 # dogroups.c coverage
-test(2069.25, data.table(c='1')[ , expression(1), by=c], error="j evaluates to type 'expression'")
-test(2069.26, data.table(c='1', d=2)[ , d := .(NULL), by=c], error='RHS is NULL when grouping :=')
-test(2069.27, data.table(c='1', d=2)[ , c(a='b'), by=c, verbose=TRUE], output='j appears to be a named vector')
-test(2069.28, data.table(c = '1', d = 2)[ , .(a = c(nm='b')), by = c, verbose = TRUE], output = 'Column 1 of j is a named vector')
+test(2069.26, data.table(c='1')[ , expression(1), by=c], error="j evaluates to type 'expression'")
+test(2069.27, data.table(c='1', d=2)[ , d := .(NULL), by=c], error='RHS is NULL when grouping :=')
+test(2069.28, data.table(c='1', d=2)[ , c(a='b'), by=c, verbose=TRUE], output='j appears to be a named vector')
+test(2069.29, data.table(c = '1', d = 2)[ , .(a = c(nm='b')), by = c, verbose = TRUE], output = 'Column 1 of j is a named vector')
 DT <- data.table(a = rep(1:3, each = 4), b = LETTERS[1:4], z = 0:3 + (4:1)*1i)
-test(2069.29, DT[, .SD[3,], by=b], DT[9:12, .(b, a, z)])
+test(2069.30, DT[, .SD[3,], by=b], DT[9:12, .(b, a, z)])
 DT = data.table(x=1:4,y=1:2,lgl=TRUE,key="x,y")
-test(2069.30, DT[CJ(1:4,1:4), any(lgl), by=.EACHI]$V1,
+test(2069.31, DT[CJ(1:4,1:4), any(lgl), by=.EACHI]$V1,
      c(TRUE, NA, NA, NA, NA, TRUE, NA, NA, TRUE, NA, NA, NA, NA, TRUE, NA, NA))
 set.seed(45L)
 DT1 = data.table(a = sample(3L, 15L, TRUE) + .1, b=sample(c(TRUE, FALSE, NA), 15L, TRUE))
 DT2 = data.table(a = sample(3L, 6L, TRUE) + .1, b=sample(c(TRUE, FALSE, NA), 6L, TRUE))
-test(2069.31, DT1[DT2, .(y = sum(b, na.rm=TRUE)), by=.EACHI, on=c(a = 'a', b="b")]$y, rep(0L, 6L))
+test(2069.32, DT1[DT2, .(y = sum(b, na.rm=TRUE)), by=.EACHI, on=c(a = 'a', b="b")]$y, rep(0L, 6L))
 DT = data.table(z = 1i)
-test(2069.32, DT[DT, on = 'z'], error = "Type 'complex' not supported for merging")
+test(2069.33, DT[DT, on = 'z'], error = "Type 'complex' not supported for merging")
 
 ###################################
 #  Add new tests above this line  #
diff --git a/src/uniqlist.c b/src/uniqlist.c
index 10f9455c93..318722930b 100644
--- a/src/uniqlist.c
+++ b/src/uniqlist.c
@@ -203,7 +203,7 @@ SEXP rleid(SEXP l, SEXP cols) {
         case CPLXSXP: {
           // tried to make long long complex * but got a warning that it's a GNU extension
           Rcomplex *pz = COMPLEX(jcol);
-          same = (long long)pz[i].r == (long long)pz[i-1].r && (long long)pz[i].i == (long long)pz[i-1].i;
+          same = pz[i].r == pz[i-1].r && pz[i].i == pz[i-1].i;
         } break;
         default :
           error("Type '%s' not supported", type2char(TYPEOF(jcol)));  // # nocov
@@ -240,8 +240,7 @@ SEXP rleid(SEXP l, SEXP cols) {
     case CPLXSXP: {
       Rcomplex *pzjcol = COMPLEX(jcol);
       for (R_xlen_t i=1; i<nrow; i++) {
-        bool same = (long long)pzjcol[i].r == (long long)pzjcol[i-1].r &&
-                    (long long)pzjcol[i].i == (long long)pzjcol[i-1].i;
+        bool same = pzjcol[i].r == pzjcol[i-1].r && pzjcol[i].i == pzjcol[i-1].i;
         ians[i] = (grp += !same);
       }
     } break;

From 2df22a354dae377fc3b17841861af1a63aecd383 Mon Sep 17 00:00:00 2001
From: mattdowle <mattjdowle@gmail.com>
Date: Thu, 18 Jul 2019 20:10:21 -0700
Subject: [PATCH 23/25] replaced big block with smaller modification to the
 main loop; also halves the working ram needed as it reuses the same tmp for
 the rerun

---
 src/forder.c  | 68 ++++++++++++++-------------------------------------
 src/reorder.c |  1 +
 2 files changed, 19 insertions(+), 50 deletions(-)

diff --git a/src/forder.c b/src/forder.c
index 9a390c8d9b..3dd2031a4b 100644
--- a/src/forder.c
+++ b/src/forder.c
@@ -449,55 +449,6 @@ SEXP forder(SEXP DT, SEXP by, SEXP retGrpArg, SEXP sortGroupsArg, SEXP ascArg, S
       error("Column %d is length %d which differs from length of column 1 (%d)\n", INTEGER(by)[i], length(VECTOR_ELT(DT, INTEGER(by)[i]-1)), nrow);
     if (TYPEOF(VECTOR_ELT(DT, by_i-1)) == CPLXSXP) n_cplx++;
   }
-  if (n_cplx) {
-    // we don't expect users to need complex sorting extensively
-    //   or on massive data sets, so we take the approach of
-    //  splitting a complex vector into its real & imaginary parts
-    //  and using the regular forderv machinery to sort; a baremetal
-    //  implementation would at root do the same, but the approach
-    //  here is a bit more slapdash with respect to memory efficiency
-    //  (seen clearly here at C from the 3+2*n_cplx PROTECT() calls)
-    int n_out = length(by) + n_cplx;
-    SEXP new_dt = PROTECT(allocVector(VECSXP, n_out)); n_protect++;
-    SEXP new_asc = PROTECT(allocVector(INTSXP, n_out)); n_protect++;
-    // will be simply 1:n_out
-    SEXP new_by = PROTECT(allocVector(INTSXP, n_out)); n_protect++;
-    int j = 0;
-    for (int i=0; i<length(by); i++) {
-      int by_idx = INTEGER(by)[i]-1;
-      if (TYPEOF(VECTOR_ELT(DT, by_idx)) == CPLXSXP) {
-        // I don't see any shorthand way of splitting of the real&imaginary components,
-        //   i.e., a shorthand way of doing Re(z), Im(z). That includes searching all of
-        //   the r-source code & all of the r-devel archives. So just reproduce Re(), Im()
-        //   as done in do_cmathfuns in complex.c
-        SEXP realPart = PROTECT(allocVector(REALSXP, nrow)); n_protect++;
-        SEXP cplxPart = PROTECT(allocVector(REALSXP, nrow)); n_protect++;
-        double *pre = REAL(realPart);
-        double *pim = REAL(cplxPart);
-        Rcomplex *pz = COMPLEX(VECTOR_ELT(DT, by_idx));
-        for (int i = 0; i < nrow; i++) {
-          pre[i] = pz[i].r;
-          pim[i] = pz[i].i;
-        }
-        SET_VECTOR_ELT(new_dt, j, realPart);
-        SET_VECTOR_ELT(new_dt, j+1, cplxPart);
-        INTEGER(new_asc)[j] = INTEGER(ascArg)[i];
-        INTEGER(new_asc)[j+1] = INTEGER(ascArg)[i];
-        INTEGER(new_by)[j] = j+1;
-        INTEGER(new_by)[j+1] = j+2;
-        j += 2;
-      } else {
-        SET_VECTOR_ELT(new_dt, j, VECTOR_ELT(DT, by_idx));
-        INTEGER(new_asc)[j] = INTEGER(ascArg)[i];
-        INTEGER(new_by)[j] = j+1;
-        j += 1;
-      }
-    }
-    DT = new_dt;
-    ascArg = new_asc;
-    by = new_by;
-  }
-
   if (!isLogical(retGrpArg) || LENGTH(retGrpArg)!=1 || INTEGER(retGrpArg)[0]==NA_LOGICAL) error("retGrp must be TRUE or FALSE");
   retgrp = LOGICAL(retGrpArg)[0]==TRUE;
   if (!isLogical(sortGroupsArg) || LENGTH(sortGroupsArg)!=1 || INTEGER(sortGroupsArg)[0]==NA_LOGICAL ) error("sortGroups must be TRUE or FALSE");
@@ -527,11 +478,14 @@ SEXP forder(SEXP DT, SEXP by, SEXP retGrpArg, SEXP sortGroupsArg, SEXP ascArg, S
   savetl_init();   // from now on use Error not error
 
   int ncol=length(by);
-  key = calloc(ncol*8+1, sizeof(uint8_t *));  // needs to be before loop because part II relies on part I, column-by-column. +1 because we check NULL after last one
+  key = calloc((ncol+n_cplx)*8+1, sizeof(uint8_t *));  // needs to be before loop because part II relies on part I, column-by-column. +1 because we check NULL after last one
   // TODO: if key==NULL Error
   nradix=0; // the current byte we're writing this column to; might be squashing into it (spare>0)
   int spare=0;  // the amount of bits remaining on the right of the current nradix byte
   bool isReal=false;
+  bool complexRerun = false;   // see comments below in CPLXSXP case
+  SEXP CplxPart = R_NilValue; 
+  if (n_cplx) { CplxPart=PROTECT(allocVector(REALSXP, nrow)); n_protect++; } // one alloc is reused for each part
   TEND(2);
   for (int col=0; col<ncol; col++) {
     // Rprintf("Finding range of column %d ...\n", col);
@@ -544,6 +498,20 @@ SEXP forder(SEXP DT, SEXP by, SEXP retGrpArg, SEXP sortGroupsArg, SEXP ascArg, S
     case INTSXP : case LGLSXP :  // TODO skip LGL and assume range [0,1]
       range_i32(INTEGER(x), nrow, &min, &max, &na_count);
       break;
+    case CPLXSXP : {
+      // treat as if two separate columns of double
+      const Rcomplex *xd = COMPLEX(x);
+      double *tmp = REAL(CplxPart);
+      if (!complexRerun) {
+        for (int i=0; i<nrow; ++i) tmp[i] = xd[i].r;  // extract the real part on the first time
+        complexRerun = true;
+        col--;  // cause this loop iteration to rerun; decrement now in case of early continue below
+      } else {
+        for (int i=0; i<nrow; ++i) tmp[i] = xd[i].i;
+        complexRerun = false;
+      }
+      x = CplxPart;
+    } // !no break! so as to fall through to REAL case
     case REALSXP :
       if (inherits(x, "integer64")) {
         range_i64((int64_t *)REAL(x), nrow, &min, &max, &na_count);
diff --git a/src/reorder.c b/src/reorder.c
index c4c3543b07..e65a58f26c 100644
--- a/src/reorder.c
+++ b/src/reorder.c
@@ -101,3 +101,4 @@ SEXP reorder(SEXP x, SEXP order)
   UNPROTECT(nprotect);
   return(R_NilValue);
 }
+

From dd7b24a76b7c0842b340e364e5f0fdd8abdd709c Mon Sep 17 00:00:00 2001
From: mattdowle <mattjdowle@gmail.com>
Date: Fri, 19 Jul 2019 13:09:14 -0700
Subject: [PATCH 24/25] memcmp for complex instead of == on double

---
 src/uniqlist.c | 41 +++++++++++++++++++----------------------
 1 file changed, 19 insertions(+), 22 deletions(-)

diff --git a/src/uniqlist.c b/src/uniqlist.c
index 318722930b..a7f6577bd1 100644
--- a/src/uniqlist.c
+++ b/src/uniqlist.c
@@ -196,14 +196,13 @@ SEXP rleid(SEXP l, SEXP cols) {
           break;
         case REALSXP : {
           long long *ll = (long long *)REAL(jcol);
-          same = ll[i]==ll[i-1]; }
+          same = ll[i]==ll[i-1];
           // 8 bytes of bits are identical. For real (no rounding currently) and integer64
           // long long == 8 bytes checked in init.c
-          break;
+        } break;
         case CPLXSXP: {
-          // tried to make long long complex * but got a warning that it's a GNU extension
           Rcomplex *pz = COMPLEX(jcol);
-          same = pz[i].r == pz[i-1].r && pz[i].i == pz[i-1].i;
+          same = memcmp(&pz[i], &pz[i-1], sizeof(Rcomplex))==0; // compiler optimization should replace library call with best 16-byte fixed method
         } break;
         default :
           error("Type '%s' not supported", type2char(TYPEOF(jcol)));  // # nocov
@@ -215,32 +214,30 @@ SEXP rleid(SEXP l, SEXP cols) {
     SEXP jcol = VECTOR_ELT(l, icols[0]-1);
     switch (TYPEOF(jcol)) {
     case INTSXP : case LGLSXP : {
-        int *ijcol = INTEGER(jcol);
-        for (R_xlen_t i=1; i<nrow; i++) {
-          bool same = ijcol[i]==ijcol[i-1];
-          ians[i] = (grp+=!same);
-        }
+      int *ijcol = INTEGER(jcol);
+      for (R_xlen_t i=1; i<nrow; i++) {
+        bool same = ijcol[i]==ijcol[i-1];
+        ians[i] = (grp+=!same);
       }
-      break;
+    } break;
     case STRSXP : {
-        for (R_xlen_t i=1; i<nrow; i++) {
-          bool same = STRING_ELT(jcol,i)==STRING_ELT(jcol,i-1);
-          ians[i] = (grp+=!same);
-        }
+      const SEXP *jd = STRING_PTR(jcol);
+      for (R_xlen_t i=1; i<nrow; i++) {
+        bool same = jd[i]==jd[i-1];
+        ians[i] = (grp+=!same);
       }
-      break;
+    } break;
     case REALSXP : {
-        long long *lljcol = (long long *)REAL(jcol);
-        for (R_xlen_t i=1; i<nrow; i++) {
-          bool same = lljcol[i]==lljcol[i-1];
-          ians[i] = (grp+=!same);
-        }
+      long long *lljcol = (long long *)REAL(jcol);
+      for (R_xlen_t i=1; i<nrow; i++) {
+        bool same = lljcol[i]==lljcol[i-1];
+        ians[i] = (grp+=!same);
       }
-      break;
+    } break;
     case CPLXSXP: {
       Rcomplex *pzjcol = COMPLEX(jcol);
       for (R_xlen_t i=1; i<nrow; i++) {
-        bool same = pzjcol[i].r == pzjcol[i-1].r && pzjcol[i].i == pzjcol[i-1].i;
+        bool same = memcmp(&pzjcol[i], &pzjcol[i-1], sizeof(Rcomplex))==0;
         ians[i] = (grp += !same);
       }
     } break;

From a9d91652a98e9e64769e96b51514254929ff6d9f Mon Sep 17 00:00:00 2001
From: mattdowle <mattjdowle@gmail.com>
Date: Fri, 19 Jul 2019 14:21:46 -0700
Subject: [PATCH 25/25] news item tidy

---
 NEWS.md               | 13 +++----------
 inst/tests/tests.Rraw |  2 +-
 src/bmerge.c          |  2 +-
 3 files changed, 5 insertions(+), 12 deletions(-)

diff --git a/NEWS.md b/NEWS.md
index 85ec154838..35f51f66ea 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -96,7 +96,7 @@
 
 16. `as.data.table` now unpacks columns in a `data.frame` which are themselves a `data.frame`. This need arises when parsing JSON, a corollary in [#3369](https://github.com/Rdatatable/data.table/issues/3369#issuecomment-462662752). `data.table` does not allow columns to be objects which themselves have columns (such as `matrix` and `data.frame`), unlike `data.frame` which does. Bug fix 19 in v1.12.2 (see below) added a helpful error (rather than segfault) to detect such invalid `data.table`, and promised that `as.data.table()` would unpack these columns in the next release (i.e. this release) so that the invalid `data.table` is not created in the first place.
 
-17. `CJ` has been ported to C and parallelized, thanks to a PR by Michael Chirico, [#3596](https://github.com/Rdatatable/data.table/pull/3596). All types benefit (including newly supported complex, part of [#3690](https://github.com/Rdatatable/data.table/issues/3690)), and as in many `data.table` operations, factors benefit more than character.
+17. `CJ` has been ported to C and parallelized, thanks to a PR by Michael Chirico, [#3596](https://github.com/Rdatatable/data.table/pull/3596). All types benefit, but, as in many `data.table` operations, factors benefit more than character.
 
     ```R
     # default 4 threads on a laptop with 16GB RAM and 8 logical CPU
@@ -114,7 +114,7 @@
     #  0.357   0.763   0.292  # now
     ```
 
-18. New function `coalesce(...)` has been written in C, and is multithreaded for numeric, complex, and factor types. It replaces missing values according to a prioritized list of candidates (as per SQL COALESCE, `dplyr::coalesce`, and `hutils::coalesce`), [#3424](https://github.com/Rdatatable/data.table/issues/3424). It accepts any number of vectors in several forms. For example, given three vectors `x`, `y`, and `z`, where each `NA` in `x` is to be replaced by the corresponding value in `y` if that is non-NA, else the corresponding value in `z`, the following equivalent forms are all accepted: `coalesce(x,y,z)`, `coalesce(x,list(y,z))`, and `coalesce(list(x,y,z))`.
+18. New function `coalesce(...)` has been written in C, and is multithreaded for `numeric` and `factor`. It replaces missing values according to a prioritized list of candidates (as per SQL COALESCE, `dplyr::coalesce`, and `hutils::coalesce`), [#3424](https://github.com/Rdatatable/data.table/issues/3424). It accepts any number of vectors in several forms. For example, given three vectors `x`, `y`, and `z`, where each `NA` in `x` is to be replaced by the corresponding value in `y` if that is non-NA, else the corresponding value in `z`, the following equivalent forms are all accepted: `coalesce(x,y,z)`, `coalesce(x,list(y,z))`, and `coalesce(list(x,y,z))`.
 
     ```R
     # default 4 threads on a laptop with 16GB RAM and 8 logical CPU
@@ -131,12 +131,7 @@
     # TRUE
     ```
 
-19. `shift` now supports type `complex`, part of [#3690](https://github.com/Rdatatable/data.table/issues/3690).
-
-20. `setkey` now supports type `complex` as value columns (not as key columns), [#1444](https://github.com/Rdatatable/data.table/issues/1444). Thanks Gareth Ward for the report.
-
-21. Sorting now extended to complex vectors, [#1703](https://github.com/Rdatatable/data.table/issues/1703). Consistent with `base::order`, sorting is done lexicographically (`z1<z2` means `Re(z1) < Re(z2) | (Re(z1) == Re(z2) & Im(z1) < Im(z2))`). By extension, several functions that rely on our internal `forderv` to work now also accept complex vectors -- `setkey`, `setorder`, grouping `by` complex, `dcast`, `frank`, `rowid`, `rleid`, `unique` by complex.
-
+19. Type `complex` is now supported by `setkey`, `setorder`, `:=`, `by=`, `keyby=`, `shift`, `dcast`, `frank`, `rowid`, `rleid`, `CJ`, `coalesce`, `unique`, and `uniqueN`, [#3690](https://github.com/Rdatatable/data.table/issues/3690). Thanks to Gareth Ward and Elio Campitelli for their reports and input. Sorting `complex` is achieved the same way as base R; i.e., first by the real part then by the imaginary part (as if the `complex` column were two separate columns of `double`). There is no plan to support joining/merging on `complex` columns until a user demonstrates a need for that.
 
 #### BUG FIXES
 
@@ -201,8 +196,6 @@
 
 24. `column not found` could incorrectly occur in rare non-equi-join cases, [#3635](https://github.com/Rdatatable/data.table/issues/3635). Thanks to @UweBlock for the report.
 
-25. Complex columns used in `j` during grouping would get mangled, [#3639](https://github.com/Rdatatable/data.table/issues/3639). A related bug prevented assigning complex values using `:=` except for full-column plonks. We still do not support grouping `by` a complex column. Thanks to @eliocamp for filing the bug report.
-
 #### NOTES
 
 1. `rbindlist`'s `use.names="check"` now emits its message for automatic column names (`"V[0-9]+"`) too, [#3484](https://github.com/Rdatatable/data.table/pull/3484). See news item 5 of v1.12.2 below.
diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index 5edb64efbb..5f149ab845 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -15396,7 +15396,7 @@ DT1 = data.table(a = sample(3L, 15L, TRUE) + .1, b=sample(c(TRUE, FALSE, NA), 15
 DT2 = data.table(a = sample(3L, 6L, TRUE) + .1, b=sample(c(TRUE, FALSE, NA), 6L, TRUE))
 test(2069.32, DT1[DT2, .(y = sum(b, na.rm=TRUE)), by=.EACHI, on=c(a = 'a', b="b")]$y, rep(0L, 6L))
 DT = data.table(z = 1i)
-test(2069.33, DT[DT, on = 'z'], error = "Type 'complex' not supported for merging")
+test(2069.33, DT[DT, on = 'z'], error = "Type 'complex' not supported for joining/merging")
 
 ###################################
 #  Add new tests above this line  #
diff --git a/src/bmerge.c b/src/bmerge.c
index 14b774c4b3..ec003ff547 100644
--- a/src/bmerge.c
+++ b/src/bmerge.c
@@ -406,7 +406,7 @@ void bmerge_r(int xlowIn, int xuppIn, int ilowIn, int iuppIn, int col, int thisg
   }
     break;
   default:
-    error("Type '%s' not supported for merging", type2char(TYPEOF(xc)));
+    error("Type '%s' not supported for joining/merging", type2char(TYPEOF(xc)));
   }
   if (xlow<xupp-1) { // if value found, low and upp surround it, unlike standard binary search where low falls on it
     if (col<ncol-1) {