From 73a63ca5fe9911c57e9ab6212c3f39911cc8ec33 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Wed, 10 Jul 2019 13:21:15 +0800 Subject: [PATCH 01/25] Closes #1444 -- setkey works on tables with complex columns --- NEWS.md | 2 ++ inst/tests/tests.Rraw | 3 +++ src/reorder.c | 21 ++++++++++++++++----- 3 files changed, 21 insertions(+), 5 deletions(-) diff --git a/NEWS.md b/NEWS.md index 10e3d5d5ea..81a50403ae 100644 --- a/NEWS.md +++ b/NEWS.md @@ -128,6 +128,8 @@ identical(y1,y2) && identical(y1,y3) # TRUE ``` + +19. Support for `setkey` on `data.table`s with complex-valued columns, [#1444](https://github.com/Rdatatable/data.table/issues/1444). Note that complex columns themselves are not well-ordered (and thus not supported as keys). Thanks @atalikami for the report. #### BUG FIXES diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 1c720d4de9..838bf70306 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -15200,6 +15200,9 @@ d2 = data.table(r = 1:5, s = seq(0L, 20L, 5L)) test(2062.1, d1[d2, on = .(a <= s, b >= s), j = .SD], ans<-data.table(a=INT(0,5,10,10,15,20), b=INT(0,5,10,10,15,20))) test(2062.2, d1[d2, on = .(a <= s, b >= s)][, .(a, b)], ans) +# #1444 -- support for ordering tables with complex columns +DT = data.table(a = 2:1, z = complex(0, 0:1)) +test(2063, setkey(DT, a), data.table(a = 1:2, z = complex(0, 1:0), key='a')) ################################### # Add new tests above this line # diff --git a/src/reorder.c b/src/reorder.c index 337d09b70f..62116def88 100644 --- a/src/reorder.c +++ b/src/reorder.c @@ -1,4 +1,5 @@ #include "data.table.h" +#include SEXP reorder(SEXP x, SEXP order) { @@ -13,8 +14,8 @@ SEXP reorder(SEXP x, SEXP order) ncol = length(x); for (int i=0; i maxSize) @@ -22,8 +23,8 @@ SEXP reorder(SEXP x, SEXP order) if (ALTREP(v)) SET_VECTOR_ELT(x,i,duplicate(v)); // expand compact vector in place ready for reordering by reference } } else { - if (SIZEOF(x)!=4 && SIZEOF(x)!=8) - error("reorder accepts vectors but this non-VECSXP is type '%s' which isn't yet supported", type2char(TYPEOF(x))); + if (SIZEOF(x)!=4 && SIZEOF(x)!=8 && SIZEOF(x)!=16) + error("reorder accepts vectors but this non-VECSXP is type '%s' which isn't yet supported (SIZEOF=%d)", type2char(TYPEOF(x)), SIZEOF(x)); if (ALTREP(x)) error("Internal error in reorder.c: cannot reorder an ALTREP vector. Please see NEWS item 2 in v1.11.4 and report this as a bug."); // # nocov maxSize = SIZEOF(x); nrow = length(x); @@ -68,7 +69,7 @@ SEXP reorder(SEXP x, SEXP order) tmp[i] = cvd[idx[i]-1]; // copies 4 bytes; including pointers on 32bit (STRSXP and VECSXP) } memcpy(vd+start, tmp+start, (end-start+1)*size); - } else { + } else if (size==8) { double *vd = (double *)DATAPTR(v); const double *restrict cvd = vd; double *restrict tmp = (double *)TMP; @@ -77,6 +78,16 @@ SEXP reorder(SEXP x, SEXP order) tmp[i] = cvd[idx[i]-1]; // copies 8 bytes; including pointers on 64bit (STRSXP and VECSXP) } memcpy(vd+start, tmp+start, (end-start+1)*size); + } else { + // #1444 -- support for copying CPLXSXP (which have size 16) + double complex *vd = (double complex *)DATAPTR(v); + const double complex *restrict cvd = vd; + double complex *restrict tmp = (double complex *)TMP; + #pragma omp parallel for num_threads(getDTthreads()) + for (int i=start; i<=end; i++) { + tmp[i] = cvd[idx[i]-1]; // copies 16 bytes; including pointers on 64bit (STRSXP and VECSXP) + } + memcpy(vd+start, tmp+start, (end-start+1)*size); } } // It's ok to ignore write barrier, and in parallel too, only because this reorder() function accepts and checks From bdce4a5ff5f28ce5ef4d2c6fd8694a4aa156866f Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Wed, 10 Jul 2019 13:33:13 +0800 Subject: [PATCH 02/25] extra test for group operation mentioned in issue --- inst/tests/tests.Rraw | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 838bf70306..ccee85a383 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -15202,7 +15202,8 @@ test(2062.2, d1[d2, on = .(a <= s, b >= s)][, .(a, b)], ans) # #1444 -- support for ordering tables with complex columns DT = data.table(a = 2:1, z = complex(0, 0:1)) -test(2063, setkey(DT, a), data.table(a = 1:2, z = complex(0, 1:0), key='a')) +test(2063.1, setkey(copy(DT), a), data.table(a = 1:2, z = complex(0, 1:0), key='a')) +test(2063.2, DT[ , abs(z), by = a], data.table(a = 2:1, V1 = c(0, 1))) ################################### # Add new tests above this line # From e6533dca399cc0fcb77ee776f0eb8166d404f7c9 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Fri, 12 Jul 2019 00:05:07 +0800 Subject: [PATCH 03/25] new tests for coverage --- inst/tests/tests.Rraw | 9 +++++++-- src/reorder.c | 2 +- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index ccee85a383..c86b55b67f 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -15202,8 +15202,13 @@ test(2062.2, d1[d2, on = .(a <= s, b >= s)][, .(a, b)], ans) # #1444 -- support for ordering tables with complex columns DT = data.table(a = 2:1, z = complex(0, 0:1)) -test(2063.1, setkey(copy(DT), a), data.table(a = 1:2, z = complex(0, 1:0), key='a')) -test(2063.2, DT[ , abs(z), by = a], data.table(a = 2:1, V1 = c(0, 1))) +test(2063.1, setkey(copy(DT), a), data.table(a=1:2, z=complex(0, 1:0), key='a')) +test(2063.2, DT[ , abs(z), by=a], data.table(a=2:1, V1=c(0, 1))) +# raw continues not to be supported +DT = data.table(ID=2:1, r=as.raw(0:1)) +test(2063.3, setkey(DT, ID), error="Item 2 of list is type 'raw'") +# setreordervec triggers !isNewList branch for coverage +test(2063.4, setreordervec(DT$r), error="reorder accepts vectors but this non-VECSXP") ################################### # Add new tests above this line # diff --git a/src/reorder.c b/src/reorder.c index 62116def88..d55d147283 100644 --- a/src/reorder.c +++ b/src/reorder.c @@ -124,7 +124,7 @@ SEXP setrev(SEXP x) { ((int *)xt)[len-1-j] = *(int *)tmp; } } else { - if (size!=8) error("Size of x isn't 4 or 8"); + if (size!=8) error("Size of x isn't 4, 8 or 16"); tmp = (char *)Calloc(1, double); if (!tmp) error("unable to allocate temporary working memory for reordering x"); for (j=0;j Date: Fri, 12 Jul 2019 00:20:24 +0800 Subject: [PATCH 04/25] missing arg --- inst/tests/tests.Rraw | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index c86b55b67f..777bed1706 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -15208,7 +15208,7 @@ test(2063.2, DT[ , abs(z), by=a], data.table(a=2:1, V1=c(0, 1))) DT = data.table(ID=2:1, r=as.raw(0:1)) test(2063.3, setkey(DT, ID), error="Item 2 of list is type 'raw'") # setreordervec triggers !isNewList branch for coverage -test(2063.4, setreordervec(DT$r), error="reorder accepts vectors but this non-VECSXP") +test(2063.4, setreordervec(DT$r, order(DT$ID)), error="reorder accepts vectors but this non-VECSXP") ################################### # Add new tests above this line # From 55bd50182e1578c5f9e428fa62a5d11ec2d0abfe Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Sat, 13 Jul 2019 05:22:07 +0800 Subject: [PATCH 05/25] Closes #1703 -- forderv handles complex input --- NEWS.md | 2 ++ R/setkey.R | 34 +++++++++++++++++++++++++++++++++- inst/tests/tests.Rraw | 11 +++++++++++ 3 files changed, 46 insertions(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index 691647cba1..c350ac0393 100644 --- a/NEWS.md +++ b/NEWS.md @@ -128,6 +128,8 @@ identical(y1,y2) && identical(y1,y3) # TRUE ``` + +19. Sorting now extended to complex vectors, [#1703](https://github.com/Rdatatable/data.table/issues/1703). Consistent with `base::order`, sorting is done lexicographically (`z1= s), j = .SD], ans<-data.table(a=INT(0,5,10,10,15,20), b=INT(0,5,10,10,15,20))) test(2062.2, d1[d2, on = .(a <= s, b >= s)][, .(a, b)], ans) +# forderv (and downstream functions) handles complex vector input, part of #3690 +DT = data.table( + a = c(1L, 1L, 8L, 2L, 1L, 9L, 3L, 2L, 6L, 6L), + b = c(3+9i, 10+5i, 8+2i, 10+4i, 3+3i, 1+2i, 5+1i, 8+1i, 8+2i, 10+6i) +) +test(2063.1, DT[order(a, b)], DT[base::order(a, b)]) +test(2063.2, DT[order(a, -b)], DT[base::order(a, -b)]) +test(2063.3, forderv(DT$b, order = 1L), base::order(DT$b)) +test(2063.4, forderv(DT$b, order = -1L), base::order(-DT$b)) +test(2063.5, forderv(DT, by = 2:1), forderv(DT[ , 2:1])) +test(2063.6, forderv(DT, by = 2:1, order = c(1L, -1L)), DT[order(b, -a), which = TRUE]) ################################### # Add new tests above this line # From 2e8c9966c7e2177771672ece974c31e9527a9d4f Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Sat, 13 Jul 2019 18:15:14 +0800 Subject: [PATCH 06/25] slight re-tooling, now passing tests --- R/setkey.R | 24 +++++++++++++----------- inst/tests/tests.Rraw | 15 ++++++++------- src/forder.c | 2 +- 3 files changed, 22 insertions(+), 19 deletions(-) diff --git a/R/setkey.R b/R/setkey.R index 6f80f80349..f46580cbde 100644 --- a/R/setkey.R +++ b/R/setkey.R @@ -222,21 +222,23 @@ forderv = function(x, by=seq_along(x), retGrp=FALSE, sort=TRUE, order=1L, na.las if ( (length(order) != 1L && length(order) != length(by)) || !all(order %in% c(1L, -1L)) ) stop("x is a list, length(order) must be either =1 or =length(by) and each value should be 1 or -1 for each column in 'by', corresponding to ascending or descending order, respectively. If length(order) == 1, it will be recycled to length(by).") if (length(order) == 1L) order = rep(order, length(by)) - is_cplx = vapply_1b(x, is.complex) + nn = length(x) + is_cplx = vapply_1b(by, function(i) { + if (i < 1L) stop("'by' must be positive, received ", i, call. = FALSE) + if (i > nn) stop("'by' must be in [1,", nn, "], received ", i, call. = FALSE) + is.complex(x[[i]]) + }) if (any(is_cplx)) { - by_order = order(by) - out_x = vector('list', length(x) + sum(is_cplx)) - # order of order matches by, but is_cplx doesn't - order = rep(order, is_cplx[by_order] + 1L) + out_x = vector('list', length(by) + sum(is_cplx)) + order = rep(order, is_cplx + 1L) j = 1L - for (i in seq_along(is_cplx)) { - by_i = by_order[i] - if (is_cplx[by_i]) { - out_x[[j]] = Re(x[[by_i]]) - out_x[[j + 1L]] = Im(x[[by_i]]) + for (i in seq_along(by)) { + if (is_cplx[i]) { + out_x[[j]] = Re(x[[by[i]]]) + out_x[[j + 1L]] = Im(x[[by[i]]]) j = j + 2L } else { - out_x[[j]] = x[[by_i]] + out_x[[j]] = x[[by[i]]] j = j + 1L } } diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index de4106d582..32b1196283 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -2776,7 +2776,7 @@ test(988, unique(dt, by='B'), dt[!duplicated(df[, 'B'])]) test(989, unique(dt, by='C'), dt[!duplicated(df[, 'C'])]) test(990, unique(dt, by=c('B', 'C')), dt[!duplicated(df[, c('B', 'C')])]) test(991, unique(dt, by=NULL), dt[!duplicated(df)]) -test(991.1, unique(dt, by=4), error="'by' value 4 out of range.*1,3") +test(991.1, unique(dt, by=4), error="'by' must be in [1,3], received 4") test(991.2, unique(dt, by=c(1,3.1)), error="'by' is type 'double' and one or more items in it are not whole integers") test(991.3, unique(dt, by=2:3), dt[!duplicated(df[,c('B','C')])]) test(991.4, unique(dt, by=c('C','D','E')), error="'by' contains 'D' which is not a column name") @@ -11692,11 +11692,11 @@ test(1844.2, forder(DT,V1,V2,na.last=NA), INT(2,1,3,0,4)) # prior to v1.12.0 th # now with two NAs in that 2-group covers forder.c:forder line 1269 starting: else if (nalast == 0 && tmp==-2) { DT = data.table(c("a","a","a","b","b"),c(2,1,3,NA,NA)) test(1844.3, forder(DT,V1,V2,na.last=NA), INT(2,1,3,0,0)) -DT = data.table((0+0i)^(-3:3), 7:1) -test(1844.4, forder(DT,V1,V2), error="Column 1 of by= (1) is type 'complex', not yet supported") -test(1844.5, forder(DT,V2,V1), error="Column 2 of by= (2) is type 'complex', not yet supported") -DT = data.table((0+0i)^(-3:3), c(5L,5L,1L,2L,2L,2L,2L)) -test(1844.6, forder(DT,V2,V1), error="Column 2 of by= (2) is type 'complex', not yet supported") +DT = data.table(as.raw(0:6), 7:1) +test(1844.4, forder(DT,V1,V2), error="Column 1 of by= (1) is type 'raw', not yet supported") +test(1844.5, forder(DT,V2,V1), error="Column 2 of by= (2) is type 'raw', not yet supported") +DT = data.table(as.raw(0:6), c(5L,5L,1L,2L,2L,2L,2L)) +test(1844.6, forder(DT,V2,V1), error="Column 2 of by= (2) is type 'raw', not yet supported") # fix for non-equi joins issue #1991. Thanks to Henrik for the nice minimal example. d1 <- data.table(x = c(rep(c("b", "a", "c"), each = 3), c("a", "b")), y = c(rep(c(1, 3, 6), 3), 6, 6), id = 1:11) @@ -15211,7 +15211,8 @@ test(2062.2, d1[d2, on = .(a <= s, b >= s)][, .(a, b)], ans) # forderv (and downstream functions) handles complex vector input, part of #3690 DT = data.table( a = c(1L, 1L, 8L, 2L, 1L, 9L, 3L, 2L, 6L, 6L), - b = c(3+9i, 10+5i, 8+2i, 10+4i, 3+3i, 1+2i, 5+1i, 8+1i, 8+2i, 10+6i) + b = c(3+9i, 10+5i, 8+2i, 10+4i, 3+3i, 1+2i, 5+1i, 8+1i, 8+2i, 10+6i), + c = 6 ) test(2063.1, DT[order(a, b)], DT[base::order(a, b)]) test(2063.2, DT[order(a, -b)], DT[base::order(a, -b)]) diff --git a/src/forder.c b/src/forder.c index 6135260986..d810bf6536 100644 --- a/src/forder.c +++ b/src/forder.c @@ -442,7 +442,7 @@ SEXP forder(SEXP DT, SEXP by, SEXP retGrpArg, SEXP sortGroupsArg, SEXP ascArg, S nrow = length(VECTOR_ELT(DT,0)); for (int i=0; i length(DT)) - error("'by' value %d out of range [1,%d]", INTEGER(by)[i], length(DT)); + error("Internal error: 'by' out of range should have been caught earlier"); // #nocov if ( nrow != length(VECTOR_ELT(DT, INTEGER(by)[i]-1)) ) error("Column %d is length %d which differs from length of column 1 (%d)\n", INTEGER(by)[i], length(VECTOR_ELT(DT, INTEGER(by)[i]-1)), nrow); } From e3a6aa6eead80dec8805bd53716cc6e30b100a02 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Sat, 13 Jul 2019 22:22:39 +0800 Subject: [PATCH 07/25] more progress; but stonewalled by bmerge --- R/bmerge.R | 2 +- R/data.table.R | 2 +- R/setkey.R | 5 +++-- inst/tests/tests.Rraw | 38 ++++++++++++++++++++++++++++++++------ 4 files changed, 37 insertions(+), 10 deletions(-) diff --git a/R/bmerge.R b/R/bmerge.R index 039ec127a5..b3aef83c2f 100644 --- a/R/bmerge.R +++ b/R/bmerge.R @@ -14,7 +14,7 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos # careful to only plonk syntax (full column) on i/x from now on otherwise user's i and x would change; # this is why shallow() is very importantly internal only, currently. - supported = c("logical", "integer", "double", "character", "factor", "integer64") + supported = c(ORDERING_TYPES, "factor", "integer64") getClass = function(x) { ans = typeof(x) diff --git a/R/data.table.R b/R/data.table.R index a13597926b..7436729314 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -829,7 +829,7 @@ replace_order = function(isub, verbose, env) { if (!is.list(byval)) stop("'by' or 'keyby' must evaluate to a vector or a list of vectors (where 'list' includes data.table and data.frame which are lists, too)") if (length(byval)==1L && is.null(byval[[1L]])) bynull=TRUE #3530 when by=(function()NULL)() if (!bynull) for (jj in seq_len(length(byval))) { - if (!typeof(byval[[jj]]) %chin% c("integer","logical","character","double")) stop("column or expression ",jj," of 'by' or 'keyby' is type ",typeof(byval[[jj]]),". Do not quote column names. Usage: DT[,sum(colC),by=list(colA,month(colB))]") + if (!typeof(byval[[jj]]) %chin% c("integer","logical","character","double","complex")) stop("column or expression ",jj," of 'by' or 'keyby' is type ",typeof(byval[[jj]]),". Do not quote column names. Usage: DT[,sum(colC),by=list(colA,month(colB))]") } tt = vapply_1i(byval,length) if (any(tt!=xnrow)) stop("The items in the 'by' or 'keyby' list are length (",paste(tt,collapse=","),"). Each must be length ", xnrow, "; the same length as there are rows in x (after subsetting if i is provided).") diff --git a/R/setkey.R b/R/setkey.R index f46580cbde..9da7d1bca5 100644 --- a/R/setkey.R +++ b/R/setkey.R @@ -83,7 +83,7 @@ setkeyv = function(x, cols, verbose=getOption("datatable.verbose"), physical=TRU if (".xi" %chin% names(x)) stop("x contains a column called '.xi'. Conflicts with internal use by data.table.") for (i in cols) { .xi = x[[i]] # [[ is copy on write, otherwise checking type would be copying each column - if (!typeof(.xi) %chin% c("integer","logical","character","double")) stop("Column '",i,"' is type '",typeof(.xi),"' which is not supported as a key column type, currently.") + if (!typeof(.xi) %chin% ORDERING_TYPES) stop("Column '",i,"' is type '",typeof(.xi),"' which is not supported as a key column type, currently.") } if (!is.character(cols) || length(cols)<1L) stop("Internal error. 'cols' should be character at this point in setkey; please report.") # nocov @@ -181,6 +181,7 @@ is.sorted = function(x, by=seq_along(x)) { # Important to call forder.c::fsorted here, for consistent character ordering and numeric/integer64 twiddling. } +ORDERING_TYPES = c('logical', 'integer', 'double', 'complex', 'character') forderv = function(x, by=seq_along(x), retGrp=FALSE, sort=TRUE, order=1L, na.last=FALSE) { if (!(sort || retGrp)) stop("At least one of retGrp or sort must be TRUE") @@ -364,7 +365,7 @@ setorderv = function(x, cols = colnames(x), order=1L, na.last=FALSE) if (".xi" %chin% colnames(x)) stop("x contains a column called '.xi'. Conflicts with internal use by data.table.") for (i in cols) { .xi = x[[i]] # [[ is copy on write, otherwise checking type would be copying each column - if (!typeof(.xi) %chin% c("integer","logical","character","double")) stop("Column '",i,"' is type '",typeof(.xi),"' which is not supported for ordering currently.") + if (!typeof(.xi) %chin% ORDERING_TYPES) stop("Column '",i,"' is type '",typeof(.xi),"' which is not supported for ordering currently.") } if (!is.character(cols) || length(cols)<1L) stop("Internal error. 'cols' should be character at this point in setkey; please report.") # nocov diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 32b1196283..c845f4b87c 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -15214,12 +15214,38 @@ DT = data.table( b = c(3+9i, 10+5i, 8+2i, 10+4i, 3+3i, 1+2i, 5+1i, 8+1i, 8+2i, 10+6i), c = 6 ) -test(2063.1, DT[order(a, b)], DT[base::order(a, b)]) -test(2063.2, DT[order(a, -b)], DT[base::order(a, -b)]) -test(2063.3, forderv(DT$b, order = 1L), base::order(DT$b)) -test(2063.4, forderv(DT$b, order = -1L), base::order(-DT$b)) -test(2063.5, forderv(DT, by = 2:1), forderv(DT[ , 2:1])) -test(2063.6, forderv(DT, by = 2:1, order = c(1L, -1L)), DT[order(b, -a), which = TRUE]) +test(2063.01, DT[order(a, b)], DT[base::order(a, b)]) +test(2063.02, DT[order(a, -b)], DT[base::order(a, -b)]) +test(2063.03, forderv(DT$b, order = 1L), base::order(DT$b)) +test(2063.04, forderv(DT$b, order = -1L), base::order(-DT$b)) +test(2063.05, forderv(DT, by = 2:1), forderv(DT[ , 2:1])) +test(2063.06, forderv(DT, by = 2:1, order = c(1L, -1L)), DT[order(b, -a), which = TRUE]) + +# downstreams of forder +DT = data.table( + z = c(0, 0, 1, 1, 2, 3) + c(1, 1, 2, 2, 3, 4)*1i, + grp = rep(1:2, 3L), + v = c(3, 1, 4, 1, 5, 9) +) +unq_z = 0:3 + (1:4)*1i +test(2063.07, DT[ , .N, by=z], data.table(z=unq_z, N=c(2L, 2L, 1L, 1L))) +# uniqlist.c needs work +# test(2063.08, DT[ , .N, keyby = z], +# DT = setkey(copy(DT[.N:1]), z) +# test(2963.09, key(DT), 'z') +# test(2963.10, DT +test(2963.11, dcast(DT, z ~ grp, value.var='v', fill=0), + data.table(z=unq_z, `1`=c(3, 4, 5, 0), `2`=c(1, 1, 0, 9), key='z')) +test(2963.12, frank(DT$z), c(1.5, 1.5, 3.5, 5, 6)) +test(2963.13, frank(DT$z, ties.method='max'), c(2L, 2L, 4L, 5L, 6L)) +test(2963.14, frank(-DT$z, ties.method='min'), c(5L, 5L, 3L, 3L, 2L, 1L)) +test(2963.15, DT[ , rowid(z, grp)], rep(1L, 6L)) +test(1963.16, DT[ , rowid(z)], c(1:2, 1:2, 1L, 1L)) + +DT1 = data.table(z = c(0+1i, 2-3i, 4+1i)) +DT2 = data.table(z = c(2-3i, 0+1i, 0+0i)) +DT1[DT2, on = 'z'] + ################################### # Add new tests above this line # From 52bcac04aba8ad76873f48a226fb4ac0ac98c954 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Sun, 14 Jul 2019 00:28:17 +0800 Subject: [PATCH 08/25] moved new logic to C so e.g. bmerge can call it from there --- R/setkey.R | 34 ------------------------------ inst/tests/tests.Rraw | 28 +++++++++++-------------- src/forder.c | 49 ++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 60 insertions(+), 51 deletions(-) diff --git a/R/setkey.R b/R/setkey.R index 94132a6af0..bed50cba50 100644 --- a/R/setkey.R +++ b/R/setkey.R @@ -197,17 +197,6 @@ forderv = function(x, by=seq_along(x), retGrp=FALSE, sort=TRUE, order=1L, na.las by = NULL if ( !missing(order) && (length(order) != 1L || !(order %in% c(1L, -1L))) ) stop("x is a single vector, length(order) must be =1 and it's value should be 1 (ascending) or -1 (descending).") - # we don't expect users to need complex sorting extensively - # or on massive data sets, so we take the approach of - # splitting a complex vector into its real & imaginary parts - # and using the regular forderv machinery to sort; a baremetal - # implementation would at root do the same, but the approach - # here is a bit more slapdash with respect to memory efficiency - if (is.complex(x)) { - x = list(Re(x), Im(x)) - by = 1:2 - order = rep(order, 2L) - } } else { if (!length(x)) return(integer(0L)) # to be consistent with base::order. this'll make sure forderv(NULL) will result in error # (as base does) but forderv(data.table(NULL)) and forderv(list()) will return integer(0L)) @@ -223,29 +212,6 @@ forderv = function(x, by=seq_along(x), retGrp=FALSE, sort=TRUE, order=1L, na.las if ( (length(order) != 1L && length(order) != length(by)) || !all(order %in% c(1L, -1L)) ) stop("x is a list, length(order) must be either =1 or =length(by) and each value should be 1 or -1 for each column in 'by', corresponding to ascending or descending order, respectively. If length(order) == 1, it will be recycled to length(by).") if (length(order) == 1L) order = rep(order, length(by)) - nn = length(x) - is_cplx = vapply_1b(by, function(i) { - if (i < 1L) stop("'by' must be positive, received ", i, call. = FALSE) - if (i > nn) stop("'by' must be in [1,", nn, "], received ", i, call. = FALSE) - is.complex(x[[i]]) - }) - if (any(is_cplx)) { - out_x = vector('list', length(by) + sum(is_cplx)) - order = rep(order, is_cplx + 1L) - j = 1L - for (i in seq_along(by)) { - if (is_cplx[i]) { - out_x[[j]] = Re(x[[by[i]]]) - out_x[[j + 1L]] = Im(x[[by[i]]]) - j = j + 2L - } else { - out_x[[j]] = x[[by[i]]] - j = j + 1L - } - } - x = out_x; rm(out_x) - by = seq_along(x) - } } order = as.integer(order) .Call(Cforder, x, by, retGrp, sort, order, na.last) # returns integer() if already sorted, regardless of sort=TRUE|FALSE diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index e919fef605..e335d9a092 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -2783,7 +2783,7 @@ test(988, unique(dt, by='B'), dt[!duplicated(df[, 'B'])]) test(989, unique(dt, by='C'), dt[!duplicated(df[, 'C'])]) test(990, unique(dt, by=c('B', 'C')), dt[!duplicated(df[, c('B', 'C')])]) test(991, unique(dt, by=NULL), dt[!duplicated(df)]) -test(991.1, unique(dt, by=4), error="'by' must be in [1,3], received 4") +test(991.1, unique(dt, by=4), error="'by' value 4 out of range.*1,3") test(991.2, unique(dt, by=c(1,3.1)), error="'by' is type 'double' and one or more items in it are not whole integers") test(991.3, unique(dt, by=2:3), dt[!duplicated(df[,c('B','C')])]) test(991.4, unique(dt, by=c('C','D','E')), error="'by' contains 'D' which is not a column name") @@ -13158,9 +13158,9 @@ setnames(DT, '.xi') setkey(DT, NULL) test(1962.037, setkey(DT, .xi), error = "x contains a column called '.xi'") -DT = data.table(a = 1+3i) +DT = data.table(a = as.raw(0)) test(1962.038, setkey(DT, a), - error = "Column 'a' is type 'complex'") + error = "Column 'a' is type 'raw'") test(1962.039, is.sorted(3:1, by = 'x'), error = 'x is vector but') @@ -13216,8 +13216,8 @@ test(1962.064, setorderv(copy(DT)), test(1962.065, setorderv(DT, 'c'), error = 'some columns are not in the data.table') setnames(DT, 1L, '.xi') test(1962.066, setorderv(DT, 'b'), error = "x contains a column called '.xi'") -test(1962.067, setorderv(data.table(a = 1+3i), 'a'), - error = "Column 'a' is type 'complex'") +test(1962.067, setorderv(data.table(a = as.raw(0)), 'a'), + error = "Column 'a' is type 'raw'") DT = data.table( color = c("yellow", "red", "green", "red", "green", "red", @@ -13743,7 +13743,7 @@ test(1984.05, DT[ , sum(b), keyby = c, verbose = TRUE], ### hitting byval = eval(bysub, setattr(as.list(seq_along(xss)), ...) test(1984.06, DT[1:3, sum(a), by=b:c], data.table(b=10:8, c=1:3, V1=1:3)) test(1984.07, DT[, sum(a), by=call('sin',pi)], error='must evaluate to a vector or a list of vectors') -test(1984.08, DT[, sum(a), by=1+3i], error='column or expression.*type complex') +test(1984.08, DT[, sum(a), by=as.raw(0)], error='column or expression.*type raw') test(1984.09, DT[, sum(a), by=.(1,1:2)], error='The items.*list are length [(]1,2[)].*Each must be length 10; .*rows in x.*after subsetting') options('datatable.optimize' = Inf) test(1984.10, DT[ , 1, by = .(a %% 2), verbose = TRUE], @@ -14755,14 +14755,14 @@ dt1 <- data.table(int = 1L:10L, bool = c(rep(FALSE, 9), TRUE), char = letters[1L:10L], fact = factor(letters[1L:10L]), - complex = as.complex(1:5)) + raw = as.raw(1:5)) dt2 <- data.table(int = 1L:5L, doubleInt = as.numeric(1:5), realDouble = seq(0.5, 2.5, by = 0.5), bool = TRUE, char = letters[1L:5L], fact = factor(letters[1L:5L]), - complex = as.complex(1:5)) + raw = as.raw(1:5)) if (test_bit64) { dt1[, int64 := as.integer64(c(1:9, 3e10))] dt2[, int64 := as.integer64(c(1:4, 3e9))] @@ -14779,8 +14779,8 @@ test(2044.08, nrow(dt1[dt2, on="fact==fact", verbose=TRUE]), nrow(dt if (test_bit64) { test(2044.09, nrow(dt1[dt2, on = "int64==int64", verbose=TRUE]), nrow(dt2), output="No coercion needed") } -test(2044.10, dt1[dt2, on = "int==complex"], error = "i.complex is type complex which is not supported by data.table join") -test(2044.11, dt1[dt2, on = "complex==int"], error = "x.complex is type complex which is not supported by data.table join") +test(2044.10, dt1[dt2, on = "int==raw"], error = "i.raw is type raw which is not supported by data.table join") +test(2044.11, dt1[dt2, on = "raw==int"], error = "x.raw is type raw which is not supported by data.table join") # incompatible types test(2044.20, dt1[dt2, on="bool==int"], error="Incompatible join types: x.bool (logical) and i.int (integer)") test(2044.21, dt1[dt2, on="bool==doubleInt"], error="Incompatible join types: x.bool (logical) and i.doubleInt (double)") @@ -15270,16 +15270,12 @@ test(2064.07, DT[ , .N, by=z], data.table(z=unq_z, N=c(2L, 2L, 1L, 1L))) # test(2964.10, DT test(2964.11, dcast(DT, z ~ grp, value.var='v', fill=0), data.table(z=unq_z, `1`=c(3, 4, 5, 0), `2`=c(1, 1, 0, 9), key='z')) -test(2964.12, frank(DT$z), c(1.5, 1.5, 3.5, 5, 6)) -test(2964.13, frank(DT$z, ties.method='max'), c(2L, 2L, 4L, 5L, 6L)) +test(2964.12, frank(DT$z), c(1.5, 1.5, 3.5, 3.5, 5, 6)) +test(2964.13, frank(DT$z, ties.method='max'), c(2L, 2L, 4L, 4L, 5L, 6L)) test(2964.14, frank(-DT$z, ties.method='min'), c(5L, 5L, 3L, 3L, 2L, 1L)) test(2964.15, DT[ , rowid(z, grp)], rep(1L, 6L)) test(2964.16, DT[ , rowid(z)], c(1:2, 1:2, 1L, 1L)) -DT1 = data.table(z = c(0+1i, 2-3i, 4+1i)) -DT2 = data.table(z = c(2-3i, 0+1i, 0+0i)) -DT1[DT2, on = 'z'] - ################################### # Add new tests above this line # ################################### diff --git a/src/forder.c b/src/forder.c index d810bf6536..9e8630b744 100644 --- a/src/forder.c +++ b/src/forder.c @@ -440,11 +440,58 @@ SEXP forder(SEXP DT, SEXP by, SEXP retGrpArg, SEXP sortGroupsArg, SEXP ascArg, S if (!isInteger(by) || !LENGTH(by)) error("DT has %d columns but 'by' is either not integer or is length 0", length(DT)); // seq_along(x) at R level if (!isInteger(ascArg) || LENGTH(ascArg)!=LENGTH(by)) error("Either 'ascArg' is not integer or its length (%d) is different to 'by's length (%d)", LENGTH(ascArg), LENGTH(by)); nrow = length(VECTOR_ELT(DT,0)); + int n_cplx = 0; for (int i=0; i length(DT)) - error("Internal error: 'by' out of range should have been caught earlier"); // #nocov + error("'by' value %d out of range [1,%d]", INTEGER(by)[i], length(DT)); if ( nrow != length(VECTOR_ELT(DT, INTEGER(by)[i]-1)) ) error("Column %d is length %d which differs from length of column 1 (%d)\n", INTEGER(by)[i], length(VECTOR_ELT(DT, INTEGER(by)[i]-1)), nrow); + if (TYPEOF(VECTOR_ELT(DT, i)) == CPLXSXP) { + n_cplx++; + } + } + if (n_cplx) { + // we don't expect users to need complex sorting extensively + // or on massive data sets, so we take the approach of + // splitting a complex vector into its real & imaginary parts + // and using the regular forderv machinery to sort; a baremetal + // implementation would at root do the same, but the approach + // here is a bit more slapdash with respect to memory efficiency + // (seen clearly here at C from the 3+2*n_cplx PROTECT() calls) + int n_out = length(by) + n_cplx; + SEXP new_dt = PROTECT(allocVector(VECSXP, n_out)); n_protect++; + SEXP new_asc = PROTECT(allocVector(INTSXP, n_out)); n_protect++; + SEXP new_by = PROTECT(allocVector(INTSXP, n_out)); n_protect++; + int j = 0; + for (int i=0; i Date: Sun, 14 Jul 2019 01:55:22 +0800 Subject: [PATCH 09/25] some coverage tests, extension to rleid() --- R/data.table.R | 2 +- R/setkey.R | 11 +++-------- inst/tests/tests.Rraw | 16 ++++++++++++++++ src/forder.c | 9 ++++++--- src/uniqlist.c | 13 +++++++++++++ 5 files changed, 39 insertions(+), 12 deletions(-) diff --git a/R/data.table.R b/R/data.table.R index 4eab866fe3..829d07b83c 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -829,7 +829,7 @@ replace_order = function(isub, verbose, env) { if (!is.list(byval)) stop("'by' or 'keyby' must evaluate to a vector or a list of vectors (where 'list' includes data.table and data.frame which are lists, too)") if (length(byval)==1L && is.null(byval[[1L]])) bynull=TRUE #3530 when by=(function()NULL)() if (!bynull) for (jj in seq_len(length(byval))) { - if (!typeof(byval[[jj]]) %chin% c("integer","logical","character","double","complex")) stop("column or expression ",jj," of 'by' or 'keyby' is type ",typeof(byval[[jj]]),". Do not quote column names. Usage: DT[,sum(colC),by=list(colA,month(colB))]") + if (!typeof(byval[[jj]]) %chin% ORDERING_TYPES) stop("column or expression ",jj," of 'by' or 'keyby' is type ",typeof(byval[[jj]]),". Do not quote column names. Usage: DT[,sum(colC),by=list(colA,month(colB))]") } tt = vapply_1i(byval,length) if (any(tt!=xnrow)) stop("The items in the 'by' or 'keyby' list are length (",paste(tt,collapse=","),"). Each must be length ", xnrow, "; the same length as there are rows in x (after subsetting if i is provided).") diff --git a/R/setkey.R b/R/setkey.R index bed50cba50..810a4526f7 100644 --- a/R/setkey.R +++ b/R/setkey.R @@ -51,14 +51,9 @@ setkeyv = function(x, cols, verbose=getOption("datatable.verbose"), physical=TRU } if (identical(cols,"")) stop("cols is the empty string. Use NULL to remove the key.") if (!all(nzchar(cols))) stop("cols contains some blanks.") - if (!length(cols)) { - cols = colnames(x) # All columns in the data.table, usually a few when used in this form - } else { - # remove backticks from cols - cols = gsub("`", "", cols, fixed = TRUE) - miss = !(cols %chin% colnames(x)) - if (any(miss)) stop("some columns are not in the data.table: ", paste(cols[miss], collapse=",")) - } + cols = gsub("`", "", cols, fixed = TRUE) + miss = !(cols %chin% colnames(x)) + if (any(miss)) stop("some columns are not in the data.table: ", paste(cols[miss], collapse=",")) ## determine, whether key is already present: if (identical(key(x),cols)) { diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index e335d9a092..e593374b7e 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -15275,6 +15275,22 @@ test(2964.13, frank(DT$z, ties.method='max'), c(2L, 2L, 4L, 4L, 5L, 6L)) test(2964.14, frank(-DT$z, ties.method='min'), c(5L, 5L, 3L, 3L, 2L, 1L)) test(2964.15, DT[ , rowid(z, grp)], rep(1L, 6L)) test(2964.16, DT[ , rowid(z)], c(1:2, 1:2, 1L, 1L)) +test(2964.17, rleid(c(1i, 1i, 1i, 0, 0, 1-1i, 2+3i, 2+3i)), rep(1:4, c(3:1, 2L))) + +## assorted coverage tests from along the way +if (test_bit64) { + test(2964.50, is.sorted(as.integer64(10:1)), FALSE) + test(2964.51, is.sorted(as.integer64(1:10))) +} +# sort by vector outside of table +ord = 3:1 +test(2964.52, forder(data.table(a = 3:1), ord), 3:1) + +# DT1 = data.table(z = c(0+1i, 2-3i, 4+1i)) +# DT2 = data.table(z = c(2-3i, 0+1i, 0+0i)) +# DT1[DT2, on = 'z'] + + ################################### # Add new tests above this line # diff --git a/src/forder.c b/src/forder.c index 9e8630b744..edc8eab1bf 100644 --- a/src/forder.c +++ b/src/forder.c @@ -446,9 +446,7 @@ SEXP forder(SEXP DT, SEXP by, SEXP retGrpArg, SEXP sortGroupsArg, SEXP ascArg, S error("'by' value %d out of range [1,%d]", INTEGER(by)[i], length(DT)); if ( nrow != length(VECTOR_ELT(DT, INTEGER(by)[i]-1)) ) error("Column %d is length %d which differs from length of column 1 (%d)\n", INTEGER(by)[i], length(VECTOR_ELT(DT, INTEGER(by)[i]-1)), nrow); - if (TYPEOF(VECTOR_ELT(DT, i)) == CPLXSXP) { - n_cplx++; - } + if (TYPEOF(VECTOR_ELT(DT, i)) == CPLXSXP) n_cplx++; } if (n_cplx) { // we don't expect users to need complex sorting extensively @@ -461,11 +459,16 @@ SEXP forder(SEXP DT, SEXP by, SEXP retGrpArg, SEXP sortGroupsArg, SEXP ascArg, S int n_out = length(by) + n_cplx; SEXP new_dt = PROTECT(allocVector(VECSXP, n_out)); n_protect++; SEXP new_asc = PROTECT(allocVector(INTSXP, n_out)); n_protect++; + // will be simply 1:n_out SEXP new_by = PROTECT(allocVector(INTSXP, n_out)); n_protect++; int j = 0; for (int i=0; i // DONE: return 'uniqlist' as a vector (same as duplist) and write a separate function to get group sizes // Also improvements for numeric type with a hack of checking unsigned int (to overcome NA/NaN/Inf/-Inf comparisons) (> 2x speed-up) @@ -200,6 +201,11 @@ SEXP rleid(SEXP l, SEXP cols) { // 8 bytes of bits are identical. For real (no rounding currently) and integer64 // long long == 8 bytes checked in init.c break; + case CPLXSXP: { + // tried to make long long complex * but got a warning that it's a GNU extension + double complex *pz = (double complex *)COMPLEX(jcol); + same = (long long)creal(pz[i]) == (long long)creal(pz[i-1]) && (long long)cimag(pz[i]) == (long long)cimag(pz[i-1]); + } break; default : error("Type '%s' not supported", type2char(TYPEOF(jcol))); // # nocov } @@ -232,6 +238,13 @@ SEXP rleid(SEXP l, SEXP cols) { } } break; + case CPLXSXP: { + double complex *pzjcol = (double complex *)COMPLEX(jcol); + for (R_xlen_t i=1; i Date: Sun, 14 Jul 2019 03:34:53 +0800 Subject: [PATCH 10/25] progress making ctwiddle (dtwiddle for cplx) --- inst/tests/tests.Rraw | 2 +- src/data.table.h | 6 ++++++ src/forder.c | 32 ++++++++++++++++++++++++++++++++ src/uniqlist.c | 15 ++++++++++++++- 4 files changed, 53 insertions(+), 2 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index e593374b7e..b4f765cc94 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -6474,7 +6474,7 @@ test(1464.03, rleidv(DT, "b"), c(1L, 1L, 2L, 2L, 3L, 3L, 3L, 4L, 5L, 5L)) test(1464.04, rleid(DT$b), c(1L, 1L, 2L, 2L, 3L, 3L, 3L, 4L, 5L, 5L)) test(1464.05, rleidv(DT, "c"), c(1L, 1L, 2L, 2L, 3L, 3L, 3L, 4L, 5L, 5L)) test(1464.06, rleid(DT$c), c(1L, 1L, 2L, 2L, 3L, 3L, 3L, 4L, 5L, 5L)) -test(1464.07, rleid(as.complex(c(1,0+5i,0+5i,1))), error="Type 'complex' not supported") +test(1464.07, rleid(as.raw(c(3L, 1L, 2L)), error="Type 'raw' not supported") test(1464.08, rleidv(DT, 0), error="outside range") test(1464.09, rleidv(DT, 5), error="outside range") test(1464.10, rleidv(DT, 1:4), 1:nrow(DT)) diff --git a/src/data.table.h b/src/data.table.h index 55c458d077..a84809c292 100644 --- a/src/data.table.h +++ b/src/data.table.h @@ -113,6 +113,12 @@ SEXP setcolorder(SEXP x, SEXP o); // forder.c int StrCmp(SEXP x, SEXP y); uint64_t dtwiddle(void *p, int i); +// for sorting of complex values +typedef struct cplxTwiddled { + uint64_t re; + uint64_t im; +} cplxTwiddled; +cplxTwiddled ctwiddle(Rcomplex *p, int i); SEXP forder(SEXP DT, SEXP by, SEXP retGrp, SEXP sortStrArg, SEXP orderArg, SEXP naArg); bool need2utf8(SEXP x, int n); SEXP isReallyReal(SEXP); diff --git a/src/forder.c b/src/forder.c index edc8eab1bf..d47647d11e 100644 --- a/src/forder.c +++ b/src/forder.c @@ -409,6 +409,38 @@ uint64_t dtwiddle(void *p, int i) Error("Unknown non-finite value; not NA, NaN, -Inf or +Inf"); // # nocov } +cplxTwiddled ctwiddle(Rcomplex *p, int i) { + union { + double d; + uint64_t u64; + } u_re, u_im; + cplxTwiddled out; + Rcomplex z = p[i]; + u_re.d = z.r; + u_im.d = z.i; + if (R_FINITE(u_re.d)) { + if (u_re.d==0) u_re.d=0; + u_re.u64 ^= (u_re.u64 & 0x8000000000000000) ? 0xffffffffffffffff : 0x8000000000000000; + u_re.u64 += (u_re.u64 & dmask) << 1; + out.re = u_re.u64 >> (dround*8); + } else if (ISNAN(u_re.d)) { + out.re = ISNA(u_re.d) ? 0 : 1; + } else if (isinf(u_re.d)) { + out.re = signbit(u_re.d) ? 2: (0xffffffffffffffff>>(dround*8)); + } else Error("Unknown non-finite value; not NA, NaN, -Inf or +Inf"); // # nocov + if (R_FINITE(u_im.d)) { + if (u_im.d==0) u_im.d=0; + u_im.u64 ^= (u_im.u64 & 0x8000000000000000) ? 0xffffffffffffffff : 0x8000000000000000; + u_im.u64 += (u_im.u64 & dmask) << 1; + out.re = u_im.u64 >> (dround*8); + } else if (ISNAN(u_im.d)) { + out.re = ISNA(u_im.d) ? 0 : 1; + } else if (isinf(u_im.d)) { + out.re = signbit(u_im.d) ? 2: (0xffffffffffffffff>>(dround*8)); + } else Error("Unknown non-finite value; not NA, NaN, -Inf or +Inf"); // # nocov + return out; +} + void radix_r(const int from, const int to, const int radix); SEXP forder(SEXP DT, SEXP by, SEXP retGrpArg, SEXP sortGroupsArg, SEXP ascArg, SEXP naArg) diff --git a/src/uniqlist.c b/src/uniqlist.c index ecf1f3e80a..370c11a75d 100644 --- a/src/uniqlist.c +++ b/src/uniqlist.c @@ -1,5 +1,4 @@ #include "data.table.h" -#include // DONE: return 'uniqlist' as a vector (same as duplist) and write a separate function to get group sizes // Also improvements for numeric type with a hack of checking unsigned int (to overcome NA/NaN/Inf/-Inf comparisons) (> 2x speed-up) @@ -124,6 +123,15 @@ SEXP uniqlist(SEXP l, SEXP order) // to be stored, ii) many short-circuit early before the if (!b) anyway (negating benefit) and iii) we may not have needed LHS this time so logic would be complex. } break; + case CPLXSXP: { + Rcomplex *pz = COMPLEX(v); + b = (unsigned long long)pz[thisi].r == (unsigned long long)pz[previ].r && + (unsigned long long)pz[thisi].i == (unsigned long long)pz[previ].i; + if (!b) { + cplxTwiddled thisz=ctwiddle(pz, thisi), prevz=ctwiddle(pz, previ); + b = thisz.re == prevz.re && thisz.im == prevz.im; + } + } break; default : error("Type '%s' not supported", type2char(TYPEOF(v))); // # nocov } @@ -318,6 +326,11 @@ SEXP nestedid(SEXP l, SEXP cols, SEXP order, SEXP grps, SEXP resetvals, SEXP mul b = i64[j] ? ((int64_t *)xd)[thisi] >= ((int64_t *)xd)[previ] : dtwiddle(xd, thisi) >= dtwiddle(xd, previ); } break; + case CPLXSXP: { + Rcomplex *pz = COMPLEX(v); + cplxTwiddled thisz=ctwiddle(pz, thisi), prevz=ctwiddle(pz, previ); + b = thisz.re >= prevz.re || (thisz.re == thisz.re && thisz.im >= thisz.im); + } break; default: error("Type '%s' not supported", type2char(TYPEOF(v))); // # nocov } From e5d1d1de137a6c5c850e415faa6fa9d719777824 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Sun, 14 Jul 2019 03:39:19 +0800 Subject: [PATCH 11/25] start preferring Rcomplex type --- inst/tests/tests.Rraw | 2 +- src/uniqlist.c | 9 +++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index b4f765cc94..6cddff5046 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -6474,7 +6474,7 @@ test(1464.03, rleidv(DT, "b"), c(1L, 1L, 2L, 2L, 3L, 3L, 3L, 4L, 5L, 5L)) test(1464.04, rleid(DT$b), c(1L, 1L, 2L, 2L, 3L, 3L, 3L, 4L, 5L, 5L)) test(1464.05, rleidv(DT, "c"), c(1L, 1L, 2L, 2L, 3L, 3L, 3L, 4L, 5L, 5L)) test(1464.06, rleid(DT$c), c(1L, 1L, 2L, 2L, 3L, 3L, 3L, 4L, 5L, 5L)) -test(1464.07, rleid(as.raw(c(3L, 1L, 2L)), error="Type 'raw' not supported") +test(1464.07, rleid(as.raw(c(3L, 1L, 2L))), error="Type 'raw' not supported") test(1464.08, rleidv(DT, 0), error="outside range") test(1464.09, rleidv(DT, 5), error="outside range") test(1464.10, rleidv(DT, 1:4), 1:nrow(DT)) diff --git a/src/uniqlist.c b/src/uniqlist.c index 370c11a75d..8951eb716d 100644 --- a/src/uniqlist.c +++ b/src/uniqlist.c @@ -211,8 +211,8 @@ SEXP rleid(SEXP l, SEXP cols) { break; case CPLXSXP: { // tried to make long long complex * but got a warning that it's a GNU extension - double complex *pz = (double complex *)COMPLEX(jcol); - same = (long long)creal(pz[i]) == (long long)creal(pz[i-1]) && (long long)cimag(pz[i]) == (long long)cimag(pz[i-1]); + Rcomplex *pz = COMPLEX(jcol); + same = (long long)pz[i].r == (long long)pz[i-1].r && (long long)pz[i].i == (long long)pz[i-1].i; } break; default : error("Type '%s' not supported", type2char(TYPEOF(jcol))); // # nocov @@ -247,9 +247,10 @@ SEXP rleid(SEXP l, SEXP cols) { } break; case CPLXSXP: { - double complex *pzjcol = (double complex *)COMPLEX(jcol); + Rcomplex *pzjcol = COMPLEX(jcol); for (R_xlen_t i=1; i Date: Sun, 14 Jul 2019 03:43:24 +0800 Subject: [PATCH 12/25] switch to Rcomplex API --- NEWS.md | 2 +- src/reorder.c | 7 +++---- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/NEWS.md b/NEWS.md index 81a50403ae..c28317ee24 100644 --- a/NEWS.md +++ b/NEWS.md @@ -129,7 +129,7 @@ # TRUE ``` -19. Support for `setkey` on `data.table`s with complex-valued columns, [#1444](https://github.com/Rdatatable/data.table/issues/1444). Note that complex columns themselves are not well-ordered (and thus not supported as keys). Thanks @atalikami for the report. +19. Support for `setkey` on `data.table`s with complex-valued columns, [#1444](https://github.com/Rdatatable/data.table/issues/1444). Note that complex columns themselves are not yet supported as keys). Thanks @atalikami for the report. #### BUG FIXES diff --git a/src/reorder.c b/src/reorder.c index d55d147283..b61286ff23 100644 --- a/src/reorder.c +++ b/src/reorder.c @@ -1,5 +1,4 @@ #include "data.table.h" -#include SEXP reorder(SEXP x, SEXP order) { @@ -80,9 +79,9 @@ SEXP reorder(SEXP x, SEXP order) memcpy(vd+start, tmp+start, (end-start+1)*size); } else { // #1444 -- support for copying CPLXSXP (which have size 16) - double complex *vd = (double complex *)DATAPTR(v); - const double complex *restrict cvd = vd; - double complex *restrict tmp = (double complex *)TMP; + Rcomplex *vd = (Rcomplex *)DATAPTR(v); + const Rcomplex *restrict cvd = vd; + Rcomplex *restrict tmp = (Rcomplex *)TMP; #pragma omp parallel for num_threads(getDTthreads()) for (int i=start; i<=end; i++) { tmp[i] = cvd[idx[i]-1]; // copies 16 bytes; including pointers on 64bit (STRSXP and VECSXP) From 761fe90d5a78a106a2d7ff4dcc4f52b7b311f517 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Sun, 14 Jul 2019 20:16:10 +0800 Subject: [PATCH 13/25] setkey now works on complex columns --- inst/tests/tests.Rraw | 25 +++++++++++++------------ src/forder.c | 9 +++++---- 2 files changed, 18 insertions(+), 16 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index c2777d3f9b..05831f8a26 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -15242,7 +15242,6 @@ ll = list(1:2, NULL, 3:4) test(2063.4, transpose(ll, ignore=TRUE), list(c(1L, 3L), c(2L, 4L))) options(old) -<<<<<<< HEAD # forderv (and downstream functions) handles complex vector input, part of #3690 DT = data.table( a = c(1L, 1L, 8L, 2L, 1L, 9L, 3L, 2L, 6L, 6L), @@ -15265,24 +15264,26 @@ DT = data.table( unq_z = 0:3 + (1:4)*1i test(2064.07, DT[ , .N, by=z], data.table(z=unq_z, N=c(2L, 2L, 1L, 1L))) test(2064.08, DT[ , .N, keyby = z], data.table(z=unq_z, N=c(2L, 2L, 1L, 1L), key='z')) -test(2964.09, dcast(DT, z ~ grp, value.var='v', fill=0), +test(2064.09, dcast(DT, z ~ grp, value.var='v', fill=0), data.table(z=unq_z, `1`=c(3, 4, 5, 0), `2`=c(1, 1, 0, 9), key='z')) -test(2964.10, frank(DT$z), c(1.5, 1.5, 3.5, 3.5, 5, 6)) -test(2964.12, frank(DT$z, ties.method='max'), c(2L, 2L, 4L, 4L, 5L, 6L)) -test(2964.12, frank(-DT$z, ties.method='min'), c(5L, 5L, 3L, 3L, 2L, 1L)) -test(2964.13, DT[ , rowid(z, grp)], rep(1L, 6L)) -test(2964.14, DT[ , rowid(z)], c(1:2, 1:2, 1L, 1L)) -test(2964.15, rleid(c(1i, 1i, 1i, 0, 0, 1-1i, 2+3i, 2+3i)), rep(1:4, c(3:1, 2L))) +test(2064.10, frank(DT$z), c(1.5, 1.5, 3.5, 3.5, 5, 6)) +test(2064.12, frank(DT$z, ties.method='max'), c(2L, 2L, 4L, 4L, 5L, 6L)) +test(2064.12, frank(-DT$z, ties.method='min'), c(5L, 5L, 3L, 3L, 2L, 1L)) +test(2064.13, DT[ , rowid(z, grp)], rep(1L, 6L)) +test(2064.14, DT[ , rowid(z)], c(1:2, 1:2, 1L, 1L)) +test(2064.15, rleid(c(1i, 1i, 1i, 0, 0, 1-1i, 2+3i, 2+3i)), rep(1:4, c(3:1, 2L))) # #1444 -- support for ordering tables with complex columns DT = data.table(a = 2:1, z = 0 + (1:0)*1i) -test(2064.1, setkey(copy(DT), a), data.table(a=1:2, z=complex(0, 1:0), key='a')) -test(2064.2, DT[ , abs(z), by=a], data.table(a=2:1, V1=c(0, 1))) +test(2064.16, setkey(copy(DT), a), data.table(a=1:2, z=0+ (0:1)*1i, key='a')) +test(2064.17, DT[ , abs(z), by=a], data.table(a=2:1, V1=c(1, 0))) +test(2064.18, setkey(copy(DT), z), data.table(a=1:2, z=0+ (0:1)*1i, key='z')) + # raw continues not to be supported DT = data.table(ID=2:1, r=as.raw(0:1)) -test(2064.3, setkey(DT, ID), error="Item 2 of list is type 'raw'") +test(2064.18, setkey(DT, ID), error="Item 2 of list is type 'raw'") # setreordervec triggers !isNewList branch for coverage -test(2064.4, setreordervec(DT$r, order(DT$ID)), error="reorder accepts vectors but this non-VECSXP") +test(2064.19, setreordervec(DT$r, order(DT$ID)), error="reorder accepts vectors but this non-VECSXP") # DT1 = data.table(z = c(0+1i, 2-3i, 4+1i)) # DT2 = data.table(z = c(2-3i, 0+1i, 0+0i)) diff --git a/src/forder.c b/src/forder.c index d47647d11e..5f31bc1b2f 100644 --- a/src/forder.c +++ b/src/forder.c @@ -474,11 +474,12 @@ SEXP forder(SEXP DT, SEXP by, SEXP retGrpArg, SEXP sortGroupsArg, SEXP ascArg, S nrow = length(VECTOR_ELT(DT,0)); int n_cplx = 0; for (int i=0; i length(DT)) - error("'by' value %d out of range [1,%d]", INTEGER(by)[i], length(DT)); - if ( nrow != length(VECTOR_ELT(DT, INTEGER(by)[i]-1)) ) + int by_i = INTEGER(by)[i]; + if (by_i < 1 || by_i > length(DT)) + error("'by' value %d out of range [1,%d]", by_i, length(DT)); + if ( nrow != length(VECTOR_ELT(DT, by_i-1)) ) error("Column %d is length %d which differs from length of column 1 (%d)\n", INTEGER(by)[i], length(VECTOR_ELT(DT, INTEGER(by)[i]-1)), nrow); - if (TYPEOF(VECTOR_ELT(DT, i)) == CPLXSXP) n_cplx++; + if (TYPEOF(VECTOR_ELT(DT, by_i-1)) == CPLXSXP) n_cplx++; } if (n_cplx) { // we don't expect users to need complex sorting extensively From 7261011f7bd18d48a2c88f0ba8ebe2cfe8f9d620 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Mon, 15 Jul 2019 01:42:39 +0800 Subject: [PATCH 14/25] ostensibly done uniqlist; progress on bmerge --- inst/tests/tests.Rraw | 7 +++-- src/bmerge.c | 70 +++++++++++++++++++++++++++++++++++++++++-- src/uniqlist.c | 44 +++++++++++++++++++-------- 3 files changed, 104 insertions(+), 17 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 05831f8a26..147316ba46 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -15267,7 +15267,7 @@ test(2064.08, DT[ , .N, keyby = z], data.table(z=unq_z, N=c(2L, 2L, 1L, 1L), key test(2064.09, dcast(DT, z ~ grp, value.var='v', fill=0), data.table(z=unq_z, `1`=c(3, 4, 5, 0), `2`=c(1, 1, 0, 9), key='z')) test(2064.10, frank(DT$z), c(1.5, 1.5, 3.5, 3.5, 5, 6)) -test(2064.12, frank(DT$z, ties.method='max'), c(2L, 2L, 4L, 4L, 5L, 6L)) +test(2064.11, frank(DT$z, ties.method='max'), c(2L, 2L, 4L, 4L, 5L, 6L)) test(2064.12, frank(-DT$z, ties.method='min'), c(5L, 5L, 3L, 3L, 2L, 1L)) test(2064.13, DT[ , rowid(z, grp)], rep(1L, 6L)) test(2064.14, DT[ , rowid(z)], c(1:2, 1:2, 1L, 1L)) @@ -15278,12 +15278,13 @@ DT = data.table(a = 2:1, z = 0 + (1:0)*1i) test(2064.16, setkey(copy(DT), a), data.table(a=1:2, z=0+ (0:1)*1i, key='a')) test(2064.17, DT[ , abs(z), by=a], data.table(a=2:1, V1=c(1, 0))) test(2064.18, setkey(copy(DT), z), data.table(a=1:2, z=0+ (0:1)*1i, key='z')) +test(2064.19, setorder(DT, z), data.table(a=1:2, z=0+ (0:1)*1i)) # raw continues not to be supported DT = data.table(ID=2:1, r=as.raw(0:1)) -test(2064.18, setkey(DT, ID), error="Item 2 of list is type 'raw'") +test(2064.20, setkey(DT, ID), error="Item 2 of list is type 'raw'") # setreordervec triggers !isNewList branch for coverage -test(2064.19, setreordervec(DT$r, order(DT$ID)), error="reorder accepts vectors but this non-VECSXP") +test(2064.21, setreordervec(DT$r, order(DT$ID)), error="reorder accepts vectors but this non-VECSXP") # DT1 = data.table(z = c(0+1i, 2-3i, 4+1i)) # DT2 = data.table(z = c(2-3i, 0+1i, 0+0i)) diff --git a/src/bmerge.c b/src/bmerge.c index eb55569b00..090cc58f1a 100644 --- a/src/bmerge.c +++ b/src/bmerge.c @@ -204,6 +204,8 @@ static union { unsigned long long ull; long long ll; SEXP s; + Rcomplex z; + cplxTwiddled zull; } ival, xval; static uint64_t i64twiddle(void *p, int i) @@ -299,7 +301,7 @@ void bmerge_r(int xlowIn, int xuppIn, int ilowIn, int iuppIn, int col, int thisg // ilow and iupp now surround the group in ic, too } break; - case STRSXP : + case STRSXP : { if (op[col] != EQ) error("Only '==' operator is supported for columns of type %s.", type2char(TYPEOF(xc))); ival.s = ENC2UTF8(STRING_ELT(ic,ir)); while(xlow < xupp-1) { @@ -338,7 +340,7 @@ void bmerge_r(int xlowIn, int xuppIn, int ilowIn, int iuppIn, int col, int thisg xval.s = ENC2UTF8(STRING_ELT(ic, o ? o[mid]-1 : mid)); if (xval.s == ival.s) tmpupp=mid; else ilow=mid; // see above re == } - break; + } break; case REALSXP : { double *dic = REAL(ic); double *dxc = REAL(xc); @@ -405,6 +407,70 @@ void bmerge_r(int xlowIn, int xuppIn, int ilowIn, int iuppIn, int col, int thisg // ilow and iupp now surround the group in ic, too } break; + case CPLXSXP : { + Rcomplex *dic = COMPLEX(ic); + Rcomplex *dxc = COMPLEX(xc); + ival.zull = ctwiddle(dic, ir); + while(xlow < xupp-1) { + int mid = xlow + (xupp-xlow)/2; + xval.zull = ctwiddle(dxc, XIND(mid)); + if (xval.zull.reival.zull.re || + (xval.zull.re = ival.zull.re && xval.zull.im > ival.zull.im)) { + xupp=mid; + } else { // xval.zull == ival.zull + tmplow = mid; + tmpupp = mid; + while(tmplow-1 && op[col] != EQ) { + Rboolean isivalNA = (Rboolean)(ISNAN(dic[ir].r) || ISNAN(dic[ir].i)); + switch (op[col]) { + case LE : if (!isivalNA) xlow = xlowIn; break; + case LT : xupp = xlow + 1; if (!isivalNA) xlow = xlowIn; break; + case GE : if (!isivalNA) xupp = xuppIn; break; + case GT : xlow = xupp - 1; if (!isivalNA) xupp = xuppIn; break; + } + // for LE/LT cases, we need to ensure xlow excludes NA indices, != EQ is checked above already + if (op[col] <= 3 && xlow-1) { + while(tmplow Date: Thu, 18 Jul 2019 09:51:57 +0800 Subject: [PATCH 15/25] scale back attempts at bmerge, all of uniqlist --- NEWS.md | 5 ++-- inst/tests/tests.Rraw | 1 - src/bmerge.c | 68 +------------------------------------------ src/data.table.h | 6 ---- src/forder.c | 32 -------------------- src/uniqlist.c | 59 ++++++++----------------------------- 6 files changed, 16 insertions(+), 155 deletions(-) diff --git a/NEWS.md b/NEWS.md index 1321c92ef7..704711026d 100644 --- a/NEWS.md +++ b/NEWS.md @@ -131,12 +131,13 @@ # TRUE ``` -19. Sorting now extended to complex vectors, [#1703](https://github.com/Rdatatable/data.table/issues/1703). Consistent with `base::order`, sorting is done lexicographically (`z1ival.zull.re || - (xval.zull.re = ival.zull.re && xval.zull.im > ival.zull.im)) { - xupp=mid; - } else { // xval.zull == ival.zull - tmplow = mid; - tmpupp = mid; - while(tmplow-1 && op[col] != EQ) { - Rboolean isivalNA = (Rboolean)(ISNAN(dic[ir].r) || ISNAN(dic[ir].i)); - switch (op[col]) { - case LE : if (!isivalNA) xlow = xlowIn; break; - case LT : xupp = xlow + 1; if (!isivalNA) xlow = xlowIn; break; - case GE : if (!isivalNA) xupp = xuppIn; break; - case GT : xlow = xupp - 1; if (!isivalNA) xupp = xuppIn; break; - } - // for LE/LT cases, we need to ensure xlow excludes NA indices, != EQ is checked above already - if (op[col] <= 3 && xlow-1) { - while(tmplow> (dround*8); - } else if (ISNAN(u_re.d)) { - out.re = ISNA(u_re.d) ? 0 : 1; - } else if (isinf(u_re.d)) { - out.re = signbit(u_re.d) ? 2: (0xffffffffffffffff>>(dround*8)); - } else Error("Unknown non-finite value; not NA, NaN, -Inf or +Inf"); // # nocov - if (R_FINITE(u_im.d)) { - if (u_im.d==0) u_im.d=0; - u_im.u64 ^= (u_im.u64 & 0x8000000000000000) ? 0xffffffffffffffff : 0x8000000000000000; - u_im.u64 += (u_im.u64 & dmask) << 1; - out.re = u_im.u64 >> (dround*8); - } else if (ISNAN(u_im.d)) { - out.re = ISNA(u_im.d) ? 0 : 1; - } else if (isinf(u_im.d)) { - out.re = signbit(u_im.d) ? 2: (0xffffffffffffffff>>(dround*8)); - } else Error("Unknown non-finite value; not NA, NaN, -Inf or +Inf"); // # nocov - return out; -} - void radix_r(const int from, const int to, const int radix); SEXP forder(SEXP DT, SEXP by, SEXP retGrpArg, SEXP sortGroupsArg, SEXP ascArg, SEXP naArg) diff --git a/src/uniqlist.c b/src/uniqlist.c index 0a29ffeab5..10f9455c93 100644 --- a/src/uniqlist.c +++ b/src/uniqlist.c @@ -28,19 +28,17 @@ SEXP uniqlist(SEXP l, SEXP order) if (ncol==1) { -// unfortunately no equality method overloaded for Rcomplex, compare by components -#define TEST_ELEM_NEQ if(elem != prev -#define TEST_ELEM_NEQ_C if((elem.r != prev.r || elem.i != prev.i) - #define COMPARE1 \ prev = *vd; \ for (int i=1; i= ((int64_t *)xd)[previ] : dtwiddle(xd, thisi) >= dtwiddle(xd, previ); } break; - case CPLXSXP: { - Rcomplex *pz = COMPLEX(v); - cplxTwiddled thisz=ctwiddle(pz, thisi), prevz=ctwiddle(pz, previ); - b = thisz.re >= prevz.re || (thisz.re == thisz.re && thisz.im >= thisz.im); - } break; default: error("Type '%s' not supported", type2char(TYPEOF(v))); // # nocov } @@ -406,4 +372,3 @@ SEXP uniqueNlogical(SEXP x, SEXP narmArg) { return ScalarInteger(3-narm); return ScalarInteger(2-(narm && third!=NA_LOGICAL)); } - From b922ee9d674d79a64b089b4973c1809e5a59c3fc Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Thu, 18 Jul 2019 09:58:53 +0800 Subject: [PATCH 16/25] tidy up tests --- inst/tests/tests.Rraw | 25 +++++++------------------ 1 file changed, 7 insertions(+), 18 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index b0a1d91f13..ce5ee7e356 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -15360,32 +15360,21 @@ test(2069.11, frank(DT$z, ties.method='max'), c(2L, 2L, 4L, 4L, 5L, 6L)) test(2069.12, frank(-DT$z, ties.method='min'), c(5L, 5L, 3L, 3L, 2L, 1L)) test(2069.13, DT[ , rowid(z, grp)], rep(1L, 6L)) test(2069.14, DT[ , rowid(z)], c(1:2, 1:2, 1L, 1L)) +test(2069.15, rleid(c(1i, 1i, 1i, 0, 0, 1-1i, 2+3i, 2+3i)), rep(1:4, c(3:1, 2L))) -# #1444 -- support for ordering tables with complex columns +# setkey, setorder work DT = data.table(a = 2:1, z = 0 + (1:0)*1i) -test(2069.16, setkey(copy(DT), a), data.table(a=1:2, z=0+ (0:1)*1i, key='a')) -test(2069.17, DT[ , abs(z), by=a], data.table(a=2:1, V1=c(1, 0))) -test(2069.18, setkey(copy(DT), z), data.table(a=1:2, z=0+ (0:1)*1i, key='z')) -test(2069.19, setorder(DT, z), data.table(a=1:2, z=0+ (0:1)*1i)) - -# raw continues not to be supported -DT = data.table(ID=2:1, r=as.raw(0:1)) -test(2069.20, setkey(DT, ID), error="Item 2 of list is type 'raw'") -# setreordervec triggers !isNewList branch for coverage -test(2069.21, setreordervec(DT$r, order(DT$ID)), error="reorder accepts vectors but this non-VECSXP") - -# DT1 = data.table(z = c(0+1i, 2-3i, 4+1i)) -# DT2 = data.table(z = c(2-3i, 0+1i, 0+0i)) -# DT1[DT2, on = 'z'] +test(2069.16, setkey(copy(DT), z), data.table(a=1:2, z=0+ (0:1)*1i, key='z')) +test(2069.17, setorder(DT, z), data.table(a=1:2, z=0+ (0:1)*1i)) ## assorted coverage tests from along the way if (test_bit64) { - test(2069.50, is.sorted(as.integer64(10:1)), FALSE) - test(2069.51, is.sorted(as.integer64(1:10))) + test(2069.18, is.sorted(as.integer64(10:1)), FALSE) + test(2069.19, is.sorted(as.integer64(1:10))) } # sort by vector outside of table ord = 3:1 -test(2069.52, forder(data.table(a = 3:1), ord), 3:1) +test(2069.20, forder(data.table(a = 3:1), ord), 3:1) ################################### From b6565415cf623499ac88f920f0fc8997ee02bf2b Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Thu, 18 Jul 2019 10:07:04 +0800 Subject: [PATCH 17/25] unique also works --- NEWS.md | 2 +- inst/tests/tests.Rraw | 13 ++++++++----- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/NEWS.md b/NEWS.md index 704711026d..b08e965ec5 100644 --- a/NEWS.md +++ b/NEWS.md @@ -130,7 +130,7 @@ identical(y1,y2) && identical(y1,y3) # TRUE ``` - + 19. `shift` now supports type `complex`, part of [#3690](https://github.com/Rdatatable/data.table/issues/3690). 20. `setkey` now supports type `complex` as value columns (not as key columns), [#1444](https://github.com/Rdatatable/data.table/issues/1444). Thanks Gareth Ward for the report. diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index ce5ee7e356..1751be76a3 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -15361,20 +15361,23 @@ test(2069.12, frank(-DT$z, ties.method='min'), c(5L, 5L, 3L, 3L, 2L, 1L)) test(2069.13, DT[ , rowid(z, grp)], rep(1L, 6L)) test(2069.14, DT[ , rowid(z)], c(1:2, 1:2, 1L, 1L)) test(2069.15, rleid(c(1i, 1i, 1i, 0, 0, 1-1i, 2+3i, 2+3i)), rep(1:4, c(3:1, 2L))) +test(2069.16, unique(DT, by = 'z'), data.table(z = unq_z, grp = c(1L, 1L, 1L, 2L), v = c(3, 4, 5, 9))) +test(2069.17, unique(DT, by = 'z', fromLast = TRUE), data.table(z = unq_z, grp = c(2L, 2L, 1L, 2L), v = c(1, 1, 5, 9))) +test(2069.18, uniqueN(DT$z), 4L) # setkey, setorder work DT = data.table(a = 2:1, z = 0 + (1:0)*1i) -test(2069.16, setkey(copy(DT), z), data.table(a=1:2, z=0+ (0:1)*1i, key='z')) -test(2069.17, setorder(DT, z), data.table(a=1:2, z=0+ (0:1)*1i)) +test(2069.19, setkey(copy(DT), z), data.table(a=1:2, z=0+ (0:1)*1i, key='z')) +test(2069.20, setorder(DT, z), data.table(a=1:2, z=0+ (0:1)*1i)) ## assorted coverage tests from along the way if (test_bit64) { - test(2069.18, is.sorted(as.integer64(10:1)), FALSE) - test(2069.19, is.sorted(as.integer64(1:10))) + test(2069.21, is.sorted(as.integer64(10:1)), FALSE) + test(2069.22, is.sorted(as.integer64(1:10))) } # sort by vector outside of table ord = 3:1 -test(2069.20, forder(data.table(a = 3:1), ord), 3:1) +test(2069.23, forder(data.table(a = 3:1), ord), 3:1) ################################### From 3c3228b4fb7eeb20d2cb7308a63871baa2fd8910 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Thu, 18 Jul 2019 11:53:49 +0800 Subject: [PATCH 18/25] updated NEWS item & added coverage tests --- NEWS.md | 2 +- inst/tests/tests.Rraw | 12 +++++++++++- src/dogroups.c | 18 ++++++++++-------- 3 files changed, 22 insertions(+), 10 deletions(-) diff --git a/NEWS.md b/NEWS.md index b08e965ec5..85ec154838 100644 --- a/NEWS.md +++ b/NEWS.md @@ -135,7 +135,7 @@ 20. `setkey` now supports type `complex` as value columns (not as key columns), [#1444](https://github.com/Rdatatable/data.table/issues/1444). Thanks Gareth Ward for the report. -21. Sorting now extended to complex vectors, [#1703](https://github.com/Rdatatable/data.table/issues/1703). Consistent with `base::order`, sorting is done lexicographically (`z10 From 009935cfaad6b7bb9cc72a4e7b4d4a9894fa6e6b Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Thu, 18 Jul 2019 12:28:59 +0800 Subject: [PATCH 19/25] one more nocov --- src/dogroups.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/dogroups.c b/src/dogroups.c index 191c6ef54e..3ea0b0ba9b 100644 --- a/src/dogroups.c +++ b/src/dogroups.c @@ -84,7 +84,7 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX SEXP *xknameSyms = (SEXP *)R_alloc(length(xknames), sizeof(SEXP)); for(int i=0; i Date: Thu, 18 Jul 2019 13:53:56 +0800 Subject: [PATCH 20/25] actually hit LGLSXP branch! --- inst/tests/tests.Rraw | 4 ++++ src/dogroups.c | 6 +++--- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 18f8dd2cad..0227e05cca 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -15388,6 +15388,10 @@ test(2069.28, DT[, .SD[3,], by=b], DT[9:12, .(b, a, z)]) DT = data.table(x=1:4,y=1:2,lgl=TRUE,key="x,y") test(2069.29, DT[CJ(1:4,1:4), any(lgl), by=.EACHI]$V1, c(TRUE, NA, NA, NA, NA, TRUE, NA, NA, TRUE, NA, NA, NA, NA, TRUE, NA, NA)) +set.seed(45L) +DT1 = data.table(a = sample(3L, 15L, TRUE) + .1, b=sample(c(TRUE, FALSE, NA), 15L, TRUE)) +DT2 = data.table(a = sample(3L, 6L, TRUE) + .1, b=sample(c(TRUE, FALSE, NA), 6L, TRUE)) +test(2069.30, DT1[DT2, .(y = sum(b, na.rm=TRUE)), by=.EACHI, on=c(a = 'a', b="b")]$y, rep(0L, 6L)) ################################### diff --git a/src/dogroups.c b/src/dogroups.c index 3ea0b0ba9b..ea036d0bff 100644 --- a/src/dogroups.c +++ b/src/dogroups.c @@ -161,9 +161,9 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX INTEGER(I)[0] = 0; for (int j=0; j Date: Thu, 18 Jul 2019 14:15:57 +0800 Subject: [PATCH 21/25] more coverage --- inst/tests/tests.Rraw | 34 ++++++++++++++++++---------------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 0227e05cca..88a04b0eef 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -15361,38 +15361,40 @@ test(2069.12, frank(-DT$z, ties.method='min'), c(5L, 5L, 3L, 3L, 2L, 1L)) test(2069.13, DT[ , rowid(z, grp)], rep(1L, 6L)) test(2069.14, DT[ , rowid(z)], c(1:2, 1:2, 1L, 1L)) test(2069.15, rleid(c(1i, 1i, 1i, 0, 0, 1-1i, 2+3i, 2+3i)), rep(1:4, c(3:1, 2L))) -test(2069.16, unique(DT, by = 'z'), data.table(z = unq_z, grp = c(1L, 1L, 1L, 2L), v = c(3, 4, 5, 9))) -test(2069.17, unique(DT, by = 'z', fromLast = TRUE), data.table(z = unq_z, grp = c(2L, 2L, 1L, 2L), v = c(1, 1, 5, 9))) -test(2069.18, uniqueN(DT$z), 4L) +test(2069.16, rleidv(DT, "z"), c(1L, 1L, 2L, 2L, 3L, 4L)) +test(2069.17, unique(DT, by = 'z'), data.table(z = unq_z, grp = c(1L, 1L, 1L, 2L), v = c(3, 4, 5, 9))) +test(2069.18, unique(DT, by = 'z', fromLast = TRUE), data.table(z = unq_z, grp = c(2L, 2L, 1L, 2L), v = c(1, 1, 5, 9))) +test(2069.19, uniqueN(DT$z), 4L) # setkey, setorder work DT = data.table(a = 2:1, z = 0 + (1:0)*1i) -test(2069.19, setkey(copy(DT), z), data.table(a=1:2, z=0+ (0:1)*1i, key='z')) -test(2069.20, setorder(DT, z), data.table(a=1:2, z=0+ (0:1)*1i)) +test(2069.20, setkey(copy(DT), z), data.table(a=1:2, z=0+ (0:1)*1i, key='z')) +test(2069.21, setorder(DT, z), data.table(a=1:2, z=0+ (0:1)*1i)) ## assorted coverage tests from along the way if (test_bit64) { - test(2069.21, is.sorted(as.integer64(10:1)), FALSE) - test(2069.22, is.sorted(as.integer64(1:10))) + test(2069.22, is.sorted(as.integer64(10:1)), FALSE) + test(2069.23, is.sorted(as.integer64(1:10))) } # sort by vector outside of table ord = 3:1 -test(2069.23, forder(data.table(a=3:1), ord), 3:1) +test(2069.24, forder(data.table(a=3:1), ord), 3:1) # dogroups.c coverage -test(2069.24, data.table(c='1')[ , expression(1), by=c], error="j evaluates to type 'expression'") -test(2069.25, data.table(c='1', d=2)[ , d := .(NULL), by=c], error='RHS is NULL when grouping :=') -test(2069.26, data.table(c='1', d=2)[ , c(a='b'), by=c, verbose=TRUE], output='j appears to be a named vector') -test(2069.27, data.table(c = '1', d = 2)[ , .(a = c(nm='b')), by = c, verbose = TRUE], output = 'Column 1 of j is a named vector') +test(2069.25, data.table(c='1')[ , expression(1), by=c], error="j evaluates to type 'expression'") +test(2069.26, data.table(c='1', d=2)[ , d := .(NULL), by=c], error='RHS is NULL when grouping :=') +test(2069.27, data.table(c='1', d=2)[ , c(a='b'), by=c, verbose=TRUE], output='j appears to be a named vector') +test(2069.28, data.table(c = '1', d = 2)[ , .(a = c(nm='b')), by = c, verbose = TRUE], output = 'Column 1 of j is a named vector') DT <- data.table(a = rep(1:3, each = 4), b = LETTERS[1:4], z = 0:3 + (4:1)*1i) -test(2069.28, DT[, .SD[3,], by=b], DT[9:12, .(b, a, z)]) +test(2069.29, DT[, .SD[3,], by=b], DT[9:12, .(b, a, z)]) DT = data.table(x=1:4,y=1:2,lgl=TRUE,key="x,y") -test(2069.29, DT[CJ(1:4,1:4), any(lgl), by=.EACHI]$V1, +test(2069.30, DT[CJ(1:4,1:4), any(lgl), by=.EACHI]$V1, c(TRUE, NA, NA, NA, NA, TRUE, NA, NA, TRUE, NA, NA, NA, NA, TRUE, NA, NA)) set.seed(45L) DT1 = data.table(a = sample(3L, 15L, TRUE) + .1, b=sample(c(TRUE, FALSE, NA), 15L, TRUE)) DT2 = data.table(a = sample(3L, 6L, TRUE) + .1, b=sample(c(TRUE, FALSE, NA), 6L, TRUE)) -test(2069.30, DT1[DT2, .(y = sum(b, na.rm=TRUE)), by=.EACHI, on=c(a = 'a', b="b")]$y, rep(0L, 6L)) - +test(2069.31, DT1[DT2, .(y = sum(b, na.rm=TRUE)), by=.EACHI, on=c(a = 'a', b="b")]$y, rep(0L, 6L)) +DT = data.table(z = 1i) +test(2069.32, DT[DT, on = 'z'], error = "Type 'complex' not supported for merging") ################################### # Add new tests above this line # From 598097bb556db1f6bdcff2544f05c11b662a2625 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Fri, 19 Jul 2019 08:48:38 +0800 Subject: [PATCH 22/25] use direct double comparison instead of type punning --- inst/tests/tests.Rraw | 36 +++++++++++++++++++----------------- src/uniqlist.c | 5 ++--- 2 files changed, 21 insertions(+), 20 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 88a04b0eef..5edb64efbb 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -15361,40 +15361,42 @@ test(2069.12, frank(-DT$z, ties.method='min'), c(5L, 5L, 3L, 3L, 2L, 1L)) test(2069.13, DT[ , rowid(z, grp)], rep(1L, 6L)) test(2069.14, DT[ , rowid(z)], c(1:2, 1:2, 1L, 1L)) test(2069.15, rleid(c(1i, 1i, 1i, 0, 0, 1-1i, 2+3i, 2+3i)), rep(1:4, c(3:1, 2L))) -test(2069.16, rleidv(DT, "z"), c(1L, 1L, 2L, 2L, 3L, 4L)) -test(2069.17, unique(DT, by = 'z'), data.table(z = unq_z, grp = c(1L, 1L, 1L, 2L), v = c(3, 4, 5, 9))) -test(2069.18, unique(DT, by = 'z', fromLast = TRUE), data.table(z = unq_z, grp = c(2L, 2L, 1L, 2L), v = c(1, 1, 5, 9))) -test(2069.19, uniqueN(DT$z), 4L) +# handling doubles properly +test(2069.16, rleid(c(1i, 1.1i)), 1:2) +test(2069.17, rleidv(DT, "z"), c(1L, 1L, 2L, 2L, 3L, 4L)) +test(2069.18, unique(DT, by = 'z'), data.table(z = unq_z, grp = c(1L, 1L, 1L, 2L), v = c(3, 4, 5, 9))) +test(2069.19, unique(DT, by = 'z', fromLast = TRUE), data.table(z = unq_z, grp = c(2L, 2L, 1L, 2L), v = c(1, 1, 5, 9))) +test(2069.20, uniqueN(DT$z), 4L) # setkey, setorder work DT = data.table(a = 2:1, z = 0 + (1:0)*1i) -test(2069.20, setkey(copy(DT), z), data.table(a=1:2, z=0+ (0:1)*1i, key='z')) -test(2069.21, setorder(DT, z), data.table(a=1:2, z=0+ (0:1)*1i)) +test(2069.21, setkey(copy(DT), z), data.table(a=1:2, z=0+ (0:1)*1i, key='z')) +test(2069.22, setorder(DT, z), data.table(a=1:2, z=0+ (0:1)*1i)) ## assorted coverage tests from along the way if (test_bit64) { - test(2069.22, is.sorted(as.integer64(10:1)), FALSE) - test(2069.23, is.sorted(as.integer64(1:10))) + test(2069.23, is.sorted(as.integer64(10:1)), FALSE) + test(2069.24, is.sorted(as.integer64(1:10))) } # sort by vector outside of table ord = 3:1 -test(2069.24, forder(data.table(a=3:1), ord), 3:1) +test(2069.25, forder(data.table(a=3:1), ord), 3:1) # dogroups.c coverage -test(2069.25, data.table(c='1')[ , expression(1), by=c], error="j evaluates to type 'expression'") -test(2069.26, data.table(c='1', d=2)[ , d := .(NULL), by=c], error='RHS is NULL when grouping :=') -test(2069.27, data.table(c='1', d=2)[ , c(a='b'), by=c, verbose=TRUE], output='j appears to be a named vector') -test(2069.28, data.table(c = '1', d = 2)[ , .(a = c(nm='b')), by = c, verbose = TRUE], output = 'Column 1 of j is a named vector') +test(2069.26, data.table(c='1')[ , expression(1), by=c], error="j evaluates to type 'expression'") +test(2069.27, data.table(c='1', d=2)[ , d := .(NULL), by=c], error='RHS is NULL when grouping :=') +test(2069.28, data.table(c='1', d=2)[ , c(a='b'), by=c, verbose=TRUE], output='j appears to be a named vector') +test(2069.29, data.table(c = '1', d = 2)[ , .(a = c(nm='b')), by = c, verbose = TRUE], output = 'Column 1 of j is a named vector') DT <- data.table(a = rep(1:3, each = 4), b = LETTERS[1:4], z = 0:3 + (4:1)*1i) -test(2069.29, DT[, .SD[3,], by=b], DT[9:12, .(b, a, z)]) +test(2069.30, DT[, .SD[3,], by=b], DT[9:12, .(b, a, z)]) DT = data.table(x=1:4,y=1:2,lgl=TRUE,key="x,y") -test(2069.30, DT[CJ(1:4,1:4), any(lgl), by=.EACHI]$V1, +test(2069.31, DT[CJ(1:4,1:4), any(lgl), by=.EACHI]$V1, c(TRUE, NA, NA, NA, NA, TRUE, NA, NA, TRUE, NA, NA, NA, NA, TRUE, NA, NA)) set.seed(45L) DT1 = data.table(a = sample(3L, 15L, TRUE) + .1, b=sample(c(TRUE, FALSE, NA), 15L, TRUE)) DT2 = data.table(a = sample(3L, 6L, TRUE) + .1, b=sample(c(TRUE, FALSE, NA), 6L, TRUE)) -test(2069.31, DT1[DT2, .(y = sum(b, na.rm=TRUE)), by=.EACHI, on=c(a = 'a', b="b")]$y, rep(0L, 6L)) +test(2069.32, DT1[DT2, .(y = sum(b, na.rm=TRUE)), by=.EACHI, on=c(a = 'a', b="b")]$y, rep(0L, 6L)) DT = data.table(z = 1i) -test(2069.32, DT[DT, on = 'z'], error = "Type 'complex' not supported for merging") +test(2069.33, DT[DT, on = 'z'], error = "Type 'complex' not supported for merging") ################################### # Add new tests above this line # diff --git a/src/uniqlist.c b/src/uniqlist.c index 10f9455c93..318722930b 100644 --- a/src/uniqlist.c +++ b/src/uniqlist.c @@ -203,7 +203,7 @@ SEXP rleid(SEXP l, SEXP cols) { case CPLXSXP: { // tried to make long long complex * but got a warning that it's a GNU extension Rcomplex *pz = COMPLEX(jcol); - same = (long long)pz[i].r == (long long)pz[i-1].r && (long long)pz[i].i == (long long)pz[i-1].i; + same = pz[i].r == pz[i-1].r && pz[i].i == pz[i-1].i; } break; default : error("Type '%s' not supported", type2char(TYPEOF(jcol))); // # nocov @@ -240,8 +240,7 @@ SEXP rleid(SEXP l, SEXP cols) { case CPLXSXP: { Rcomplex *pzjcol = COMPLEX(jcol); for (R_xlen_t i=1; i Date: Thu, 18 Jul 2019 20:10:21 -0700 Subject: [PATCH 23/25] replaced big block with smaller modification to the main loop; also halves the working ram needed as it reuses the same tmp for the rerun --- src/forder.c | 68 ++++++++++++++------------------------------------- src/reorder.c | 1 + 2 files changed, 19 insertions(+), 50 deletions(-) diff --git a/src/forder.c b/src/forder.c index 9a390c8d9b..3dd2031a4b 100644 --- a/src/forder.c +++ b/src/forder.c @@ -449,55 +449,6 @@ SEXP forder(SEXP DT, SEXP by, SEXP retGrpArg, SEXP sortGroupsArg, SEXP ascArg, S error("Column %d is length %d which differs from length of column 1 (%d)\n", INTEGER(by)[i], length(VECTOR_ELT(DT, INTEGER(by)[i]-1)), nrow); if (TYPEOF(VECTOR_ELT(DT, by_i-1)) == CPLXSXP) n_cplx++; } - if (n_cplx) { - // we don't expect users to need complex sorting extensively - // or on massive data sets, so we take the approach of - // splitting a complex vector into its real & imaginary parts - // and using the regular forderv machinery to sort; a baremetal - // implementation would at root do the same, but the approach - // here is a bit more slapdash with respect to memory efficiency - // (seen clearly here at C from the 3+2*n_cplx PROTECT() calls) - int n_out = length(by) + n_cplx; - SEXP new_dt = PROTECT(allocVector(VECSXP, n_out)); n_protect++; - SEXP new_asc = PROTECT(allocVector(INTSXP, n_out)); n_protect++; - // will be simply 1:n_out - SEXP new_by = PROTECT(allocVector(INTSXP, n_out)); n_protect++; - int j = 0; - for (int i=0; i0) int spare=0; // the amount of bits remaining on the right of the current nradix byte bool isReal=false; + bool complexRerun = false; // see comments below in CPLXSXP case + SEXP CplxPart = R_NilValue; + if (n_cplx) { CplxPart=PROTECT(allocVector(REALSXP, nrow)); n_protect++; } // one alloc is reused for each part TEND(2); for (int col=0; col Date: Fri, 19 Jul 2019 13:09:14 -0700 Subject: [PATCH 24/25] memcmp for complex instead of == on double --- src/uniqlist.c | 41 +++++++++++++++++++---------------------- 1 file changed, 19 insertions(+), 22 deletions(-) diff --git a/src/uniqlist.c b/src/uniqlist.c index 318722930b..a7f6577bd1 100644 --- a/src/uniqlist.c +++ b/src/uniqlist.c @@ -196,14 +196,13 @@ SEXP rleid(SEXP l, SEXP cols) { break; case REALSXP : { long long *ll = (long long *)REAL(jcol); - same = ll[i]==ll[i-1]; } + same = ll[i]==ll[i-1]; // 8 bytes of bits are identical. For real (no rounding currently) and integer64 // long long == 8 bytes checked in init.c - break; + } break; case CPLXSXP: { - // tried to make long long complex * but got a warning that it's a GNU extension Rcomplex *pz = COMPLEX(jcol); - same = pz[i].r == pz[i-1].r && pz[i].i == pz[i-1].i; + same = memcmp(&pz[i], &pz[i-1], sizeof(Rcomplex))==0; // compiler optimization should replace library call with best 16-byte fixed method } break; default : error("Type '%s' not supported", type2char(TYPEOF(jcol))); // # nocov @@ -215,32 +214,30 @@ SEXP rleid(SEXP l, SEXP cols) { SEXP jcol = VECTOR_ELT(l, icols[0]-1); switch (TYPEOF(jcol)) { case INTSXP : case LGLSXP : { - int *ijcol = INTEGER(jcol); - for (R_xlen_t i=1; i Date: Fri, 19 Jul 2019 14:21:46 -0700 Subject: [PATCH 25/25] news item tidy --- NEWS.md | 13 +++---------- inst/tests/tests.Rraw | 2 +- src/bmerge.c | 2 +- 3 files changed, 5 insertions(+), 12 deletions(-) diff --git a/NEWS.md b/NEWS.md index 85ec154838..35f51f66ea 100644 --- a/NEWS.md +++ b/NEWS.md @@ -96,7 +96,7 @@ 16. `as.data.table` now unpacks columns in a `data.frame` which are themselves a `data.frame`. This need arises when parsing JSON, a corollary in [#3369](https://github.com/Rdatatable/data.table/issues/3369#issuecomment-462662752). `data.table` does not allow columns to be objects which themselves have columns (such as `matrix` and `data.frame`), unlike `data.frame` which does. Bug fix 19 in v1.12.2 (see below) added a helpful error (rather than segfault) to detect such invalid `data.table`, and promised that `as.data.table()` would unpack these columns in the next release (i.e. this release) so that the invalid `data.table` is not created in the first place. -17. `CJ` has been ported to C and parallelized, thanks to a PR by Michael Chirico, [#3596](https://github.com/Rdatatable/data.table/pull/3596). All types benefit (including newly supported complex, part of [#3690](https://github.com/Rdatatable/data.table/issues/3690)), and as in many `data.table` operations, factors benefit more than character. +17. `CJ` has been ported to C and parallelized, thanks to a PR by Michael Chirico, [#3596](https://github.com/Rdatatable/data.table/pull/3596). All types benefit, but, as in many `data.table` operations, factors benefit more than character. ```R # default 4 threads on a laptop with 16GB RAM and 8 logical CPU @@ -114,7 +114,7 @@ # 0.357 0.763 0.292 # now ``` -18. New function `coalesce(...)` has been written in C, and is multithreaded for numeric, complex, and factor types. It replaces missing values according to a prioritized list of candidates (as per SQL COALESCE, `dplyr::coalesce`, and `hutils::coalesce`), [#3424](https://github.com/Rdatatable/data.table/issues/3424). It accepts any number of vectors in several forms. For example, given three vectors `x`, `y`, and `z`, where each `NA` in `x` is to be replaced by the corresponding value in `y` if that is non-NA, else the corresponding value in `z`, the following equivalent forms are all accepted: `coalesce(x,y,z)`, `coalesce(x,list(y,z))`, and `coalesce(list(x,y,z))`. +18. New function `coalesce(...)` has been written in C, and is multithreaded for `numeric` and `factor`. It replaces missing values according to a prioritized list of candidates (as per SQL COALESCE, `dplyr::coalesce`, and `hutils::coalesce`), [#3424](https://github.com/Rdatatable/data.table/issues/3424). It accepts any number of vectors in several forms. For example, given three vectors `x`, `y`, and `z`, where each `NA` in `x` is to be replaced by the corresponding value in `y` if that is non-NA, else the corresponding value in `z`, the following equivalent forms are all accepted: `coalesce(x,y,z)`, `coalesce(x,list(y,z))`, and `coalesce(list(x,y,z))`. ```R # default 4 threads on a laptop with 16GB RAM and 8 logical CPU @@ -131,12 +131,7 @@ # TRUE ``` -19. `shift` now supports type `complex`, part of [#3690](https://github.com/Rdatatable/data.table/issues/3690). - -20. `setkey` now supports type `complex` as value columns (not as key columns), [#1444](https://github.com/Rdatatable/data.table/issues/1444). Thanks Gareth Ward for the report. - -21. Sorting now extended to complex vectors, [#1703](https://github.com/Rdatatable/data.table/issues/1703). Consistent with `base::order`, sorting is done lexicographically (`z1