From 73a63ca5fe9911c57e9ab6212c3f39911cc8ec33 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Wed, 10 Jul 2019 13:21:15 +0800 Subject: [PATCH 1/6] Closes #1444 -- setkey works on tables with complex columns --- NEWS.md | 2 ++ inst/tests/tests.Rraw | 3 +++ src/reorder.c | 21 ++++++++++++++++----- 3 files changed, 21 insertions(+), 5 deletions(-) diff --git a/NEWS.md b/NEWS.md index 10e3d5d5ea..81a50403ae 100644 --- a/NEWS.md +++ b/NEWS.md @@ -128,6 +128,8 @@ identical(y1,y2) && identical(y1,y3) # TRUE ``` + +19. Support for `setkey` on `data.table`s with complex-valued columns, [#1444](https://github.com/Rdatatable/data.table/issues/1444). Note that complex columns themselves are not well-ordered (and thus not supported as keys). Thanks @atalikami for the report. #### BUG FIXES diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 1c720d4de9..838bf70306 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -15200,6 +15200,9 @@ d2 = data.table(r = 1:5, s = seq(0L, 20L, 5L)) test(2062.1, d1[d2, on = .(a <= s, b >= s), j = .SD], ans<-data.table(a=INT(0,5,10,10,15,20), b=INT(0,5,10,10,15,20))) test(2062.2, d1[d2, on = .(a <= s, b >= s)][, .(a, b)], ans) +# #1444 -- support for ordering tables with complex columns +DT = data.table(a = 2:1, z = complex(0, 0:1)) +test(2063, setkey(DT, a), data.table(a = 1:2, z = complex(0, 1:0), key='a')) ################################### # Add new tests above this line # diff --git a/src/reorder.c b/src/reorder.c index 337d09b70f..62116def88 100644 --- a/src/reorder.c +++ b/src/reorder.c @@ -1,4 +1,5 @@ #include "data.table.h" +#include SEXP reorder(SEXP x, SEXP order) { @@ -13,8 +14,8 @@ SEXP reorder(SEXP x, SEXP order) ncol = length(x); for (int i=0; i maxSize) @@ -22,8 +23,8 @@ SEXP reorder(SEXP x, SEXP order) if (ALTREP(v)) SET_VECTOR_ELT(x,i,duplicate(v)); // expand compact vector in place ready for reordering by reference } } else { - if (SIZEOF(x)!=4 && SIZEOF(x)!=8) - error("reorder accepts vectors but this non-VECSXP is type '%s' which isn't yet supported", type2char(TYPEOF(x))); + if (SIZEOF(x)!=4 && SIZEOF(x)!=8 && SIZEOF(x)!=16) + error("reorder accepts vectors but this non-VECSXP is type '%s' which isn't yet supported (SIZEOF=%d)", type2char(TYPEOF(x)), SIZEOF(x)); if (ALTREP(x)) error("Internal error in reorder.c: cannot reorder an ALTREP vector. Please see NEWS item 2 in v1.11.4 and report this as a bug."); // # nocov maxSize = SIZEOF(x); nrow = length(x); @@ -68,7 +69,7 @@ SEXP reorder(SEXP x, SEXP order) tmp[i] = cvd[idx[i]-1]; // copies 4 bytes; including pointers on 32bit (STRSXP and VECSXP) } memcpy(vd+start, tmp+start, (end-start+1)*size); - } else { + } else if (size==8) { double *vd = (double *)DATAPTR(v); const double *restrict cvd = vd; double *restrict tmp = (double *)TMP; @@ -77,6 +78,16 @@ SEXP reorder(SEXP x, SEXP order) tmp[i] = cvd[idx[i]-1]; // copies 8 bytes; including pointers on 64bit (STRSXP and VECSXP) } memcpy(vd+start, tmp+start, (end-start+1)*size); + } else { + // #1444 -- support for copying CPLXSXP (which have size 16) + double complex *vd = (double complex *)DATAPTR(v); + const double complex *restrict cvd = vd; + double complex *restrict tmp = (double complex *)TMP; + #pragma omp parallel for num_threads(getDTthreads()) + for (int i=start; i<=end; i++) { + tmp[i] = cvd[idx[i]-1]; // copies 16 bytes; including pointers on 64bit (STRSXP and VECSXP) + } + memcpy(vd+start, tmp+start, (end-start+1)*size); } } // It's ok to ignore write barrier, and in parallel too, only because this reorder() function accepts and checks From bdce4a5ff5f28ce5ef4d2c6fd8694a4aa156866f Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Wed, 10 Jul 2019 13:33:13 +0800 Subject: [PATCH 2/6] extra test for group operation mentioned in issue --- inst/tests/tests.Rraw | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 838bf70306..ccee85a383 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -15202,7 +15202,8 @@ test(2062.2, d1[d2, on = .(a <= s, b >= s)][, .(a, b)], ans) # #1444 -- support for ordering tables with complex columns DT = data.table(a = 2:1, z = complex(0, 0:1)) -test(2063, setkey(DT, a), data.table(a = 1:2, z = complex(0, 1:0), key='a')) +test(2063.1, setkey(copy(DT), a), data.table(a = 1:2, z = complex(0, 1:0), key='a')) +test(2063.2, DT[ , abs(z), by = a], data.table(a = 2:1, V1 = c(0, 1))) ################################### # Add new tests above this line # From e6533dca399cc0fcb77ee776f0eb8166d404f7c9 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Fri, 12 Jul 2019 00:05:07 +0800 Subject: [PATCH 3/6] new tests for coverage --- inst/tests/tests.Rraw | 9 +++++++-- src/reorder.c | 2 +- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index ccee85a383..c86b55b67f 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -15202,8 +15202,13 @@ test(2062.2, d1[d2, on = .(a <= s, b >= s)][, .(a, b)], ans) # #1444 -- support for ordering tables with complex columns DT = data.table(a = 2:1, z = complex(0, 0:1)) -test(2063.1, setkey(copy(DT), a), data.table(a = 1:2, z = complex(0, 1:0), key='a')) -test(2063.2, DT[ , abs(z), by = a], data.table(a = 2:1, V1 = c(0, 1))) +test(2063.1, setkey(copy(DT), a), data.table(a=1:2, z=complex(0, 1:0), key='a')) +test(2063.2, DT[ , abs(z), by=a], data.table(a=2:1, V1=c(0, 1))) +# raw continues not to be supported +DT = data.table(ID=2:1, r=as.raw(0:1)) +test(2063.3, setkey(DT, ID), error="Item 2 of list is type 'raw'") +# setreordervec triggers !isNewList branch for coverage +test(2063.4, setreordervec(DT$r), error="reorder accepts vectors but this non-VECSXP") ################################### # Add new tests above this line # diff --git a/src/reorder.c b/src/reorder.c index 62116def88..d55d147283 100644 --- a/src/reorder.c +++ b/src/reorder.c @@ -124,7 +124,7 @@ SEXP setrev(SEXP x) { ((int *)xt)[len-1-j] = *(int *)tmp; } } else { - if (size!=8) error("Size of x isn't 4 or 8"); + if (size!=8) error("Size of x isn't 4, 8 or 16"); tmp = (char *)Calloc(1, double); if (!tmp) error("unable to allocate temporary working memory for reordering x"); for (j=0;j Date: Fri, 12 Jul 2019 00:20:24 +0800 Subject: [PATCH 4/6] missing arg --- inst/tests/tests.Rraw | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index c86b55b67f..777bed1706 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -15208,7 +15208,7 @@ test(2063.2, DT[ , abs(z), by=a], data.table(a=2:1, V1=c(0, 1))) DT = data.table(ID=2:1, r=as.raw(0:1)) test(2063.3, setkey(DT, ID), error="Item 2 of list is type 'raw'") # setreordervec triggers !isNewList branch for coverage -test(2063.4, setreordervec(DT$r), error="reorder accepts vectors but this non-VECSXP") +test(2063.4, setreordervec(DT$r, order(DT$ID)), error="reorder accepts vectors but this non-VECSXP") ################################### # Add new tests above this line # From 9a4cef49a6efc5f9f7d2f2fe2b5f84cf40cd0dbb Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Sun, 14 Jul 2019 03:43:24 +0800 Subject: [PATCH 5/6] switch to Rcomplex API --- NEWS.md | 2 +- src/reorder.c | 7 +++---- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/NEWS.md b/NEWS.md index 81a50403ae..c28317ee24 100644 --- a/NEWS.md +++ b/NEWS.md @@ -129,7 +129,7 @@ # TRUE ``` -19. Support for `setkey` on `data.table`s with complex-valued columns, [#1444](https://github.com/Rdatatable/data.table/issues/1444). Note that complex columns themselves are not well-ordered (and thus not supported as keys). Thanks @atalikami for the report. +19. Support for `setkey` on `data.table`s with complex-valued columns, [#1444](https://github.com/Rdatatable/data.table/issues/1444). Note that complex columns themselves are not yet supported as keys). Thanks @atalikami for the report. #### BUG FIXES diff --git a/src/reorder.c b/src/reorder.c index d55d147283..b61286ff23 100644 --- a/src/reorder.c +++ b/src/reorder.c @@ -1,5 +1,4 @@ #include "data.table.h" -#include SEXP reorder(SEXP x, SEXP order) { @@ -80,9 +79,9 @@ SEXP reorder(SEXP x, SEXP order) memcpy(vd+start, tmp+start, (end-start+1)*size); } else { // #1444 -- support for copying CPLXSXP (which have size 16) - double complex *vd = (double complex *)DATAPTR(v); - const double complex *restrict cvd = vd; - double complex *restrict tmp = (double complex *)TMP; + Rcomplex *vd = (Rcomplex *)DATAPTR(v); + const Rcomplex *restrict cvd = vd; + Rcomplex *restrict tmp = (Rcomplex *)TMP; #pragma omp parallel for num_threads(getDTthreads()) for (int i=start; i<=end; i++) { tmp[i] = cvd[idx[i]-1]; // copies 16 bytes; including pointers on 64bit (STRSXP and VECSXP) From 14195c381222b8ab980e5f542ac108a18ad58e3d Mon Sep 17 00:00:00 2001 From: mattdowle Date: Wed, 17 Jul 2019 17:57:16 -0700 Subject: [PATCH 6/6] tweak news item and removed setrev as Michael suggested --- NEWS.md | 8 ++++---- R/setkey.R | 3 --- inst/tests/tests.Rraw | 29 +---------------------------- src/init.c | 2 -- src/reorder.c | 36 +----------------------------------- 5 files changed, 6 insertions(+), 72 deletions(-) diff --git a/NEWS.md b/NEWS.md index 656d843f85..176999e02d 100644 --- a/NEWS.md +++ b/NEWS.md @@ -35,7 +35,7 @@ * Gains `yaml` argument matching that of `fread`, [#3534](https://github.com/Rdatatable/data.table/issues/3534). See the item in `fread` for a bit more detail; here, we'd like to reiterate that feedback is appreciated in the initial phase of rollout for this feature. * Gains `bom` argument to add a *byte order mark* (BOM) at the beginning of the file to signal that the file is encoded in UTF-8, [#3488](https://github.com/Rdatatable/data.table/issues/3488). Thanks to Stefan Fleck for requesting and Philippe Chataignon for implementing. - + * Now supports type `complex`, [#3690](https://github.com/Rdatatable/data.table/issues/3690). 4. Assigning to one item of a list column no longer requires the RHS to be wrapped with `list` or `.()`, [#950](https://github.com/Rdatatable/data.table/issues/950). @@ -130,10 +130,10 @@ identical(y1,y2) && identical(y1,y3) # TRUE ``` - -19. Support for `setkey` on `data.table`s with complex-valued columns, [#1444](https://github.com/Rdatatable/data.table/issues/1444). Note that complex columns themselves are not yet supported as keys). Thanks @atalikami for the report. -19. `shift` now supports complex vectors, part of [#3690](https://github.com/Rdatatable/data.table/issues/3690). +19. `shift` now supports type `complex`, part of [#3690](https://github.com/Rdatatable/data.table/issues/3690). + +20. `setkey` now supports type `complex` as value columns (not as key columns), [#1444](https://github.com/Rdatatable/data.table/issues/1444). Thanks Gareth Ward for the report. #### BUG FIXES diff --git a/R/setkey.R b/R/setkey.R index 66d476fb56..838c19eee9 100644 --- a/R/setkey.R +++ b/R/setkey.R @@ -142,9 +142,6 @@ getindex = function(x, name) { haskey = function(x) !is.null(key(x)) -# reverse a vector by reference (no copy) -setrev = function(x) .Call(Csetrev, x) - # reorder a vector based on 'order' (integer) # to be used in fastorder instead of x[o], but in general, it's better to replace vector subsetting with this..? # Basic checks that all items of order are in range 1:n with no NAs are now made inside Creorder. diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 6807b52dfa..a6781a6b6f 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -46,7 +46,6 @@ if (exists("test.data.table", .GlobalEnv, inherits=FALSE)) { setcoalesce = data.table:::setcoalesce setdiff_ = data.table:::setdiff_ setreordervec = data.table:::setreordervec - setrev = data.table:::setrev shallow = data.table:::shallow # until exported .shallow = data.table:::.shallow split.data.table = data.table:::split.data.table @@ -3920,33 +3919,7 @@ test(1159.6, setDT(x), error="Argument 'x' to 'setDT' should be a") x <- list(1, 2:3) test(1159.7, setDT(x), error="All elements in argument 'x' to 'setDT'") -# tests for setrev -x <- sample(10) -y <- rev(x) -setrev(x) -test(1160.1, y, x) -x <- sample(c(1:10, NA), 21, TRUE) -y <- rev(x) -setrev(x) -test(1160.2, y, x) -x <- sample(runif(10)) -y <- rev(x) -setrev(x) -test(1160.3, y, x) -x <- sample(c(runif(10), NA, NaN), 21, TRUE) -y <- rev(x) -setrev(x) -test(1160.4, y, x) -x <- sample(letters) -y <- rev(x) -setrev(x) -test(1160.5, y, x) -x <- as.logical(sample(0:1, 20, TRUE)) -y <- rev(x) -setrev(x) -test(1160.6, y, x) -x <- list(1:10) -test(1160.7, setrev(x), error="Input 'x' must be a vector") +# test 1160 was for setrev, now removed # tests for setreordervec # integer diff --git a/src/init.c b/src/init.c index 2d93d3e374..dd2a1d6929 100644 --- a/src/init.c +++ b/src/init.c @@ -31,7 +31,6 @@ SEXP fmelt(); SEXP fcast(); SEXP uniqlist(); SEXP uniqlengths(); -SEXP setrev(); SEXP forder(); SEXP fsorted(); SEXP gforce(); @@ -116,7 +115,6 @@ R_CallMethodDef callMethods[] = { {"Cfcast", (DL_FUNC) &fcast, -1}, {"Cuniqlist", (DL_FUNC) &uniqlist, -1}, {"Cuniqlengths", (DL_FUNC) &uniqlengths, -1}, -{"Csetrev", (DL_FUNC) &setrev, -1}, {"Cforder", (DL_FUNC) &forder, -1}, {"Cfsorted", (DL_FUNC) &fsorted, -1}, {"Cgforce", (DL_FUNC) &gforce, -1}, diff --git a/src/reorder.c b/src/reorder.c index b61286ff23..e65a58f26c 100644 --- a/src/reorder.c +++ b/src/reorder.c @@ -84,7 +84,7 @@ SEXP reorder(SEXP x, SEXP order) Rcomplex *restrict tmp = (Rcomplex *)TMP; #pragma omp parallel for num_threads(getDTthreads()) for (int i=start; i<=end; i++) { - tmp[i] = cvd[idx[i]-1]; // copies 16 bytes; including pointers on 64bit (STRSXP and VECSXP) + tmp[i] = cvd[idx[i]-1]; // copies 16 bytes } memcpy(vd+start, tmp+start, (end-start+1)*size); } @@ -102,37 +102,3 @@ SEXP reorder(SEXP x, SEXP order) return(R_NilValue); } -// reverse a vector - equivalent of rev(x) in base, but implemented in C and about 12x faster (on 1e8) -SEXP setrev(SEXP x) { - R_len_t j, n, len; - size_t size; - char *tmp, *xt; - if (TYPEOF(x) == VECSXP || isMatrix(x)) error("Input 'x' must be a vector"); - len = length(x); - if (len <= 1) return(x); - size = SIZEOF(x); - if (!size) error("don't know how to reverse type '%s' of input 'x'.",type2char(TYPEOF(x))); - n = (int)(len/2); - xt = (char *)DATAPTR(x); - if (size==4) { - tmp = (char *)Calloc(1, int); - if (!tmp) error("unable to allocate temporary working memory for reordering x"); - for (j=0;j