From bf4fb109762dc12d7ee96227554bb3dc0c68018b Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Wed, 10 Jul 2019 14:30:22 +0800 Subject: [PATCH 01/13] initial take at #3639 -- proper handling of CPLXSXP in dogroups remove debugging helpers remove debugging helpers remove debugging helpers --- R/data.table.R | 1 - src/dogroups.c | 19 ++++++++++++++++--- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/R/data.table.R b/R/data.table.R index a13597926b..823ae84cb5 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -889,7 +889,6 @@ replace_order = function(isub, verbose, env) { } # else maybe a call to transform or something which returns a list. av = all.vars(jsub,TRUE) # TRUE fixes bug #1294 which didn't see b in j=fns[[b]](c) use.I = ".I" %chin% av - # browser() if (any(c(".SD","eval","get","mget") %chin% av)) { if (missing(.SDcols)) { # here we need to use 'dupdiff' instead of 'setdiff'. Ex: setdiff(c("x", "x"), NULL) will give 'x'. diff --git a/src/dogroups.c b/src/dogroups.c index 8009f70ae8..437c14720e 100644 --- a/src/dogroups.c +++ b/src/dogroups.c @@ -2,6 +2,7 @@ #include #include #include +#include SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEXP xjiscols, SEXP grporder, SEXP order, SEXP starts, SEXP lens, SEXP jexp, SEXP env, SEXP lhs, SEXP newnames, SEXP on, SEXP verbose) { @@ -214,13 +215,20 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX rownum = iI[k]-1; td[k] = sd[rownum]; // on 32bit copies pointers too } - } else { // size 8 + } else if (size==8) { double *td = REAL(target); const double *sd = REAL(source); for (int k=0; k Date: Wed, 10 Jul 2019 18:24:17 +0800 Subject: [PATCH 02/13] main issue was name collision of I with that in complex.h; now working --- NEWS.md | 2 ++ inst/tests/tests.Rraw | 9 +++++++++ src/dogroups.c | 27 +++++++++++++++------------ 3 files changed, 26 insertions(+), 12 deletions(-) diff --git a/NEWS.md b/NEWS.md index 10e3d5d5ea..1754ecf805 100644 --- a/NEWS.md +++ b/NEWS.md @@ -192,6 +192,8 @@ 24. `column not found` could incorrectly occur in rare non-equi-join cases, [#3635](https://github.com/Rdatatable/data.table/issues/3635). Thanks to @UweBlock for the report. +25. Complex columns used in `j` during grouping would get mangled, [#3639](https://github.com/Rdatatable/data.table/issues/3639). We still do not support grouping `by` a complex column; please file a feature request if you would use this in your own work. Thanks to @eliocamp for filing the bug report. + #### NOTES 1. `rbindlist`'s `use.names="check"` now emits its message for automatic column names (`"V[0-9]+"`) too, [#3484](https://github.com/Rdatatable/data.table/pull/3484). See news item 5 of v1.12.2 below. diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 1c720d4de9..4e8e51c7f0 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -15200,6 +15200,15 @@ d2 = data.table(r = 1:5, s = seq(0L, 20L, 5L)) test(2062.1, d1[d2, on = .(a <= s, b >= s), j = .SD], ans<-data.table(a=INT(0,5,10,10,15,20), b=INT(0,5,10,10,15,20))) test(2062.2, d1[d2, on = .(a <= s, b >= s)][, .(a, b)], ans) +# #3639 -- complex values in grouping +set.seed(42) +DT = CJ(x = 1:10, a = c("a", "b"), b = 1:2) +DT[ , z := complex(rnorm(1:.N), rnorm(1:.N))] +## can simplify this test after #1444 +test(2063.1, all.equal(setkey(copy(DT), NULL), DT[, .(x = x, z = z), by = .(a, b)][order(x, a, b)], ignore.col.order = TRUE)) +test(2063.2, DT[ , base::sum(z), by = a], data.table(a = c('a', 'b'), V1 = c(5.0582228485073+0i, -1.8644229822705+0i))) +test(2063.3, DT[ , sum(Mod(z)), by = b], data.table(b = 1:2, V2 = c(16.031422657932, 13.533483145656))) + ################################### # Add new tests above this line # diff --git a/src/dogroups.c b/src/dogroups.c index 437c14720e..a4c56880de 100644 --- a/src/dogroups.c +++ b/src/dogroups.c @@ -8,7 +8,7 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX { R_len_t rownum, ngrp, nrowgroups, njval=0, ngrpcols, ansloc=0, maxn, estn=-1, thisansloc, grpn, thislen, igrp, origIlen=0, origSDnrow=0; int protecti=0; - SEXP names, names2, xknames, bynames, dtnames, ans=NULL, jval, thiscol, BY, N, I, GRP, iSD, xSD, rownames, s, RHS, listwrap, target, source, tmp; + SEXP names, names2, xknames, bynames, dtnames, ans=NULL, jval, thiscol, BY, N, DOTI, GRP, iSD, xSD, rownames, s, RHS, listwrap, target, source, tmp; Rboolean wasvector, firstalloc=FALSE, NullWarnDone=FALSE; clock_t tstart=0, tblock[10]={0}; int nblock[10]={0}; @@ -53,7 +53,8 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX for (R_len_t i=0; i maxGrpSize) maxGrpSize = ilens[i]; } - defineVar(install(".I"), I = PROTECT(allocVector(INTSXP, maxGrpSize)), env); protecti++; + // #3639 introduced complex.h, which defines I, so use DOTI to differentiate + defineVar(install(".I"), DOTI = PROTECT(allocVector(INTSXP, maxGrpSize)), env); protecti++; R_LockBinding(install(".I"), env); dtnames = PROTECT(getAttrib(dt, R_NamesSymbol)); protecti++; // added here to fix #4990 - `:=` did not issue recycling warning during "by" @@ -77,7 +78,7 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX copyMostAttrib(VECTOR_ELT(dt,INTEGER(dtcols)[i]-1), VECTOR_ELT(SDall,i)); // not names, otherwise test 778 would fail } - origIlen = length(I); // test 762 has length(I)==1 but nrow(SD)==0 + origIlen = length(DOTI); // test 762 has length(DOTI)==1 but nrow(SD)==0 if (length(SDall)) origSDnrow = length(VECTOR_ELT(SDall, 0)); xknames = getAttrib(xSD, R_NamesSymbol); @@ -155,8 +156,8 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX } } grpn = 1; // it may not be 1 e.g. test 722. TODO: revisit. - SETLENGTH(I, grpn); - INTEGER(I)[0] = 0; + SETLENGTH(DOTI, grpn); + INTEGER(DOTI)[0] = 0; for (int j=0; j0; From 0288c5f142546ac80a3b927bcb40129d1a102330 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Wed, 10 Jul 2019 18:31:51 +0800 Subject: [PATCH 03/13] typo --- inst/tests/tests.Rraw | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 4e8e51c7f0..0ae0f541c4 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -15207,7 +15207,7 @@ DT[ , z := complex(rnorm(1:.N), rnorm(1:.N))] ## can simplify this test after #1444 test(2063.1, all.equal(setkey(copy(DT), NULL), DT[, .(x = x, z = z), by = .(a, b)][order(x, a, b)], ignore.col.order = TRUE)) test(2063.2, DT[ , base::sum(z), by = a], data.table(a = c('a', 'b'), V1 = c(5.0582228485073+0i, -1.8644229822705+0i))) -test(2063.3, DT[ , sum(Mod(z)), by = b], data.table(b = 1:2, V2 = c(16.031422657932, 13.533483145656))) +test(2063.3, DT[ , sum(Mod(z)), by = b], data.table(b = 1:2, V1 = c(16.031422657932, 13.533483145656))) ################################### From 3f99784d4ee67ea057889296f8c939d8350a6164 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Wed, 10 Jul 2019 19:36:25 +0800 Subject: [PATCH 04/13] add CPLXSXP case to some switches --- src/dogroups.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/dogroups.c b/src/dogroups.c index a4c56880de..361108a6a3 100644 --- a/src/dogroups.c +++ b/src/dogroups.c @@ -145,6 +145,11 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX case REALSXP : REAL(VECTOR_ELT(SDall,j))[0] = NA_REAL; break; + case CPLXSXP : + // no NA_COMPLEX; have to set r & i parts to NA_REAL individually + COMPLEX(VECTOR_ELT(SDall, j))[0].r = NA_REAL; + COMPLEX(VECTOR_ELT(SDall, j))[0].i = NA_REAL; + break; case STRSXP : SET_STRING_ELT(VECTOR_ELT(SDall,j),0,NA_STRING); break; @@ -169,6 +174,10 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX case REALSXP : REAL(VECTOR_ELT(xSD,j))[0] = NA_REAL; break; + case CPLXSXP : + COMPLEX(VECTOR_ELT(xSD, j))[0].r = NA_REAL; + COMPLEX(VECTOR_ELT(xSD, j))[0].i = NA_REAL; + break; case STRSXP : SET_STRING_ELT(VECTOR_ELT(xSD,j),0,NA_STRING); break; From 6ae663955331e1750ecc1eb98d22287ae338ace5 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Wed, 10 Jul 2019 20:18:16 +0800 Subject: [PATCH 05/13] another switch() branch included --- src/dogroups.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/dogroups.c b/src/dogroups.c index 361108a6a3..4d18af118c 100644 --- a/src/dogroups.c +++ b/src/dogroups.c @@ -4,6 +4,12 @@ #include #include +// copied from r-source/src/main/Rcomplex.h +#if defined(__GNUC__) && (defined(__sun__) || defined(__hpux__) || defined(Win32)) +# undef I +# define I (__extension__ 1.0iF) +#endif + SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEXP xjiscols, SEXP grporder, SEXP order, SEXP starts, SEXP lens, SEXP jexp, SEXP env, SEXP lhs, SEXP newnames, SEXP on, SEXP verbose) { R_len_t rownum, ngrp, nrowgroups, njval=0, ngrpcols, ansloc=0, maxn, estn=-1, thisansloc, grpn, thislen, igrp, origIlen=0, origSDnrow=0; @@ -440,6 +446,11 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX double *td = REAL(target)+thisansloc; for (int r=0; r Date: Thu, 11 Jul 2019 19:14:13 +0800 Subject: [PATCH 06/13] add fix&tests for assigning complex values (by subset or by group) --- NEWS.md | 2 +- inst/tests/tests.Rraw | 6 +++++- src/assign.c | 14 ++++++++++++++ src/data.table.h | 6 ++++++ src/dogroups.c | 6 ------ 5 files changed, 26 insertions(+), 8 deletions(-) diff --git a/NEWS.md b/NEWS.md index 1754ecf805..1e848e0bb9 100644 --- a/NEWS.md +++ b/NEWS.md @@ -192,7 +192,7 @@ 24. `column not found` could incorrectly occur in rare non-equi-join cases, [#3635](https://github.com/Rdatatable/data.table/issues/3635). Thanks to @UweBlock for the report. -25. Complex columns used in `j` during grouping would get mangled, [#3639](https://github.com/Rdatatable/data.table/issues/3639). We still do not support grouping `by` a complex column; please file a feature request if you would use this in your own work. Thanks to @eliocamp for filing the bug report. +25. Complex columns used in `j` during grouping would get mangled, [#3639](https://github.com/Rdatatable/data.table/issues/3639). A related bug prevented assigning complex values using `:=` except for full-column plonks. We still do not support grouping `by` a complex column; please file a feature request if you would use this in your own work. Thanks to @eliocamp for filing the bug report. #### NOTES diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 0ae0f541c4..11ffafdefe 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -15208,7 +15208,11 @@ DT[ , z := complex(rnorm(1:.N), rnorm(1:.N))] test(2063.1, all.equal(setkey(copy(DT), NULL), DT[, .(x = x, z = z), by = .(a, b)][order(x, a, b)], ignore.col.order = TRUE)) test(2063.2, DT[ , base::sum(z), by = a], data.table(a = c('a', 'b'), V1 = c(5.0582228485073+0i, -1.8644229822705+0i))) test(2063.3, DT[ , sum(Mod(z)), by = b], data.table(b = 1:2, V1 = c(16.031422657932, 13.533483145656))) - +# also works for assignment, as noted in #3690 +DT[ , z_sum := base::sum(z), by = .(a, b)] +test(2063.4, DT[ , z_sum := base::sum(z), by = .(a, b)][1:3, z_sum], + c(3.05896822455656+0i, -2.81616342436407+0i, -3.04864360704606+0i)) +test(2063.4, DT[1L, z_sum := 1i][1L, z_sum], 1i) ################################### # Add new tests above this line # diff --git a/src/assign.c b/src/assign.c index 3bde57ddb8..ecb61dbe21 100644 --- a/src/assign.c +++ b/src/assign.c @@ -1,6 +1,7 @@ #include "data.table.h" #include #include +#include static void finalizer(SEXP p) { @@ -943,6 +944,15 @@ const char *memrecycle(SEXP target, SEXP where, int start, int len, SEXP source) td[w-1] = sd[i&mask]; } } break; + case CPLXSXP: { + double complex *td = (double complex *)COMPLEX(target); + const double complex *sd = (double complex *)COMPLEX(source); + for (int i=0; i #include -// copied from r-source/src/main/Rcomplex.h -#if defined(__GNUC__) && (defined(__sun__) || defined(__hpux__) || defined(Win32)) -# undef I -# define I (__extension__ 1.0iF) -#endif - SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEXP xjiscols, SEXP grporder, SEXP order, SEXP starts, SEXP lens, SEXP jexp, SEXP env, SEXP lhs, SEXP newnames, SEXP on, SEXP verbose) { R_len_t rownum, ngrp, nrowgroups, njval=0, ngrpcols, ansloc=0, maxn, estn=-1, thisansloc, grpn, thislen, igrp, origIlen=0, origSDnrow=0; From 34fec0ce97ea0d4c3be7e73d24b882457703dd87 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Thu, 11 Jul 2019 19:31:57 +0800 Subject: [PATCH 07/13] forgot to set seed for test --- inst/tests/tests.Rraw | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 11ffafdefe..c8c2dd1190 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -15211,8 +15211,8 @@ test(2063.3, DT[ , sum(Mod(z)), by = b], data.table(b = 1:2, V1 = c(16.031422657 # also works for assignment, as noted in #3690 DT[ , z_sum := base::sum(z), by = .(a, b)] test(2063.4, DT[ , z_sum := base::sum(z), by = .(a, b)][1:3, z_sum], - c(3.05896822455656+0i, -2.81616342436407+0i, -3.04864360704606+0i)) -test(2063.4, DT[1L, z_sum := 1i][1L, z_sum], 1i) + c(1.8791864549242+0i, 3.17903639358309+0i, -4.18868631527035+0i)) +test(2063.5, DT[1L, z_sum := 1i][1L, z_sum], 1i) ################################### # Add new tests above this line # From 781df7ff24d2e72977b68eb559e7f8cd1fd88b6c Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Fri, 12 Jul 2019 00:57:11 +0800 Subject: [PATCH 08/13] more tests --- inst/tests/tests.Rraw | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index c8c2dd1190..281b3fba66 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -15208,11 +15208,20 @@ DT[ , z := complex(rnorm(1:.N), rnorm(1:.N))] test(2063.1, all.equal(setkey(copy(DT), NULL), DT[, .(x = x, z = z), by = .(a, b)][order(x, a, b)], ignore.col.order = TRUE)) test(2063.2, DT[ , base::sum(z), by = a], data.table(a = c('a', 'b'), V1 = c(5.0582228485073+0i, -1.8644229822705+0i))) test(2063.3, DT[ , sum(Mod(z)), by = b], data.table(b = 1:2, V1 = c(16.031422657932, 13.533483145656))) +## mimicking test 171.3 for coverage +DT = data.table(A=c(25L,85L,25L,25L,85L), B=c("a","a","b","c","c"), z=0:4 + (4:0)*1i) +test(2063.4, DT[ , data.table(A, z)[A==25, z] + data.table(A, z)[A==85, z], by=B], + data.table(B = c('a', 'c'), V1 = c(1, 7) + (c(7, 1))*1i)) +## mimicking test 771 for coverage +a = data.table(first=1:6, third=c(1i,1,1i,3,3i,4), key="first") +b = data.table(first=c(3,4,4,5,6,7,8), second=1:7, key="first") +test(2063.5, b[ , third:=a[b, third, by=.EACHI]], error="Supplied 2 items to be assigned to 7 items of") + # also works for assignment, as noted in #3690 DT[ , z_sum := base::sum(z), by = .(a, b)] -test(2063.4, DT[ , z_sum := base::sum(z), by = .(a, b)][1:3, z_sum], +test(2063.6, DT[ , z_sum := base::sum(z), by = .(a, b)][1:3, z_sum], c(1.8791864549242+0i, 3.17903639358309+0i, -4.18868631527035+0i)) -test(2063.5, DT[1L, z_sum := 1i][1L, z_sum], 1i) +test(2063.7, DT[1L, z_sum := 1i][1L, z_sum], 1i) ################################### # Add new tests above this line # From d64f339caa9026fc8a6955889a170e49d3200191 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Fri, 12 Jul 2019 01:42:42 +0800 Subject: [PATCH 09/13] overwrote variable in tests --- inst/tests/tests.Rraw | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 281b3fba66..a5d6197633 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -15209,8 +15209,8 @@ test(2063.1, all.equal(setkey(copy(DT), NULL), DT[, .(x = x, z = z), by = .(a, b test(2063.2, DT[ , base::sum(z), by = a], data.table(a = c('a', 'b'), V1 = c(5.0582228485073+0i, -1.8644229822705+0i))) test(2063.3, DT[ , sum(Mod(z)), by = b], data.table(b = 1:2, V1 = c(16.031422657932, 13.533483145656))) ## mimicking test 171.3 for coverage -DT = data.table(A=c(25L,85L,25L,25L,85L), B=c("a","a","b","c","c"), z=0:4 + (4:0)*1i) -test(2063.4, DT[ , data.table(A, z)[A==25, z] + data.table(A, z)[A==85, z], by=B], +x = data.table(A=c(25L,85L,25L,25L,85L), B=c("a","a","b","c","c"), z=0:4 + (4:0)*1i) +test(2063.4, x[ , data.table(A, z)[A==25, z] + data.table(A, z)[A==85, z], by=B], data.table(B = c('a', 'c'), V1 = c(1, 7) + (c(7, 1))*1i)) ## mimicking test 771 for coverage a = data.table(first=1:6, third=c(1i,1,1i,3,3i,4), key="first") From 8967ee0e96399d29f84ffbef5c3aabf78bfd86d0 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Fri, 12 Jul 2019 07:58:01 +0800 Subject: [PATCH 10/13] coverage test --- inst/tests/tests.Rraw | 10 ++++++++++ src/dogroups.c | 2 +- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index a5d6197633..f57f651fdd 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -7326,6 +7326,16 @@ test(1540.35, DT1[DT3, lapply(.SD, function(x) x * mul), by=.EACHI, on=c(x="q", y="r"), roll=TRUE], DT1.copy[DT3, lapply(.SD, function(x) x * mul), by=.EACHI, roll=TRUE]) +## more coverage tests for by = .EACHI, on = c(LHS = 'RHS') for numeric type +set.seed(45L) +DT1 = data.table(x=sample(letters[1:3], 15, TRUE), y=sample(6:10, 15, TRUE), + a=sample(100, 15), b=runif(15)) +DT2 = CJ(x=letters[1:3], y=6:10)[, mul := sample(20, 15)][sample(15L, 5L)] +DT3 = rbindlist(list(DT2, list(x="d", y=7L, mul=100L))) +DT3 = DT3[sample(nrow(DT3))] +DT1[ , x_num := match(x, letters) + .1] +DT3[ , x_num := match(x, letters) + .1] +test(1540.36, DT1[DT3, .(x_num), by=.EACHI, on=c(x_num="x_num")]) # to do: add tests for := diff --git a/src/dogroups.c b/src/dogroups.c index 0b4edf9503..c7e4d42032 100644 --- a/src/dogroups.c +++ b/src/dogroups.c @@ -185,7 +185,7 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX SET_VECTOR_ELT(VECTOR_ELT(xSD,j),0,R_NilValue); break; default: - error("Logical error. Type of column should have been checked by now"); + error("Logical error. Type of column should have been checked by now"); // #nocov } } } else { From d34989a42f0ddf758ddb3ac95a3b3bf783388ffa Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Fri, 12 Jul 2019 09:43:37 +0800 Subject: [PATCH 11/13] add expected output --- inst/tests/tests.Rraw | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index f57f651fdd..5249bb4a70 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -7335,7 +7335,8 @@ DT3 = rbindlist(list(DT2, list(x="d", y=7L, mul=100L))) DT3 = DT3[sample(nrow(DT3))] DT1[ , x_num := match(x, letters) + .1] DT3[ , x_num := match(x, letters) + .1] -test(1540.36, DT1[DT3, .(x_num), by=.EACHI, on=c(x_num="x_num")]) +test(1540.36, DT1[DT3[1:3], .(y = x_num), by=.EACHI, on=c(x_num="x_num")], + data.table(x_num = c(3.1, 4.1, 3.1), y = c(2.1, NA, NA))) # to do: add tests for := From 6bf80b1d53981bd35e8d4bb2fe0206a4c6ffc27f Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Sun, 14 Jul 2019 04:02:51 +0800 Subject: [PATCH 12/13] move to Rcomplex API --- NEWS.md | 2 +- src/assign.c | 10 +++++----- src/data.table.h | 6 ------ src/dogroups.c | 48 +++++++++++++++++++++++------------------------- 4 files changed, 29 insertions(+), 37 deletions(-) diff --git a/NEWS.md b/NEWS.md index 1e848e0bb9..682d823a8c 100644 --- a/NEWS.md +++ b/NEWS.md @@ -192,7 +192,7 @@ 24. `column not found` could incorrectly occur in rare non-equi-join cases, [#3635](https://github.com/Rdatatable/data.table/issues/3635). Thanks to @UweBlock for the report. -25. Complex columns used in `j` during grouping would get mangled, [#3639](https://github.com/Rdatatable/data.table/issues/3639). A related bug prevented assigning complex values using `:=` except for full-column plonks. We still do not support grouping `by` a complex column; please file a feature request if you would use this in your own work. Thanks to @eliocamp for filing the bug report. +25. Complex columns used in `j` during grouping would get mangled, [#3639](https://github.com/Rdatatable/data.table/issues/3639). A related bug prevented assigning complex values using `:=` except for full-column plonks. We still do not support grouping `by` a complex column. Thanks to @eliocamp for filing the bug report. #### NOTES diff --git a/src/assign.c b/src/assign.c index ecb61dbe21..14e9d86e3c 100644 --- a/src/assign.c +++ b/src/assign.c @@ -1,7 +1,6 @@ #include "data.table.h" #include #include -#include static void finalizer(SEXP p) { @@ -945,8 +944,8 @@ const char *memrecycle(SEXP target, SEXP where, int start, int len, SEXP source) } } break; case CPLXSXP: { - double complex *td = (double complex *)COMPLEX(target); - const double complex *sd = (double complex *)COMPLEX(source); + Rcomplex *td = COMPLEX(target); + const Rcomplex *sd = COMPLEX(source); for (int i=0; i #include #include -#include SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEXP xjiscols, SEXP grporder, SEXP order, SEXP starts, SEXP lens, SEXP jexp, SEXP env, SEXP lhs, SEXP newnames, SEXP on, SEXP verbose) { R_len_t rownum, ngrp, nrowgroups, njval=0, ngrpcols, ansloc=0, maxn, estn=-1, thisansloc, grpn, thislen, igrp, origIlen=0, origSDnrow=0; int protecti=0; - SEXP names, names2, xknames, bynames, dtnames, ans=NULL, jval, thiscol, BY, N, DOTI, GRP, iSD, xSD, rownames, s, RHS, listwrap, target, source, tmp; + SEXP names, names2, xknames, bynames, dtnames, ans=NULL, jval, thiscol, BY, N, I, GRP, iSD, xSD, rownames, s, RHS, listwrap, target, source, tmp; Rboolean wasvector, firstalloc=FALSE, NullWarnDone=FALSE; clock_t tstart=0, tblock[10]={0}; int nblock[10]={0}; @@ -53,8 +52,7 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX for (R_len_t i=0; i maxGrpSize) maxGrpSize = ilens[i]; } - // #3639 introduced complex.h, which defines I, so use DOTI to differentiate - defineVar(install(".I"), DOTI = PROTECT(allocVector(INTSXP, maxGrpSize)), env); protecti++; + defineVar(install(".I"), I = PROTECT(allocVector(INTSXP, maxGrpSize)), env); protecti++; R_LockBinding(install(".I"), env); dtnames = PROTECT(getAttrib(dt, R_NamesSymbol)); protecti++; // added here to fix #4990 - `:=` did not issue recycling warning during "by" @@ -78,7 +76,7 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX copyMostAttrib(VECTOR_ELT(dt,INTEGER(dtcols)[i]-1), VECTOR_ELT(SDall,i)); // not names, otherwise test 778 would fail } - origIlen = length(DOTI); // test 762 has length(DOTI)==1 but nrow(SD)==0 + origIlen = length(I); // test 762 has length(I)==1 but nrow(SD)==0 if (length(SDall)) origSDnrow = length(VECTOR_ELT(SDall, 0)); xknames = getAttrib(xSD, R_NamesSymbol); @@ -145,11 +143,10 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX case REALSXP : REAL(VECTOR_ELT(SDall,j))[0] = NA_REAL; break; - case CPLXSXP : - // no NA_COMPLEX; have to set r & i parts to NA_REAL individually - COMPLEX(VECTOR_ELT(SDall, j))[0].r = NA_REAL; - COMPLEX(VECTOR_ELT(SDall, j))[0].i = NA_REAL; - break; + case CPLXSXP : { + Rcomplex NA_CPLX = {NA_REAL, NA_REAL}; + COMPLEX(VECTOR_ELT(SDall, j))[0] = NA_CPLX; + } break; case STRSXP : SET_STRING_ELT(VECTOR_ELT(SDall,j),0,NA_STRING); break; @@ -161,8 +158,8 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX } } grpn = 1; // it may not be 1 e.g. test 722. TODO: revisit. - SETLENGTH(DOTI, grpn); - INTEGER(DOTI)[0] = 0; + SETLENGTH(I, grpn); + INTEGER(I)[0] = 0; for (int j=0; j0; From 525842174fedf4cbdc987b85046bde016e57266b Mon Sep 17 00:00:00 2001 From: mattdowle Date: Tue, 16 Jul 2019 14:55:34 -0700 Subject: [PATCH 13/13] moved NA_CPLX to be defined once in .h and init.c --- src/assign.c | 1 - src/data.table.h | 1 + src/dogroups.c | 10 +++------- src/init.c | 5 ++++- src/subset.c | 1 - 5 files changed, 8 insertions(+), 10 deletions(-) diff --git a/src/assign.c b/src/assign.c index 14e9d86e3c..d7f5d45785 100644 --- a/src/assign.c +++ b/src/assign.c @@ -1013,7 +1013,6 @@ void writeNA(SEXP v, const int from, const int n) } break; case CPLXSXP: { Rcomplex *vd = COMPLEX(v); - Rcomplex NA_CPLX = {NA_REAL, NA_REAL}; for (int i=from; i<=to; ++i) vd[i] = NA_CPLX; } break; case STRSXP : diff --git a/src/data.table.h b/src/data.table.h index 55c458d077..35e17e4875 100644 --- a/src/data.table.h +++ b/src/data.table.h @@ -92,6 +92,7 @@ double LLtoD(long long x); bool GetVerbose(); double NA_INT64_D; long long NA_INT64_LL; +Rcomplex NA_CPLX; // initialized in init.c; see there for comments // cj.c SEXP cj(SEXP base_list); diff --git a/src/dogroups.c b/src/dogroups.c index f839920570..5f8ee1a61f 100644 --- a/src/dogroups.c +++ b/src/dogroups.c @@ -144,7 +144,6 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX REAL(VECTOR_ELT(SDall,j))[0] = NA_REAL; break; case CPLXSXP : { - Rcomplex NA_CPLX = {NA_REAL, NA_REAL}; COMPLEX(VECTOR_ELT(SDall, j))[0] = NA_CPLX; } break; case STRSXP : @@ -172,7 +171,6 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX REAL(VECTOR_ELT(xSD,j))[0] = NA_REAL; break; case CPLXSXP : { - Rcomplex NA_CPLX = {NA_REAL, NA_REAL}; COMPLEX(VECTOR_ELT(xSD, j))[0] = NA_CPLX; } break; case STRSXP : @@ -438,11 +436,9 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX for (int r=0; r