From bf4fb109762dc12d7ee96227554bb3dc0c68018b Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Wed, 10 Jul 2019 14:30:22 +0800 Subject: [PATCH 01/20] initial take at #3639 -- proper handling of CPLXSXP in dogroups remove debugging helpers remove debugging helpers remove debugging helpers --- R/data.table.R | 1 - src/dogroups.c | 19 ++++++++++++++++--- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/R/data.table.R b/R/data.table.R index a13597926b..823ae84cb5 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -889,7 +889,6 @@ replace_order = function(isub, verbose, env) { } # else maybe a call to transform or something which returns a list. av = all.vars(jsub,TRUE) # TRUE fixes bug #1294 which didn't see b in j=fns[[b]](c) use.I = ".I" %chin% av - # browser() if (any(c(".SD","eval","get","mget") %chin% av)) { if (missing(.SDcols)) { # here we need to use 'dupdiff' instead of 'setdiff'. Ex: setdiff(c("x", "x"), NULL) will give 'x'. diff --git a/src/dogroups.c b/src/dogroups.c index 8009f70ae8..437c14720e 100644 --- a/src/dogroups.c +++ b/src/dogroups.c @@ -2,6 +2,7 @@ #include #include #include +#include SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEXP xjiscols, SEXP grporder, SEXP order, SEXP starts, SEXP lens, SEXP jexp, SEXP env, SEXP lhs, SEXP newnames, SEXP on, SEXP verbose) { @@ -214,13 +215,20 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX rownum = iI[k]-1; td[k] = sd[rownum]; // on 32bit copies pointers too } - } else { // size 8 + } else if (size==8) { double *td = REAL(target); const double *sd = REAL(source); for (int k=0; k Date: Wed, 10 Jul 2019 18:24:17 +0800 Subject: [PATCH 02/20] main issue was name collision of I with that in complex.h; now working --- NEWS.md | 2 ++ inst/tests/tests.Rraw | 9 +++++++++ src/dogroups.c | 27 +++++++++++++++------------ 3 files changed, 26 insertions(+), 12 deletions(-) diff --git a/NEWS.md b/NEWS.md index 10e3d5d5ea..1754ecf805 100644 --- a/NEWS.md +++ b/NEWS.md @@ -192,6 +192,8 @@ 24. `column not found` could incorrectly occur in rare non-equi-join cases, [#3635](https://github.com/Rdatatable/data.table/issues/3635). Thanks to @UweBlock for the report. +25. Complex columns used in `j` during grouping would get mangled, [#3639](https://github.com/Rdatatable/data.table/issues/3639). We still do not support grouping `by` a complex column; please file a feature request if you would use this in your own work. Thanks to @eliocamp for filing the bug report. + #### NOTES 1. `rbindlist`'s `use.names="check"` now emits its message for automatic column names (`"V[0-9]+"`) too, [#3484](https://github.com/Rdatatable/data.table/pull/3484). See news item 5 of v1.12.2 below. diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 1c720d4de9..4e8e51c7f0 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -15200,6 +15200,15 @@ d2 = data.table(r = 1:5, s = seq(0L, 20L, 5L)) test(2062.1, d1[d2, on = .(a <= s, b >= s), j = .SD], ans<-data.table(a=INT(0,5,10,10,15,20), b=INT(0,5,10,10,15,20))) test(2062.2, d1[d2, on = .(a <= s, b >= s)][, .(a, b)], ans) +# #3639 -- complex values in grouping +set.seed(42) +DT = CJ(x = 1:10, a = c("a", "b"), b = 1:2) +DT[ , z := complex(rnorm(1:.N), rnorm(1:.N))] +## can simplify this test after #1444 +test(2063.1, all.equal(setkey(copy(DT), NULL), DT[, .(x = x, z = z), by = .(a, b)][order(x, a, b)], ignore.col.order = TRUE)) +test(2063.2, DT[ , base::sum(z), by = a], data.table(a = c('a', 'b'), V1 = c(5.0582228485073+0i, -1.8644229822705+0i))) +test(2063.3, DT[ , sum(Mod(z)), by = b], data.table(b = 1:2, V2 = c(16.031422657932, 13.533483145656))) + ################################### # Add new tests above this line # diff --git a/src/dogroups.c b/src/dogroups.c index 437c14720e..a4c56880de 100644 --- a/src/dogroups.c +++ b/src/dogroups.c @@ -8,7 +8,7 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX { R_len_t rownum, ngrp, nrowgroups, njval=0, ngrpcols, ansloc=0, maxn, estn=-1, thisansloc, grpn, thislen, igrp, origIlen=0, origSDnrow=0; int protecti=0; - SEXP names, names2, xknames, bynames, dtnames, ans=NULL, jval, thiscol, BY, N, I, GRP, iSD, xSD, rownames, s, RHS, listwrap, target, source, tmp; + SEXP names, names2, xknames, bynames, dtnames, ans=NULL, jval, thiscol, BY, N, DOTI, GRP, iSD, xSD, rownames, s, RHS, listwrap, target, source, tmp; Rboolean wasvector, firstalloc=FALSE, NullWarnDone=FALSE; clock_t tstart=0, tblock[10]={0}; int nblock[10]={0}; @@ -53,7 +53,8 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX for (R_len_t i=0; i maxGrpSize) maxGrpSize = ilens[i]; } - defineVar(install(".I"), I = PROTECT(allocVector(INTSXP, maxGrpSize)), env); protecti++; + // #3639 introduced complex.h, which defines I, so use DOTI to differentiate + defineVar(install(".I"), DOTI = PROTECT(allocVector(INTSXP, maxGrpSize)), env); protecti++; R_LockBinding(install(".I"), env); dtnames = PROTECT(getAttrib(dt, R_NamesSymbol)); protecti++; // added here to fix #4990 - `:=` did not issue recycling warning during "by" @@ -77,7 +78,7 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX copyMostAttrib(VECTOR_ELT(dt,INTEGER(dtcols)[i]-1), VECTOR_ELT(SDall,i)); // not names, otherwise test 778 would fail } - origIlen = length(I); // test 762 has length(I)==1 but nrow(SD)==0 + origIlen = length(DOTI); // test 762 has length(DOTI)==1 but nrow(SD)==0 if (length(SDall)) origSDnrow = length(VECTOR_ELT(SDall, 0)); xknames = getAttrib(xSD, R_NamesSymbol); @@ -155,8 +156,8 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX } } grpn = 1; // it may not be 1 e.g. test 722. TODO: revisit. - SETLENGTH(I, grpn); - INTEGER(I)[0] = 0; + SETLENGTH(DOTI, grpn); + INTEGER(DOTI)[0] = 0; for (int j=0; j0; From 0288c5f142546ac80a3b927bcb40129d1a102330 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Wed, 10 Jul 2019 18:31:51 +0800 Subject: [PATCH 03/20] typo --- inst/tests/tests.Rraw | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 4e8e51c7f0..0ae0f541c4 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -15207,7 +15207,7 @@ DT[ , z := complex(rnorm(1:.N), rnorm(1:.N))] ## can simplify this test after #1444 test(2063.1, all.equal(setkey(copy(DT), NULL), DT[, .(x = x, z = z), by = .(a, b)][order(x, a, b)], ignore.col.order = TRUE)) test(2063.2, DT[ , base::sum(z), by = a], data.table(a = c('a', 'b'), V1 = c(5.0582228485073+0i, -1.8644229822705+0i))) -test(2063.3, DT[ , sum(Mod(z)), by = b], data.table(b = 1:2, V2 = c(16.031422657932, 13.533483145656))) +test(2063.3, DT[ , sum(Mod(z)), by = b], data.table(b = 1:2, V1 = c(16.031422657932, 13.533483145656))) ################################### From 3f99784d4ee67ea057889296f8c939d8350a6164 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Wed, 10 Jul 2019 19:36:25 +0800 Subject: [PATCH 04/20] add CPLXSXP case to some switches --- src/dogroups.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/dogroups.c b/src/dogroups.c index a4c56880de..361108a6a3 100644 --- a/src/dogroups.c +++ b/src/dogroups.c @@ -145,6 +145,11 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX case REALSXP : REAL(VECTOR_ELT(SDall,j))[0] = NA_REAL; break; + case CPLXSXP : + // no NA_COMPLEX; have to set r & i parts to NA_REAL individually + COMPLEX(VECTOR_ELT(SDall, j))[0].r = NA_REAL; + COMPLEX(VECTOR_ELT(SDall, j))[0].i = NA_REAL; + break; case STRSXP : SET_STRING_ELT(VECTOR_ELT(SDall,j),0,NA_STRING); break; @@ -169,6 +174,10 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX case REALSXP : REAL(VECTOR_ELT(xSD,j))[0] = NA_REAL; break; + case CPLXSXP : + COMPLEX(VECTOR_ELT(xSD, j))[0].r = NA_REAL; + COMPLEX(VECTOR_ELT(xSD, j))[0].i = NA_REAL; + break; case STRSXP : SET_STRING_ELT(VECTOR_ELT(xSD,j),0,NA_STRING); break; From 6ae663955331e1750ecc1eb98d22287ae338ace5 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Wed, 10 Jul 2019 20:18:16 +0800 Subject: [PATCH 05/20] another switch() branch included --- src/dogroups.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/dogroups.c b/src/dogroups.c index 361108a6a3..4d18af118c 100644 --- a/src/dogroups.c +++ b/src/dogroups.c @@ -4,6 +4,12 @@ #include #include +// copied from r-source/src/main/Rcomplex.h +#if defined(__GNUC__) && (defined(__sun__) || defined(__hpux__) || defined(Win32)) +# undef I +# define I (__extension__ 1.0iF) +#endif + SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEXP xjiscols, SEXP grporder, SEXP order, SEXP starts, SEXP lens, SEXP jexp, SEXP env, SEXP lhs, SEXP newnames, SEXP on, SEXP verbose) { R_len_t rownum, ngrp, nrowgroups, njval=0, ngrpcols, ansloc=0, maxn, estn=-1, thisansloc, grpn, thislen, igrp, origIlen=0, origSDnrow=0; @@ -440,6 +446,11 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX double *td = REAL(target)+thisansloc; for (int r=0; r Date: Thu, 11 Jul 2019 02:38:40 +0800 Subject: [PATCH 06/20] Implement gsum, gmean, gfirst, glast, g[ for complex vectors, part of #3690 --- src/data.table.h | 7 +++ src/gsumm.c | 157 ++++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 155 insertions(+), 9 deletions(-) diff --git a/src/data.table.h b/src/data.table.h index a2a6204051..052086a9fb 100644 --- a/src/data.table.h +++ b/src/data.table.h @@ -63,6 +63,13 @@ typedef R_xlen_t RLEN; #define ALTREP(x) 0 // for R<3.5.0, see issue #2866 and grep for "ALTREP" to see comments where it's used #endif +// for complex type support; copied from r-source/src/main/Rcomplex.h +#if defined(__GNUC__) && (defined(__sun__) || defined(__hpux__) || defined(Win32)) +# undef I +# define I (__extension__ 1.0iF) +#endif + + // init.c SEXP char_integer64; SEXP char_ITime; diff --git a/src/gsumm.c b/src/gsumm.c index 6263b33b65..c327988f72 100644 --- a/src/gsumm.c +++ b/src/gsumm.c @@ -1,5 +1,6 @@ #include "data.table.h" //#include +#include static int ngrp = 0; // number of groups static int *grpsize = NULL; // size of each group, used by gmean (and gmedian) not gsum @@ -298,8 +299,38 @@ void *gather(SEXP x, bool *anyNA) } } } break; + case CPLXSXP: { + const double complex *restrict thisx = (double complex *)COMPLEX(x); + #pragma omp parallel for num_threads(getDTthreads()) + for (int b=0; b DBL_MAX) ansd[i] = R_PosInf; - else if (s[i] < -DBL_MAX) ansd[i] = R_NegInf; - else ansd[i] = (double)s[i]; + switch(TYPEOF(x)) { + case LGLSXP: case INTSXP: case REALSXP: { + ans = PROTECT(allocVector(REALSXP, ngrp)); + double *ansd = REAL(ans); + for (int i=0; i DBL_MAX) ansd[i] = R_PosInf; + else if (s[i] < -DBL_MAX) ansd[i] = R_NegInf; + else ansd[i] = (double)s[i]; + } + } break; + case CPLXSXP: { + ans = PROTECT(allocVector(CPLXSXP, ngrp)); + double complex *ansd = (double complex *)COMPLEX(ans); + for (int i=0; i DBL_MAX) ansd[i] = R_PosInf + cimag(s[i])*I; + else if (creal(s[i]) < -DBL_MAX) ansd[i] = R_NegInf + cimag(s[i])*I; + if (cimag(s[i]) > DBL_MAX) ansd[i] = creal(s[i]) + R_PosInf*I; + else if (cimag(s[i]) < -DBL_MAX) ansd[i] = creal(s[i]) + R_NegInf*I; + else ansd[i] = (double complex)s[i]; + } + } } free(s); free(c); copyMostAttrib(x, ans); @@ -618,6 +717,9 @@ SEXP gmin(SEXP x, SEXP narm) } } break; + case CPLXSXP: + error("Type 'complex' has no well-defined min"); + break; default: error("Type '%s' not supported by GForce min (gmin). Either add the prefix base::min(.) or turn off GForce optimization using options(datatable.optimize=1)", type2char(TYPEOF(x))); } @@ -761,6 +863,9 @@ SEXP gmax(SEXP x, SEXP narm) } } break; + case CPLXSXP: + error("Type 'complex' has no well-defined max"); + break; default: error("Type '%s' not supported by GForce max (gmax). Either add the prefix base::max(.) or turn off GForce optimization using options(datatable.optimize=1)", type2char(TYPEOF(x))); } @@ -868,6 +973,17 @@ SEXP glast(SEXP x) { } } break; + case CPLXSXP: { + const double complex *dx = (double complex *)COMPLEX(x); + ans = PROTECT(allocVector(CPLXSXP, ngrp)); + double complex *dans = (double complex *)COMPLEX(ans); + for (i=0; i grpsize[i]) { COMPLEX(ans)[i].r = NA_REAL; COMPLEX(ans)[i].i = NA_REAL; continue; } + k = ff[i]+val-2; + if (isunsorted) k = oo[k]-1; + k = (irowslen == -1) ? k : irows[k]-1; + dans[i] = dx[k]; + } + } break; case STRSXP: ans = PROTECT(allocVector(STRSXP, ngrp)); for (i=0; i Date: Thu, 11 Jul 2019 08:38:18 +0800 Subject: [PATCH 07/20] add some tests --- inst/tests/tests.Rraw | 31 ++++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 1c720d4de9..dae67ed6bc 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -13569,7 +13569,6 @@ test(1967.78, x[1:5, sum(v), by = list(i5 = 1:5 %% 2L), verbose = TRUE], # gforce integer overflow coerce to double DT = data.table(A=1:5, B=-3i, C=2147483647L) -test(1968.1, DT[, sum(B), by=A%%2L], error="Type 'complex' not supported by GForce sum (gsum). Either add the") test(1968.2, storage.mode(DT$C), "integer") test(1968.3, DT[, sum(C), by=A%%2L], data.table(A=c(1L,0L), V1=c(6442450941, 4294967294)), warning="sum.*integer column.*more than type 'integer' can hold.*coerced to 'numeric'") @@ -14224,16 +14223,16 @@ if (test_bit64) { DT = data.table(id=c(rep(1L,3), rep(2L, 3)), v=bit64::as.integer64(c(1:3, 4L, 5:6))) test(2019, DT[2:6, sum(v), id], data.table(id=1:2, V1=bit64::as.integer64(c(5L,15L)))) # gather, case of int64 and irows } -DT = data.table(id = c(1L,1L,2L), v = c(1i, 2i, 3i)) -test(2020.01, DT[, min(v), by=id], error="'complex' not supported by GForce min") -test(2020.02, DT[, max(v), by=id], error="'complex' not supported by GForce max") -test(2020.03, DT[, median(v), by=id], error="'complex' not supported by GForce median") -test(2020.04, DT[, head(v, 1), by=id], error="'complex' not supported by GForce head") -test(2020.05, DT[, tail(v, 1), by=id], error="'complex' not supported by GForce tail") -test(2020.06, DT[, v[1], by=id], error="'complex' not supported by GForce subset") -test(2020.07, DT[, sd(v), by=id], error="'complex' not supported by GForce sd") -test(2020.08, DT[, var(v), by=id], error="'complex' not supported by GForce var") -test(2020.09, DT[, prod(v), by=id], error="'complex' not supported by GForce prod") +DT = data.table(id = c(1L,1L,2L), v = as.raw(0:2)) +test(2020.01, DT[, min(v), by=id], error="'raw' not supported by GForce min") +test(2020.02, DT[, max(v), by=id], error="'raw' not supported by GForce max") +test(2020.03, DT[, median(v), by=id], error="'raw' not supported by GForce median") +test(2020.04, DT[, head(v, 1), by=id], error="'raw' not supported by GForce head") +test(2020.05, DT[, tail(v, 1), by=id], error="'raw' not supported by GForce tail") +test(2020.06, DT[, v[1], by=id], error="'raw' not supported by GForce subset") +test(2020.07, DT[, sd(v), by=id], error="'raw' not supported by GForce sd") +test(2020.08, DT[, var(v), by=id], error="'raw' not supported by GForce var") +test(2020.09, DT[, prod(v), by=id], error="'raw' not supported by GForce prod") DT = data.table(id = c(1L,1L,2L,2L), v = c(1L, 2L, NA, NA)) test(2020.10, DT[, median(v), id], data.table(id=1:2, V1=c(1.5, NA))) # median whole group has NAs @@ -15200,6 +15199,16 @@ d2 = data.table(r = 1:5, s = seq(0L, 20L, 5L)) test(2062.1, d1[d2, on = .(a <= s, b >= s), j = .SD], ans<-data.table(a=INT(0,5,10,10,15,20), b=INT(0,5,10,10,15,20))) test(2062.2, d1[d2, on = .(a <= s, b >= s)][, .(a, b)], ans) +# GForce for complex columns, part of #3690 +DT = data.table(id=c(1L,1L,2L), v=c(1i, 2i, 3i)) +test(2063.01, DT[, min(v), by=id], error="'complex' has no well-defined min") +test(2063.02, DT[, max(v), by=id], error="'complex' has no well-defined max") +test(2063.03, DT[, head(v, 1), by=id], data.table(id=1:2, V1=c(1, 3)*1i)) +test(2063.04, DT[, tail(v, 1), by=id], data.table(id=1:2, V1=(2:3)*1i)) +test(2063.05, DT[, v[2], by=id], data.table(id = 1:2, V1=c(2i, NA))) +## former test 1968.1 +DT = data.table(A=1:5, B=-3i, C=2147483647L) +test(2063.06, DT[, sum(B), by=A%%2L], data.table(A = 1:0, V1 = c(-9i, -6i))) ################################### # Add new tests above this line # From 1c1a97e894c291561974913fcfd24f986cb6fa92 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Sun, 14 Jul 2019 04:20:42 +0800 Subject: [PATCH 08/20] move to Rcomplex API --- src/data.table.h | 7 ----- src/gsumm.c | 73 ++++++++++++++++++++++++++++-------------------- 2 files changed, 42 insertions(+), 38 deletions(-) diff --git a/src/data.table.h b/src/data.table.h index 052086a9fb..a2a6204051 100644 --- a/src/data.table.h +++ b/src/data.table.h @@ -63,13 +63,6 @@ typedef R_xlen_t RLEN; #define ALTREP(x) 0 // for R<3.5.0, see issue #2866 and grep for "ALTREP" to see comments where it's used #endif -// for complex type support; copied from r-source/src/main/Rcomplex.h -#if defined(__GNUC__) && (defined(__sun__) || defined(__hpux__) || defined(Win32)) -# undef I -# define I (__extension__ 1.0iF) -#endif - - // init.c SEXP char_integer64; SEXP char_ITime; diff --git a/src/gsumm.c b/src/gsumm.c index c327988f72..79bef6767f 100644 --- a/src/gsumm.c +++ b/src/gsumm.c @@ -1,6 +1,5 @@ #include "data.table.h" //#include -#include static int ngrp = 0; // number of groups static int *grpsize = NULL; // size of each group, used by gmean (and gmedian) not gsum @@ -300,30 +299,30 @@ void *gather(SEXP x, bool *anyNA) } } break; case CPLXSXP: { - const double complex *restrict thisx = (double complex *)COMPLEX(x); + const Rcomplex *restrict thisx = COMPLEX(x); #pragma omp parallel for num_threads(getDTthreads()) for (int b=0; b DBL_MAX) ansd[i] = R_PosInf + cimag(s[i])*I; - else if (creal(s[i]) < -DBL_MAX) ansd[i] = R_NegInf + cimag(s[i])*I; - if (cimag(s[i]) > DBL_MAX) ansd[i] = creal(s[i]) + R_PosInf*I; - else if (cimag(s[i]) < -DBL_MAX) ansd[i] = creal(s[i]) + R_NegInf*I; - else ansd[i] = (double complex)s[i]; + if (s[i].r > DBL_MAX) { + Rcomplex REAL_TO_POS_INF = {R_PosInf, s[i].i}; + ansd[i] = REAL_TO_POS_INF; + } + else if (s[i].r < -DBL_MAX) { + Rcomplex REAL_TO_NEG_INF = {R_NegInf, s[i].i}; + ansd[i] = REAL_TO_NEG_INF; + } + if (s[i].i > DBL_MAX) { + Rcomplex IMAG_TO_POS_INF = {s[i].r, R_PosInf}; + ansd[i] = IMAG_TO_POS_INF; + } + else if (s[i].i < -DBL_MAX) { + Rcomplex IMAG_TO_NEG_INF = {s[i].r, R_NegInf}; + ansd[i] = IMAG_TO_NEG_INF; + } + else ansd[i] = s[i]; } } } @@ -974,9 +985,9 @@ SEXP glast(SEXP x) { } break; case CPLXSXP: { - const double complex *dx = (double complex *)COMPLEX(x); + const Rcomplex *dx = COMPLEX(x); ans = PROTECT(allocVector(CPLXSXP, ngrp)); - double complex *dans = (double complex *)COMPLEX(ans); + Rcomplex *dans = COMPLEX(ans); for (i=0; i grpsize[i]) { COMPLEX(ans)[i].r = NA_REAL; COMPLEX(ans)[i].i = NA_REAL; continue; } k = ff[i]+val-2; From c2aa0031bb363d7297cd65de4e05ce10a88da1f2 Mon Sep 17 00:00:00 2001 From: mattdowle Date: Tue, 16 Jul 2019 18:05:42 -0700 Subject: [PATCH 09/20] got it working: use ISNAN not ==NA_REAL, and separate s and si in gmean --- src/gsumm.c | 55 +++++++++++++++++++++++------------------------------ 1 file changed, 24 insertions(+), 31 deletions(-) diff --git a/src/gsumm.c b/src/gsumm.c index 79bef6767f..eb85a1b907 100644 --- a/src/gsumm.c +++ b/src/gsumm.c @@ -315,14 +315,14 @@ void *gather(SEXP x, bool *anyNA) my_gx[ my_tmpcounts[my_high[i]]++ ] = elem; // typically just checking one component would be enough, // but ?complex suggests there may be some edge cases; better to be safe - if (elem.r==NA_REAL && elem.i == NA_REAL) my_anyNA = true; + if (ISNAN(elem.r) && ISNAN(elem.i)) my_anyNA = true; } } else { const int *my_x = irows + b*batchSize; for (int i=0; ii /= grpsize[i]; + xd->r /= grpsize[i]; + xd++; + } } break; default : error("Internal error: gsum returned type '%s'. typeof(x) is '%s'", type2char(TYPEOF(ans)), type2char(TYPEOF(x))); // # nocov @@ -535,7 +541,7 @@ SEXP gmean(SEXP x, SEXP narm) const int n = (irowslen == -1) ? length(x) : irowslen; if (nrow != n) error("nrow [%d] != length(x) [%d] in gsum", nrow, n); - long double *s = calloc(ngrp, sizeof(long double)); + long double *s = calloc(ngrp, sizeof(long double)), *si=NULL; // s = sum; si = sum imaginary just for complex if (!s) error("Unable to allocate %d * %d bytes for sum in gmean na.rm=TRUE", ngrp, sizeof(long double)); int *c = calloc(ngrp, sizeof(int)); @@ -564,11 +570,14 @@ SEXP gmean(SEXP x, SEXP narm) } break; case CPLXSXP: { const Rcomplex *xd = COMPLEX(x); + long double *si = calloc(ngrp, sizeof(long double)); + if (!si) error("Unable to allocate %d * %d bytes for si in gmean na.rm=TRUE", ngrp, sizeof(long double)); for (int i=0; i DBL_MAX) ansd[i] = R_PosInf; - else if (s[i] < -DBL_MAX) ansd[i] = R_NegInf; - else ansd[i] = (double)s[i]; + ansd[i] = s[i]>DBL_MAX ? R_PosInf : (s[i] < -DBL_MAX ? R_NegInf : (double)s[i]); } } break; case CPLXSXP: { - ans = PROTECT(allocVector(CPLXSXP, ngrp)); + ans = PROTECT(allocVector(CPLXSXP, ngrp)); Rcomplex *ansd = COMPLEX(ans); for (int i=0; i DBL_MAX) { - Rcomplex REAL_TO_POS_INF = {R_PosInf, s[i].i}; - ansd[i] = REAL_TO_POS_INF; - } - else if (s[i].r < -DBL_MAX) { - Rcomplex REAL_TO_NEG_INF = {R_NegInf, s[i].i}; - ansd[i] = REAL_TO_NEG_INF; - } - if (s[i].i > DBL_MAX) { - Rcomplex IMAG_TO_POS_INF = {s[i].r, R_PosInf}; - ansd[i] = IMAG_TO_POS_INF; - } - else if (s[i].i < -DBL_MAX) { - Rcomplex IMAG_TO_NEG_INF = {s[i].r, R_NegInf}; - ansd[i] = IMAG_TO_NEG_INF; - } - else ansd[i] = s[i]; + si[i] /= c[i]; + ansd[i].r = s[i] >DBL_MAX ? R_PosInf : (s[i] < -DBL_MAX ? R_NegInf : (double)s[i]); + ansd[i].i = si[i]>DBL_MAX ? R_PosInf : (si[i]< -DBL_MAX ? R_NegInf : (double)si[i]); } } } - free(s); free(c); + free(s); free(si); free(c); copyMostAttrib(x, ans); UNPROTECT(1); // Rprintf("this gmean na.rm=TRUE took %8.3f\n", 1.0*(clock()-start)/CLOCKS_PER_SEC); From b3704280a91494857956e11bb46b42691dbef38e Mon Sep 17 00:00:00 2001 From: mattdowle Date: Tue, 16 Jul 2019 18:06:35 -0700 Subject: [PATCH 10/20] comment tweak --- src/gsumm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gsumm.c b/src/gsumm.c index eb85a1b907..8670cafa1a 100644 --- a/src/gsumm.c +++ b/src/gsumm.c @@ -575,7 +575,7 @@ SEXP gmean(SEXP x, SEXP narm) for (int i=0; i Date: Tue, 16 Jul 2019 18:31:35 -0700 Subject: [PATCH 11/20] no need for complex.h in dogroups.c --- inst/tests/tests.Rraw | 1 + src/dogroups.c | 24 ++++++++---------------- 2 files changed, 9 insertions(+), 16 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index b349c887fa..7598b56047 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -15308,6 +15308,7 @@ test(2066.05, DT[, v[2], by=id], data.table(id = 1:2, V1=c(2i, NA))) DT = data.table(A=1:5, B=-3i, C=2147483647L) test(2066.06, DT[, sum(B), by=A%%2L], data.table(A = 1:0, V1 = c(-9i, -6i))) +test(9999, 3L, 3L) # dummy test ################################### # Add new tests above this line # diff --git a/src/dogroups.c b/src/dogroups.c index 849a9472af..5f8ee1a61f 100644 --- a/src/dogroups.c +++ b/src/dogroups.c @@ -2,19 +2,12 @@ #include #include #include -#include - -// copied from r-source/src/main/Rcomplex.h -#if defined(__GNUC__) && (defined(__sun__) || defined(__hpux__) || defined(Win32)) -# undef I -# define I (__extension__ 1.0iF) -#endif SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEXP xjiscols, SEXP grporder, SEXP order, SEXP starts, SEXP lens, SEXP jexp, SEXP env, SEXP lhs, SEXP newnames, SEXP on, SEXP verbose) { R_len_t rownum, ngrp, nrowgroups, njval=0, ngrpcols, ansloc=0, maxn, estn=-1, thisansloc, grpn, thislen, igrp, origIlen=0, origSDnrow=0; int protecti=0; - SEXP names, names2, xknames, bynames, dtnames, ans=NULL, jval, thiscol, BY, N, DOTI, GRP, iSD, xSD, rownames, s, RHS, listwrap, target, source, tmp; + SEXP names, names2, xknames, bynames, dtnames, ans=NULL, jval, thiscol, BY, N, I, GRP, iSD, xSD, rownames, s, RHS, listwrap, target, source, tmp; Rboolean wasvector, firstalloc=FALSE, NullWarnDone=FALSE; clock_t tstart=0, tblock[10]={0}; int nblock[10]={0}; @@ -59,8 +52,7 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX for (R_len_t i=0; i maxGrpSize) maxGrpSize = ilens[i]; } - // #3639 introduced complex.h, which defines I, so use DOTI to differentiate - defineVar(install(".I"), DOTI = PROTECT(allocVector(INTSXP, maxGrpSize)), env); protecti++; + defineVar(install(".I"), I = PROTECT(allocVector(INTSXP, maxGrpSize)), env); protecti++; R_LockBinding(install(".I"), env); dtnames = PROTECT(getAttrib(dt, R_NamesSymbol)); protecti++; // added here to fix #4990 - `:=` did not issue recycling warning during "by" @@ -84,7 +76,7 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX copyMostAttrib(VECTOR_ELT(dt,INTEGER(dtcols)[i]-1), VECTOR_ELT(SDall,i)); // not names, otherwise test 778 would fail } - origIlen = length(DOTI); // test 762 has length(DOTI)==1 but nrow(SD)==0 + origIlen = length(I); // test 762 has length(I)==1 but nrow(SD)==0 if (length(SDall)) origSDnrow = length(VECTOR_ELT(SDall, 0)); xknames = getAttrib(xSD, R_NamesSymbol); @@ -165,8 +157,8 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX } } grpn = 1; // it may not be 1 e.g. test 722. TODO: revisit. - SETLENGTH(DOTI, grpn); - INTEGER(DOTI)[0] = 0; + SETLENGTH(I, grpn); + INTEGER(I)[0] = 0; for (int j=0; j0; From 8f6225c2c8103e9c12390fbf5f5eed891aa28c21 Mon Sep 17 00:00:00 2001 From: mattdowle Date: Tue, 16 Jul 2019 18:43:43 -0700 Subject: [PATCH 12/20] turned on verbose in last new test 2066.06 to trace crash on 32bit i386 Windows --- inst/tests/tests.Rraw | 2 +- src/gsumm.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 7598b56047..42cc7d87b1 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -15306,7 +15306,7 @@ test(2066.04, DT[, tail(v, 1), by=id], data.table(id=1:2, V1=(2:3)*1i)) test(2066.05, DT[, v[2], by=id], data.table(id = 1:2, V1=c(2i, NA))) ## former test 1968.1 DT = data.table(A=1:5, B=-3i, C=2147483647L) -test(2066.06, DT[, sum(B), by=A%%2L], data.table(A = 1:0, V1 = c(-9i, -6i))) +test(2066.06, DT[, sum(B), by=A%%2L, verbose=TRUE], data.table(A = 1:0, V1 = c(-9i, -6i))) test(9999, 3L, 3L) # dummy test diff --git a/src/gsumm.c b/src/gsumm.c index 8670cafa1a..84ccd83a72 100644 --- a/src/gsumm.c +++ b/src/gsumm.c @@ -508,7 +508,7 @@ SEXP gsum(SEXP x, SEXP narmArg) SEXP gmean(SEXP x, SEXP narm) { - SEXP ans; + SEXP ans=R_NilValue; int protecti=0; //clock_t start = clock(); if (!isLogical(narm) || LENGTH(narm)!=1 || LOGICAL(narm)[0]==NA_LOGICAL) error("na.rm must be TRUE or FALSE"); From a53f159badb689d92dc928f88fd5c5ad2fa657d8 Mon Sep 17 00:00:00 2001 From: mattdowle Date: Tue, 16 Jul 2019 19:08:24 -0700 Subject: [PATCH 13/20] more tracing --- src/gsumm.c | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/src/gsumm.c b/src/gsumm.c index 84ccd83a72..9fd721fabf 100644 --- a/src/gsumm.c +++ b/src/gsumm.c @@ -38,7 +38,8 @@ static int nbit(int n) } SEXP gforce(SEXP env, SEXP jsub, SEXP o, SEXP f, SEXP l, SEXP irowsArg) { - // double started = wallclock(); + double started = wallclock(); + const bool verbose = GetVerbose(); if (TYPEOF(env) != ENVSXP) error("env is not an environment"); // The type of jsub is pretty flexbile in R, so leave checking to eval() below. if (!isInteger(o)) error("o is not an integer vector"); @@ -94,7 +95,7 @@ SEXP gforce(SEXP env, SEXP jsub, SEXP o, SEXP f, SEXP l, SEXP irowsArg) { int *elem = grp + fp[g]-1; for (int j=0; j Date: Tue, 16 Jul 2019 19:26:09 -0700 Subject: [PATCH 14/20] more tracing --- R/data.table.R | 1 + src/gsumm.c | 1 + 2 files changed, 2 insertions(+) diff --git a/R/data.table.R b/R/data.table.R index fea514aeef..2bcf03b9b0 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -1725,6 +1725,7 @@ replace_order = function(isub, verbose, env) { #fix for #1683 if (use.I) assign(".I", seq_len(nrow(x)), thisEnv) ans = gforce(thisEnv, jsub, o__, f__, len__, irows) # irows needed for #971. + if (verbose) { cat("Returned from gforce()\n"); flush.console(); } gi = if (length(o__)) o__[f__] else f__ g = lapply(grpcols, function(i) groups[[i]][gi]) ans = c(g, ans) diff --git a/src/gsumm.c b/src/gsumm.c index 9fd721fabf..2abcda51dd 100644 --- a/src/gsumm.c +++ b/src/gsumm.c @@ -199,6 +199,7 @@ SEXP gforce(SEXP env, SEXP jsub, SEXP o, SEXP f, SEXP l, SEXP irowsArg) { if (verbose) { Rprintf("gforce eval took %.3f\n", wallclock()-started); started=wallclock(); } // if this eval() fails with R error, R will release grp for us. Which is why we use R_alloc above. if (isVectorAtomic(ans)) { + if (verbose) Rprintf("isVectorAtomic(ans)==TRUE\n"); SEXP tt = ans; ans = PROTECT(allocVector(VECSXP, 1)); SET_VECTOR_ELT(ans, 0, tt); From 8e761bb0ff0ce3ef52c72d9275c28b909b6fedb2 Mon Sep 17 00:00:00 2001 From: mattdowle Date: Tue, 16 Jul 2019 19:41:14 -0700 Subject: [PATCH 15/20] more tracing on i386 --- R/data.table.R | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/R/data.table.R b/R/data.table.R index 2bcf03b9b0..d0fcc02af8 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -1725,10 +1725,13 @@ replace_order = function(isub, verbose, env) { #fix for #1683 if (use.I) assign(".I", seq_len(nrow(x)), thisEnv) ans = gforce(thisEnv, jsub, o__, f__, len__, irows) # irows needed for #971. - if (verbose) { cat("Returned from gforce()\n"); flush.console(); } + if (verbose) { cat("Returned from gforce()\n"); flush.console(); print(ans); flush.console(); } gi = if (length(o__)) o__[f__] else f__ + if (verbose) { cat("point 1\n"); flush.console(); } g = lapply(grpcols, function(i) groups[[i]][gi]) + if (verbose) { cat("point 2\n"); flush.console(); } ans = c(g, ans) + if (verbose) { cat("point 3\n"); flush.console(); } } else { ans = .Call(Cdogroups, x, xcols, groups, grpcols, jiscols, xjiscols, grporder, o__, f__, len__, jsub, SDenv, cols, newnames, !missing(on), verbose) } From 07c393ab0c93ca7d2f722260b7762124b0572ba2 Mon Sep 17 00:00:00 2001 From: mattdowle Date: Tue, 16 Jul 2019 20:44:19 -0700 Subject: [PATCH 16/20] more tracing --- R/data.table.R | 2 ++ src/gsumm.c | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/R/data.table.R b/R/data.table.R index d0fcc02af8..9a2989bd20 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -1735,7 +1735,9 @@ replace_order = function(isub, verbose, env) { } else { ans = .Call(Cdogroups, x, xcols, groups, grpcols, jiscols, xjiscols, grporder, o__, f__, len__, jsub, SDenv, cols, newnames, !missing(on), verbose) } + if (verbose) { cat("point 4\n"); flush.console(); } if (verbose) {cat(timetaken(last.started.at),"\n"); flush.console()} + if (verbose) { cat("point 5\n"); flush.console(); } # TO DO: xrows would be a better name for irows: irows means the rows of x that i joins to # Grouping by i: icols the joins columns (might not need), isdcols (the non join i and used by j), all __ are length x # Grouping by by: i is by val, icols NULL, o__ may be subset of x, f__ points to o__ (or x if !length o__) diff --git a/src/gsumm.c b/src/gsumm.c index 2abcda51dd..b663335891 100644 --- a/src/gsumm.c +++ b/src/gsumm.c @@ -576,7 +576,7 @@ SEXP gmean(SEXP x, SEXP narm) } break; case CPLXSXP: { const Rcomplex *xd = COMPLEX(x); - long double *si = calloc(ngrp, sizeof(long double)); + si = calloc(ngrp, sizeof(long double)); if (!si) error("Unable to allocate %d * %d bytes for si in gmean na.rm=TRUE", ngrp, sizeof(long double)); for (int i=0; i grpsize[i]) { COMPLEX(ans)[i].r = NA_REAL; COMPLEX(ans)[i].i = NA_REAL; continue; } + if (val > grpsize[i]) { dans[i].r = NA_REAL; dans[i].i = NA_REAL; continue; } k = ff[i]+val-2; if (isunsorted) k = oo[k]-1; k = (irowslen == -1) ? k : irows[k]-1; From 802a42a628f67b3b8304d5907e5b14196224721b Mon Sep 17 00:00:00 2001 From: mattdowle Date: Tue, 16 Jul 2019 21:41:59 -0700 Subject: [PATCH 17/20] cleared a few rchk messages --- src/coalesce.c | 11 +++++++---- src/gsumm.c | 10 ++++++---- 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/src/coalesce.c b/src/coalesce.c index d1cb2bc0aa..e57d63eb4c 100644 --- a/src/coalesce.c +++ b/src/coalesce.c @@ -7,6 +7,7 @@ SEXP coalesce(SEXP x, SEXP inplaceArg) { error("Internal error in coalesce.c: argument 'inplaceArg' must be TRUE or FALSE"); // # nocov const bool inplace = LOGICAL(inplaceArg)[0]; const bool verbose = GetVerbose(); + int nprotect = 0; if (length(x)==0) return R_NilValue; SEXP first; // the first vector (it might be the first argument, or it might be the first column of a data.table|frame int off = 1; // when x has been pointed to the list of replacement candidates, is the first candidate in position 0 or 1 in the list @@ -29,21 +30,23 @@ SEXP coalesce(SEXP x, SEXP inplaceArg) { if (factor) { if (!isFactor(item)) error("Item 1 is a factor but item %d is not a factor. When factors are involved, all items must be factor.", i+2); - if (!R_compute_identical(getAttrib(first, R_LevelsSymbol), getAttrib(item, R_LevelsSymbol), 0)) + if (!R_compute_identical(PROTECT(getAttrib(first, R_LevelsSymbol)), PROTECT(getAttrib(item, R_LevelsSymbol)), 0)) error("Item %d is a factor but its levels are not identical to the first item's levels.", i+2); + UNPROTECT(2); } else { if (isFactor(item)) error("Item %d is a factor but item 1 is not a factor. When factors are involved, all items must be factor.", i+2); } if (TYPEOF(first) != TYPEOF(item)) error("Item %d is type %s but the first item is type %s. Please coerce before coalescing.", i+2, type2char(TYPEOF(item)), type2char(TYPEOF(first))); - if (!R_compute_identical(getAttrib(first, R_ClassSymbol), getAttrib(item, R_ClassSymbol), 0)) + if (!R_compute_identical(PROTECT(getAttrib(first, R_ClassSymbol)), PROTECT(getAttrib(item, R_ClassSymbol)), 0)) error("Item %d has a different class than item 1.", i+2); + UNPROTECT(2); if (length(item)!=1 && length(item)!=nrow) error("Item %d is length %d but the first item is length %d. Only singletons are recycled.", i+2, length(item), nrow); } if (!inplace) { - first = PROTECT(duplicate(first)); + first = PROTECT(duplicate(first)); nprotect++; if (verbose) Rprintf("coalesce copied first item (inplace=FALSE)\n"); } void **valP = (void **)R_alloc(nval, sizeof(void *)); @@ -140,7 +143,7 @@ SEXP coalesce(SEXP x, SEXP inplaceArg) { default: error("Unsupported type: %s", type2char(TYPEOF(first))); // e.g. raw is tested } - if (!inplace) UNPROTECT(1); + UNPROTECT(nprotect); return first; } diff --git a/src/gsumm.c b/src/gsumm.c index b663335891..208fc61678 100644 --- a/src/gsumm.c +++ b/src/gsumm.c @@ -507,20 +507,20 @@ SEXP gsum(SEXP x, SEXP narmArg) error("Type '%s' not supported by GForce sum (gsum). Either add the prefix base::sum(.) or turn off GForce optimization using options(datatable.optimize=1)", type2char(TYPEOF(x))); } copyMostAttrib(x, ans); - UNPROTECT(1); if (verbose) { Rprintf("%.3fs\n", wallclock()-started); } + UNPROTECT(1); return(ans); } SEXP gmean(SEXP x, SEXP narm) { SEXP ans=R_NilValue; - int protecti=0; //clock_t start = clock(); if (!isLogical(narm) || LENGTH(narm)!=1 || LOGICAL(narm)[0]==NA_LOGICAL) error("na.rm must be TRUE or FALSE"); if (!isVectorAtomic(x)) error("GForce mean can only be applied to columns, not .SD or similar. Likely you're looking for 'DT[,lapply(.SD,mean),by=,.SDcols=]'. See ?data.table."); if (inherits(x, "factor")) error("mean is not meaningful for factors."); if (!LOGICAL(narm)[0]) { + int protecti=0; ans = PROTECT(gsum(x,narm)); protecti++; switch(TYPEOF(ans)) { case LGLSXP: case INTSXP: @@ -611,12 +611,14 @@ SEXP gmean(SEXP x, SEXP narm) ansd[i].r = s[i] >DBL_MAX ? R_PosInf : (s[i] < -DBL_MAX ? R_NegInf : (double)s[i]); ansd[i].i = si[i]>DBL_MAX ? R_PosInf : (si[i]< -DBL_MAX ? R_NegInf : (double)si[i]); } - } + } break; + default: + error("Internal error: unsupported type at the end of gmean"); // # nocov } free(s); free(si); free(c); copyMostAttrib(x, ans); - UNPROTECT(1); // Rprintf("this gmean na.rm=TRUE took %8.3f\n", 1.0*(clock()-start)/CLOCKS_PER_SEC); + UNPROTECT(1); return(ans); } From 06c7089bb8c26d2fb8da71f54a174156620b248d Mon Sep 17 00:00:00 2001 From: mattdowle Date: Wed, 17 Jul 2019 11:33:01 -0700 Subject: [PATCH 18/20] torture by Rstrict yielded --- src/gsumm.c | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/src/gsumm.c b/src/gsumm.c index 208fc61678..69839804a3 100644 --- a/src/gsumm.c +++ b/src/gsumm.c @@ -96,6 +96,7 @@ SEXP gforce(SEXP env, SEXP jsub, SEXP o, SEXP f, SEXP l, SEXP irowsArg) { for (int j=0; j Date: Wed, 17 Jul 2019 12:21:31 -0700 Subject: [PATCH 19/20] remove tracing --- R/data.table.R | 6 ------ inst/tests/tests.Rraw | 3 +-- 2 files changed, 1 insertion(+), 8 deletions(-) diff --git a/R/data.table.R b/R/data.table.R index 9a2989bd20..fea514aeef 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -1725,19 +1725,13 @@ replace_order = function(isub, verbose, env) { #fix for #1683 if (use.I) assign(".I", seq_len(nrow(x)), thisEnv) ans = gforce(thisEnv, jsub, o__, f__, len__, irows) # irows needed for #971. - if (verbose) { cat("Returned from gforce()\n"); flush.console(); print(ans); flush.console(); } gi = if (length(o__)) o__[f__] else f__ - if (verbose) { cat("point 1\n"); flush.console(); } g = lapply(grpcols, function(i) groups[[i]][gi]) - if (verbose) { cat("point 2\n"); flush.console(); } ans = c(g, ans) - if (verbose) { cat("point 3\n"); flush.console(); } } else { ans = .Call(Cdogroups, x, xcols, groups, grpcols, jiscols, xjiscols, grporder, o__, f__, len__, jsub, SDenv, cols, newnames, !missing(on), verbose) } - if (verbose) { cat("point 4\n"); flush.console(); } if (verbose) {cat(timetaken(last.started.at),"\n"); flush.console()} - if (verbose) { cat("point 5\n"); flush.console(); } # TO DO: xrows would be a better name for irows: irows means the rows of x that i joins to # Grouping by i: icols the joins columns (might not need), isdcols (the non join i and used by j), all __ are length x # Grouping by by: i is by val, icols NULL, o__ may be subset of x, f__ points to o__ (or x if !length o__) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 42cc7d87b1..b349c887fa 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -15306,9 +15306,8 @@ test(2066.04, DT[, tail(v, 1), by=id], data.table(id=1:2, V1=(2:3)*1i)) test(2066.05, DT[, v[2], by=id], data.table(id = 1:2, V1=c(2i, NA))) ## former test 1968.1 DT = data.table(A=1:5, B=-3i, C=2147483647L) -test(2066.06, DT[, sum(B), by=A%%2L, verbose=TRUE], data.table(A = 1:0, V1 = c(-9i, -6i))) +test(2066.06, DT[, sum(B), by=A%%2L], data.table(A = 1:0, V1 = c(-9i, -6i))) -test(9999, 3L, 3L) # dummy test ################################### # Add new tests above this line # From 4f2a97d5869b17ccd4528c2d1e6c6d4ce0a67692 Mon Sep 17 00:00:00 2001 From: mattdowle Date: Wed, 17 Jul 2019 12:56:53 -0700 Subject: [PATCH 20/20] coverage --- inst/tests/tests.Rraw | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index b349c887fa..6fb289dd54 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -15306,7 +15306,13 @@ test(2066.04, DT[, tail(v, 1), by=id], data.table(id=1:2, V1=(2:3)*1i)) test(2066.05, DT[, v[2], by=id], data.table(id = 1:2, V1=c(2i, NA))) ## former test 1968.1 DT = data.table(A=1:5, B=-3i, C=2147483647L) -test(2066.06, DT[, sum(B), by=A%%2L], data.table(A = 1:0, V1 = c(-9i, -6i))) +test(2066.06, DT[, .(sum(B), mean(B)), by=A%%2L], data.table(A=1:0, V1=c(-9i, -6i), V2=-3i)) +test(2066.07, DT[2:4, .(sum(B), mean(B)), by=A%%2L], data.table(A=0:1, V1=c(-6i, -3i), V2=-3i)) +DT[4, B:=NA] +test(2066.08, DT[, .(sum(B), mean(B)), by=A%%2L], data.table(A=1:0, V1=c(-9i, NA), V2=c(-3i, NA))) +test(2066.09, DT[2:4, .(sum(B), mean(B)), by=A%%2L], data.table(A=0:1, V1=c(NA, -3i), V2=c(NA, -3i))) +test(2066.10, DT[, .(sum(B, na.rm=TRUE), mean(B, na.rm=TRUE)), by=A%%2L], data.table(A=1:0, V1=c(-9i, -3i), V2=-3i)) +test(2066.11, DT[2:4, .(sum(B, na.rm=TRUE), mean(B, na.rm=TRUE)), by=A%%2L], data.table(A=0:1, V1=c(-3i, -3i), V2=-3i)) ###################################