diff --git a/NEWS.md b/NEWS.md index 986b17ed09..bc6b599c35 100644 --- a/NEWS.md +++ b/NEWS.md @@ -226,6 +226,8 @@ 26. Column binding of zero column `data.table` will now work as expected, [#3334](https://github.com/Rdatatable/data.table/issues/3334). Thanks to @kzenstratus for the report. +27. `integer64` sum-by-group is now properly optimized, [#1647](https://github.com/Rdatatable/data.table/issues/1647), [#3464](https://github.com/Rdatatable/data.table/issues/3464). Thanks to @mlandry22-h2o for the report. + #### NOTES 1. `rbindlist`'s `use.names="check"` now emits its message for automatic column names (`"V[0-9]+"`) too, [#3484](https://github.com/Rdatatable/data.table/pull/3484). See news item 5 of v1.12.2 below. diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index a436e557b4..58c3c63c4a 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -15657,6 +15657,20 @@ test(2076.02, X[on=Y], data.table(a=2:3, b=c(2L,NA_integer_), d=2:1)) test(2076.03, X[on=3], error="When on= is provided but not i=, on= must be a named list or data.table|frame, and a natural join") test(2076.04, X[on=list(3)], error="When on= is provided but not i=, on= must be a named list or data.table|frame, and a natural join") +# gsum int64 support #1647, #3464 +if (test_bit64) { + d = data.table(g=1:2, i32=c(2L,-1L,3L,4L), i64=as.integer64(c(2L,-1L,3L,4L))) + int64_int32_match = function(x, y) isTRUE(all.equal(lapply(x, as.integer), lapply(y, as.integer))) + test(2077.01, int64_int32_match(d[, sum(i32), g], d[, sum(i64), g])) + test(2077.02, int64_int32_match(d[, sum(i32, na.rm=TRUE), g], d[, sum(i64, na.rm=TRUE), g])) + d[3L, c("i32","i64") := list(NA_integer_, as.integer64(NA))] # some NA group + test(2077.03, int64_int32_match(d[, sum(i32), g], d[, sum(i64), g])) + test(2077.04, int64_int32_match(d[, sum(i32, na.rm=TRUE), g], d[, sum(i64, na.rm=TRUE), g])) + d[1L, c("i32","i64") := list(NA_integer_, as.integer64(NA))] # all NA group + test(2077.05, int64_int32_match(d[, sum(i32), g], d[, sum(i64), g])) + test(2077.06, int64_int32_match(d[, sum(i32, na.rm=TRUE), g], d[, sum(i64, na.rm=TRUE), g])) +} + ################################### # Add new tests above this line # diff --git a/src/gsumm.c b/src/gsumm.c index 94a5342d8e..33b23ebd5c 100644 --- a/src/gsumm.c +++ b/src/gsumm.c @@ -426,37 +426,96 @@ SEXP gsum(SEXP x, SEXP narmArg) } } break; case REALSXP: { - const double *restrict gx = gather(x, &anyNA); - ans = PROTECT(allocVector(REALSXP, ngrp)); - double *restrict ansp = REAL(ans); - memset(ansp, 0, ngrp*sizeof(double)); - if (!narm || !anyNA) { - #pragma omp parallel for num_threads(getDTthreads()) - for (int h=0; h