From 0b5d251d0a662f27c284725aed683a69b38b9660 Mon Sep 17 00:00:00 2001 From: jangorecki Date: Wed, 31 Jul 2019 12:32:13 +0200 Subject: [PATCH 1/2] gsum works on int64 column, closes #1647 #3464 --- NEWS.md | 2 + inst/tests/tests.Rraw | 15 ++++++ src/gsumm.c | 115 ++++++++++++++++++++++++++++++++---------- 3 files changed, 104 insertions(+), 28 deletions(-) diff --git a/NEWS.md b/NEWS.md index 411a589857..23ab4b4792 100644 --- a/NEWS.md +++ b/NEWS.md @@ -198,6 +198,8 @@ 24. `column not found` could incorrectly occur in rare non-equi-join cases, [#3635](https://github.com/Rdatatable/data.table/issues/3635). Thanks to @UweBlock for the report. +25. `sum` by group of `integer64` column is now properly handled, [#1647](https://github.com/Rdatatable/data.table/issues/1647), [#3464](https://github.com/Rdatatable/data.table/issues/3464). Thanks to @mlandry22-h2o for the report. + #### NOTES 1. `rbindlist`'s `use.names="check"` now emits its message for automatic column names (`"V[0-9]+"`) too, [#3484](https://github.com/Rdatatable/data.table/pull/3484). See news item 5 of v1.12.2 below. diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index dbfb740e69..3f070c9076 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -15428,6 +15428,21 @@ test(2071.10, dcast(data.table(a=1, b=1, l=list(list(1))), a ~ b, value.var='l') test(2071.11, dcast(data.table(a = 1, b = 2, c = 3), a ~ b, value.var = 'c', fill = '2'), data.table(a=1, `2`=3, key='a')) +# gsum int64 support #1647, #3464 +if (test_bit64) { + d = data.table(g=1:2, i32=c(2L,-1L,3L,4L), i64=as.integer64(c(2L,-1L,3L,4L))) + int64_int32_match = function(x, y) isTRUE(all.equal(lapply(x, as.integer), lapply(y, as.integer))) + test(2072.01, int64_int32_match(d[, sum(i32), g], d[, sum(i64), g])) + test(2072.02, int64_int32_match(d[, sum(i32, na.rm=TRUE), g], d[, sum(i64, na.rm=TRUE), g])) + d[3L, c("i32","i64") := list(NA_integer_, as.integer64(NA))] # some NA group + test(2072.03, int64_int32_match(d[, sum(i32), g], d[, sum(i64), g])) + test(2072.04, int64_int32_match(d[, sum(i32, na.rm=TRUE), g], d[, sum(i64, na.rm=TRUE), g])) + d[1L, c("i32","i64") := list(NA_integer_, as.integer64(NA))] # all NA group + test(2072.05, int64_int32_match(d[, sum(i32), g], d[, sum(i64), g])) + test(2072.06, int64_int32_match(d[, sum(i32, na.rm=TRUE), g], d[, sum(i64, na.rm=TRUE), g])) +} + + ################################### # Add new tests above this line # ################################### diff --git a/src/gsumm.c b/src/gsumm.c index 94a5342d8e..33b23ebd5c 100644 --- a/src/gsumm.c +++ b/src/gsumm.c @@ -426,37 +426,96 @@ SEXP gsum(SEXP x, SEXP narmArg) } } break; case REALSXP: { - const double *restrict gx = gather(x, &anyNA); - ans = PROTECT(allocVector(REALSXP, ngrp)); - double *restrict ansp = REAL(ans); - memset(ansp, 0, ngrp*sizeof(double)); - if (!narm || !anyNA) { - #pragma omp parallel for num_threads(getDTthreads()) - for (int h=0; h Date: Tue, 13 Aug 2019 17:12:42 -0700 Subject: [PATCH 2/2] test number increasing --- inst/tests/tests.Rraw | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 0b62d157bd..58c3c63c4a 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -15657,19 +15657,18 @@ test(2076.02, X[on=Y], data.table(a=2:3, b=c(2L,NA_integer_), d=2:1)) test(2076.03, X[on=3], error="When on= is provided but not i=, on= must be a named list or data.table|frame, and a natural join") test(2076.04, X[on=list(3)], error="When on= is provided but not i=, on= must be a named list or data.table|frame, and a natural join") - # gsum int64 support #1647, #3464 if (test_bit64) { d = data.table(g=1:2, i32=c(2L,-1L,3L,4L), i64=as.integer64(c(2L,-1L,3L,4L))) int64_int32_match = function(x, y) isTRUE(all.equal(lapply(x, as.integer), lapply(y, as.integer))) - test(2072.01, int64_int32_match(d[, sum(i32), g], d[, sum(i64), g])) - test(2072.02, int64_int32_match(d[, sum(i32, na.rm=TRUE), g], d[, sum(i64, na.rm=TRUE), g])) + test(2077.01, int64_int32_match(d[, sum(i32), g], d[, sum(i64), g])) + test(2077.02, int64_int32_match(d[, sum(i32, na.rm=TRUE), g], d[, sum(i64, na.rm=TRUE), g])) d[3L, c("i32","i64") := list(NA_integer_, as.integer64(NA))] # some NA group - test(2072.03, int64_int32_match(d[, sum(i32), g], d[, sum(i64), g])) - test(2072.04, int64_int32_match(d[, sum(i32, na.rm=TRUE), g], d[, sum(i64, na.rm=TRUE), g])) + test(2077.03, int64_int32_match(d[, sum(i32), g], d[, sum(i64), g])) + test(2077.04, int64_int32_match(d[, sum(i32, na.rm=TRUE), g], d[, sum(i64, na.rm=TRUE), g])) d[1L, c("i32","i64") := list(NA_integer_, as.integer64(NA))] # all NA group - test(2072.05, int64_int32_match(d[, sum(i32), g], d[, sum(i64), g])) - test(2072.06, int64_int32_match(d[, sum(i32, na.rm=TRUE), g], d[, sum(i64, na.rm=TRUE), g])) + test(2077.05, int64_int32_match(d[, sum(i32), g], d[, sum(i64), g])) + test(2077.06, int64_int32_match(d[, sum(i32, na.rm=TRUE), g], d[, sum(i64, na.rm=TRUE), g])) }