From 58a9f55a7697973d97bd42f83f9d0bdcc3c48669 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Thu, 14 May 2020 11:35:50 +0800 Subject: [PATCH 1/4] gmin/gmax properly support integer64 --- NEWS.md | 2 + inst/tests/tests.Rraw | 19 +++++ src/gsumm.c | 164 +++++++++++++++++++++++++++++++++--------- 3 files changed, 153 insertions(+), 32 deletions(-) diff --git a/NEWS.md b/NEWS.md index 026f06c86e..657ba4783e 100644 --- a/NEWS.md +++ b/NEWS.md @@ -109,6 +109,8 @@ unit = "s") 13. A relatively rare case of segfault when combining non-equi joins with `by=.EACHI` is now fixed, closes [#4388](https://github.com/Rdatatable/data.table/issues/4388). +14. `GForce` handles `integer64` input correctly for `min` and `max` with missing values, [#4444](https://github.com/Rdatatable/data.table/issues/4388). Thanks @go-see for the report. + ## NOTES 0. Retrospective license change permission was sought from and granted by 4 contributors who were missed in [PR#2456](https://github.com/Rdatatable/data.table/pull/2456), [#4140](https://github.com/Rdatatable/data.table/pull/4140). We had used [GitHub's contributor page](https://github.com/Rdatatable/data.table/graphs/contributors) which omits 3 of these due to invalid email addresses, unlike GitLab's contributor page which includes the ids. The 4th omission was a PR to a script which should not have been excluded; a script is code too. We are sorry these contributors were not properly credited before. They have now been added to the contributors list as displayed on CRAN. All the contributors of code to data.table hold its copyright jointly; your contributions belong to you. You contributed to data.table when it had a particular license at that time, and you contributed on that basis. This is why in the last license change, all contributors of code were consulted and each had a veto. diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index ed17470383..93030e7653 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -16853,3 +16853,22 @@ A = data.table(A=c(complex(real = 1:3, imaginary=c(0, -1, 1)), NaN)) test(2138.3, rbind(A,B), data.table(A=c(as.character(A$A), B$A))) A = data.table(A=as.complex(rep(NA, 5))) test(2138.4, rbind(A,B), data.table(A=c(as.character(A$A), B$A))) + +# gmin/gmax + NA values + na.rm, #4444 +if (test_bit64) { + DT = data.table(grp=c(1L, 1L, 1L, 2L), i64=as.integer64(c(NA, 1:3))) + old = options(datatable.optimize=2L) + test(2139.1, DT[ , min(i64), by=grp], data.table(grp=1:2, V1=as.integer64(c(NA, 3)))) + test(2139.2, DT[ , min(i64, na.rm=TRUE), by=grp], data.table(grp=1:2, V1=as.integer64(c(1, 3)))) + test(2139.3, DT[ , max(i64), by=grp], data.table(grp=1:2, V1=as.integer64(c(NA, 3)))) + test(2139.4, DT[ , max(i64, na.rm=TRUE), by=grp], data.table(grp=1:2, V1=as.integer64(2:3))) + # force an all-NA group + DT[ , i64 := rev(i64)] + i64_default_min = suppressWarnings(min(as.integer64(NA), na.rm=TRUE)) + i64_default_max = suppressWarnings(max(as.integer64(NA), na.rm=TRUE)) + test(2139.5, DT[ , min(i64, na.rm=TRUE), by=grp], data.table(grp=1:2, V1=c(as.integer64(1), i64_default_min)), + warning="No non-missing values found in at least one group") + test(2139.6, DT[ , max(i64, na.rm=TRUE), by=grp], data.table(grp=1:2, V1=c(as.integer64(3), i64_default_max)), + warning="No non-missing values found in at least one group") + options(old) +} diff --git a/src/gsumm.c b/src/gsumm.c index ef63519a3c..645bc17500 100644 --- a/src/gsumm.c +++ b/src/gsumm.c @@ -761,28 +761,62 @@ SEXP gmin(SEXP x, SEXP narm) break; case REALSXP: ans = PROTECT(allocVector(REALSXP, ngrp)); protecti++; - if (!LOGICAL(narm)[0]) { - for (i=0; i ansp[thisgrp]) + ansp[thisgrp] = xp[ix]; + } + for (i=0; i ansp[thisgrp]) + ansp[thisgrp] = xp[ix]; + } + } + } else { // non-int64 + double *ansp = REAL(ans); + double *xp = REAL(x); + if (LOGICAL(narm)[0]) { + for (i=0; i ansp[thisgrp]) + ansp[thisgrp] = xp[ix]; + } + for (i=0; i ansp[thisgrp]) + ansp[thisgrp] = xp[ix]; + } + } + } + /*case REALSXP: + ans = PROTECT(allocVector(REALSXP, ngrp)); protecti++; + const bool isInt64 = INHERITS(x, char_integer64); + double *ansp = REAL(ans); + int64_t *xi64p = (int64_t *)REAL(x); + double *xp = REAL(x); + for (i=0; i Date: Thu, 14 May 2020 11:36:54 +0800 Subject: [PATCH 2/4] remove old code --- src/gsumm.c | 43 ------------------------------------------- 1 file changed, 43 deletions(-) diff --git a/src/gsumm.c b/src/gsumm.c index 645bc17500..a7ebe57185 100644 --- a/src/gsumm.c +++ b/src/gsumm.c @@ -989,49 +989,6 @@ SEXP gmax(SEXP x, SEXP narm) } } } - /*case REALSXP: - ans = PROTECT(allocVector(REALSXP, ngrp)); protecti++; - const bool isInt64 = INHERITS(x, char_integer64); - double *ansp = REAL(ans); - int64_t *xi64p = (int64_t *)REAL(x); - double *xp = REAL(x); - for (i=0; i Date: Thu, 14 May 2020 11:43:09 +0800 Subject: [PATCH 3/4] fix NEWS item --- NEWS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index 657ba4783e..c15fb0d855 100644 --- a/NEWS.md +++ b/NEWS.md @@ -109,7 +109,7 @@ unit = "s") 13. A relatively rare case of segfault when combining non-equi joins with `by=.EACHI` is now fixed, closes [#4388](https://github.com/Rdatatable/data.table/issues/4388). -14. `GForce` handles `integer64` input correctly for `min` and `max` with missing values, [#4444](https://github.com/Rdatatable/data.table/issues/4388). Thanks @go-see for the report. +14. `GForce` handles `integer64` input correctly for `min` and `max` with missing values, [#4444](https://github.com/Rdatatable/data.table/issues/4444). Thanks @go-see for the report. ## NOTES From e6791b44e7e87291a7fdff2562816fc978285955 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Thu, 14 May 2020 11:56:02 +0800 Subject: [PATCH 4/4] print actual MAX_INTEGER64 value instead of C macro name --- src/gsumm.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/gsumm.c b/src/gsumm.c index a7ebe57185..9b96db8a81 100644 --- a/src/gsumm.c +++ b/src/gsumm.c @@ -775,7 +775,7 @@ SEXP gmin(SEXP x, SEXP narm) } for (i=0; i