From 14e011a379744b7079f8354ea89e720237852c24 Mon Sep 17 00:00:00 2001 From: Arun Srinivasan Date: Sat, 9 Feb 2019 22:25:40 +0000 Subject: [PATCH] Fix for #3349 foverlaps issue with datetimes < 1970-01-01 --- NEWS.md | 2 ++ R/foverlaps.R | 10 +++++----- inst/tests/tests.Rraw | 6 ++++++ 3 files changed, 13 insertions(+), 5 deletions(-) diff --git a/NEWS.md b/NEWS.md index 57d45c2a81..076ed4b6c5 100644 --- a/NEWS.md +++ b/NEWS.md @@ -16,6 +16,8 @@ 4. Grouping by unusual column names such as `by='string_with_\\'` and `keyby="x y"` could fail, [#3319](https://github.com/Rdatatable/data.table/issues/3319) and [#3378](https://github.com/Rdatatable/data.table/issues/3378). Thanks to @HughParsonage for reporting and @MichaelChirico for the fixes. +5. `foverlaps()` returned incorrect results overlapping on POSIXct objects which were <= `1970-01-01`, i.e., datetime values that were represented internally as -ve numeric values. This is now fixed. Closes [#3349](https://github.com/Rdatatable/data.table/issues/3349). Thanks to @lux5 for reporting. + #### NOTES 1. When upgrading to 1.12.0 some Windows users might have seen `CdllVersion not found` in some circumstances. We found a way to catch that so the [helpful message](https://twitter.com/MattDowle/status/1084528873549705217) now occurs for those upgrading from versions prior to 1.12.0 too, as well as those upgrading from 1.12.0 to a later version. See item 1 in notes section of 1.12.0 below for more background. diff --git a/R/foverlaps.R b/R/foverlaps.R index d8a16b495b..afaa78d7e1 100644 --- a/R/foverlaps.R +++ b/R/foverlaps.R @@ -70,10 +70,9 @@ foverlaps <- function(x, y, by.x=if (!is.null(key(x))) key(x) else key(y), by.y= bits = floor(log2(.Machine$double.eps)) 2 ^ (bits + (getNumericRounding() * 8L)) } - incr = 1 + dt_eps() isdouble = TRUE isposix = "POSIXct" %chin% yclass - } else incr = 1L # integer or Date class for example + } ## hopefully all checks are over. Now onto the actual task at hand. origx = x; x = shallow(x, by.x) origy = y; y = shallow(y, by.y) @@ -90,15 +89,16 @@ foverlaps <- function(x, y, by.x=if (!is.null(key(x))) key(x) else key(y), by.y= mcall = make_call(mcols, quote(c)) if (type %chin% c("within", "any")) { mcall[[3L]] = substitute( - if (isposix) unclass(val)*incr # incr is okay since this won't be negative + # datetimes before 1970-01-01 are represented as -ve numerics, #3349 + if (isposix) unclass(val)*(1 + sign(unclass(val))*dt_eps()) else if (isdouble) { # fix for #1006 - 0.0 occurs in both start and end # better fix for 0.0, and other -ves. can't use 'incr' # hopefully this doesn't open another can of worms (val+dt_eps())*(1 + sign(val)*dt_eps()) } - else val+incr, - list(val = mcall[[3L]], incr = incr)) + else val+1L, # +1L is for integer/IDate/Date class, for examples + list(val = mcall[[3L]])) } make_call(c(icall, pos=mcall), quote(list)) } diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index c0e894b989..a5ee4d1e84 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -13414,6 +13414,12 @@ DT = data.table(a = c(1:3, 3:1)) test(1984.38, rowidv(DT, prefix = 5L), error='must be NULL or a character vector') test(1984.39, rowidv(DT, prefix = c('hey','you')), error='must be NULL or a character vector') +# Test for #3349, foverlaps returned spurious results with POSIXct objects < 1970-01-01 +x <- data.table(val=178.41,s=as.POSIXct("1968-04-25 04:20:00"),e=as.POSIXct("1968-04-25 04:20:00")) +y <- data.table(event="#1",s=as.POSIXct("1968-04-19 15:20:00"),e=as.POSIXct("1968-04-24 07:20:00")) +setkey(y, s, e) +# x$s and x$e are identical (i.e., range is actually a point, but that's okay). The point is to ensure that 'x' is not within 'y'. In older versions, this will be a match because of 'incr' value being 1 + dt_eps() instead of 1-dt_eps() (because the date here is < 1970-01-01 which is a -ve numeric value internally). 1+dt_eps() should be only for +ve numerics. +test(1985.1, nrow(foverlaps(x, y, by.x=c("s", "e"), type="within", nomatch=0L)), 0L) ################################### # Add new tests above this line #