From bce5251689f276f1d882fb1c2a5d35eba3152bac Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Mon, 19 Aug 2019 23:32:27 +0800 Subject: [PATCH 1/3] Closes #3780 -- as.numeric on POSIXct to prevent overflow --- NEWS.md | 2 ++ R/IDateTime.R | 2 +- inst/tests/tests.Rraw | 4 ++++ 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index bc6b599c35..93e78e7564 100644 --- a/NEWS.md +++ b/NEWS.md @@ -228,6 +228,8 @@ 27. `integer64` sum-by-group is now properly optimized, [#1647](https://github.com/Rdatatable/data.table/issues/1647), [#3464](https://github.com/Rdatatable/data.table/issues/3464). Thanks to @mlandry22-h2o for the report. +28. `as.IDate.POSIXct` is correct for UTC times before Dec. 1901 or after Jan. 2038 (was returning `NA` due to integer overflow), [#3780](https://github.com/Rdatatable/data.table/issues/3780). Thanks @gschett for the report. + #### NOTES 1. `rbindlist`'s `use.names="check"` now emits its message for automatic column names (`"V[0-9]+"`) too, [#3484](https://github.com/Rdatatable/data.table/pull/3484). See news item 5 of v1.12.2 below. diff --git a/R/IDateTime.R b/R/IDateTime.R index 8e03631066..08e97d1cb1 100644 --- a/R/IDateTime.R +++ b/R/IDateTime.R @@ -35,7 +35,7 @@ as.IDate.Date = function(x, ...) { as.IDate.POSIXct = function(x, tz = attr(x, "tzone", exact=TRUE), ...) { if (is.null(tz)) tz = "UTC" if (tz %chin% c("UTC", "GMT")) { - (setattr(as.integer(x) %/% 86400L, "class", c("IDate", "Date"))) # %/% returns new object so can use setattr() on it; wrap with () to return visibly + (setattr(as.integer(as.numeric(x) %/% 86400L), "class", c("IDate", "Date"))) # %/% returns new object so can use setattr() on it; wrap with () to return visibly } else as.IDate(as.Date(x, tz = tz, ...)) } diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 58c3c63c4a..016e48d6e1 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -15671,6 +15671,10 @@ if (test_bit64) { test(2077.06, int64_int32_match(d[, sum(i32, na.rm=TRUE), g], d[, sum(i64, na.rm=TRUE), g])) } +# #3780 POSIXct stored as numeric means as.integer before converting +# to seconds can overflow +test(2078, as.IDate(as.POSIXct("1900-01-01", tz="UTC")), as.IDate(-25567L)) + ################################### # Add new tests above this line # From 95bd96e6c8e7275e0720614be3d3c02ded1f10f4 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Mon, 19 Aug 2019 23:37:01 +0800 Subject: [PATCH 2/3] also apply to hour()/minute()/second() --- R/IDateTime.R | 6 +++--- inst/tests/tests.Rraw | 6 +++++- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/R/IDateTime.R b/R/IDateTime.R index 08e97d1cb1..129f9d621e 100644 --- a/R/IDateTime.R +++ b/R/IDateTime.R @@ -305,7 +305,7 @@ as.POSIXlt.ITime = function(x, ...) { second = function(x) { if (inherits(x, 'POSIXct') && identical(attr(x, 'tzone', exact=TRUE), 'UTC')) { # if we know the object is in UTC, can calculate the hour much faster - as.integer(x) %% 60L + as.numeric(x) %% 60L } else { as.integer(as.POSIXlt(x)$sec) } @@ -313,7 +313,7 @@ second = function(x) { minute = function(x) { if (inherits(x, 'POSIXct') && identical(attr(x, 'tzone', exact=TRUE), 'UTC')) { # ever-so-slightly faster than x %% 3600L %/% 60L - as.integer(x) %/% 60L %% 60L + as.numeric(x) %/% 60L %% 60L } else { as.POSIXlt(x)$min } @@ -321,7 +321,7 @@ minute = function(x) { hour = function(x) { if (inherits(x, 'POSIXct') && identical(attr(x, 'tzone', exact=TRUE), 'UTC')) { # ever-so-slightly faster than x %% 86400L %/% 3600L - as.integer(x) %/% 3600L %% 24L + as.numeric(x) %/% 3600L %% 24L } else { as.POSIXlt(x)$hour } diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 016e48d6e1..5548d72183 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -15673,7 +15673,11 @@ if (test_bit64) { # #3780 POSIXct stored as numeric means as.integer before converting # to seconds can overflow -test(2078, as.IDate(as.POSIXct("1900-01-01", tz="UTC")), as.IDate(-25567L)) +date=as.POSIXct("1900-01-01", tz="UTC") +test(2078.1, as.IDate(date), as.IDate(-25567L)) +test(2078.2, hour(date), 0L) +test(2078.3, minute(date), 0L) +test(2078.4, second(date), 0L) ################################### From c2605efffd19ba7b2862464e589421caee3d0c04 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Tue, 20 Aug 2019 19:47:41 +0800 Subject: [PATCH 3/3] force integer afterwards --- R/IDateTime.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/IDateTime.R b/R/IDateTime.R index 129f9d621e..c0e31bc248 100644 --- a/R/IDateTime.R +++ b/R/IDateTime.R @@ -305,7 +305,7 @@ as.POSIXlt.ITime = function(x, ...) { second = function(x) { if (inherits(x, 'POSIXct') && identical(attr(x, 'tzone', exact=TRUE), 'UTC')) { # if we know the object is in UTC, can calculate the hour much faster - as.numeric(x) %% 60L + as.integer(as.numeric(x) %% 60L) } else { as.integer(as.POSIXlt(x)$sec) } @@ -313,7 +313,7 @@ second = function(x) { minute = function(x) { if (inherits(x, 'POSIXct') && identical(attr(x, 'tzone', exact=TRUE), 'UTC')) { # ever-so-slightly faster than x %% 3600L %/% 60L - as.numeric(x) %/% 60L %% 60L + as.integer(as.numeric(x) %/% 60L %% 60L) } else { as.POSIXlt(x)$min } @@ -321,7 +321,7 @@ minute = function(x) { hour = function(x) { if (inherits(x, 'POSIXct') && identical(attr(x, 'tzone', exact=TRUE), 'UTC')) { # ever-so-slightly faster than x %% 86400L %/% 3600L - as.numeric(x) %/% 3600L %% 24L + as.integer(as.numeric(x) %/% 3600L %% 24L) } else { as.POSIXlt(x)$hour }