From 22a03c8a6417b4cb7c888c8e11b32b490e29b8f7 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Sun, 26 May 2019 13:27:37 +0800 Subject: [PATCH 1/7] Closes #2008 -- updates for IDate such that rbind.data.frame retains integer mode --- NAMESPACE | 1 + NEWS.md | 2 ++ R/IDateTime.R | 3 ++- inst/tests/tests.Rraw | 3 +++ 4 files changed, 8 insertions(+), 1 deletion(-) diff --git a/NAMESPACE b/NAMESPACE index 747ae77ab0..d96ab5cb28 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -161,6 +161,7 @@ S3method(seq, IDate) S3method(split, IDate) S3method(unique, IDate) S3method(unique, ITime) +S3method('[<-', IDate) # duplist # getdots diff --git a/NEWS.md b/NEWS.md index 1da3af114a..b88373950e 100644 --- a/NEWS.md +++ b/NEWS.md @@ -122,6 +122,8 @@ 16. `rbind` and `rbindlist` of an item containing an ordered factor with levels containing an `NA` (as opposed to an NA integer) could segfault, [#3601](https://github.com/Rdatatable/data.table/issues/3601). This was a a regression in v1.12.2. Thanks to Damian Betebenner for reporting. +17. `rbind.data.frame` on `IDate` columns corrupted the `integer` storage mode, [#2008](https://github.com/Rdatatable/data.table/issues/2008). Thanks to @rmcgehee for reporting. + #### NOTES 1. `rbindlist`'s `use.names="check"` now emits its message for automatic column names (`"V[0-9]+"`) too, [#3484](https://github.com/Rdatatable/data.table/pull/3484). See news item 5 of v1.12.2 below. diff --git a/R/IDateTime.R b/R/IDateTime.R index a26f8c7b43..41ac2e2cac 100644 --- a/R/IDateTime.R +++ b/R/IDateTime.R @@ -40,7 +40,7 @@ as.IDate.POSIXct = function(x, tz = attr(x, "tzone"), ...) { as.IDate(as.Date(x, tz = tz, ...)) } -as.IDate.IDate = function(x, ...) x +as.IDate.IDate = function(x, ...) as.integer(x) as.Date.IDate = function(x, ...) { x = as.numeric(x) @@ -55,6 +55,7 @@ c.IDate = rep.IDate = split.IDate = unique.IDate = +`[<-.IDate` = function(x, ...) { as.IDate(NextMethod()) } diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index fe4d38d0bb..7bd8f06983 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -14932,6 +14932,9 @@ test(2052, rbindlist(list(dt1, dt2), fill=TRUE), data.table(V1=c(dt1$V1, dt2$V1), V46=structure(c(3L,1L,1L,NA,3L,NA,NA,NA,NA,NA), .Label=c("Low","Typical","High",NA), class = c("ordered", "factor")))) +# rbind.data.frame doesn't respect integer storage mode of IDate +DF = data.frame(date = as.IDate(0L)) +test(2053, storage.mode(rbind(DF, DF)$date), 'integer') ################################### # Add new tests above this line # From ae3c1777c572ffefc76625732bfbbb5370f82be2 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Sun, 26 May 2019 17:45:15 +0800 Subject: [PATCH 2/7] improved implementation --- R/IDateTime.R | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/R/IDateTime.R b/R/IDateTime.R index 41ac2e2cac..20a20332f8 100644 --- a/R/IDateTime.R +++ b/R/IDateTime.R @@ -60,6 +60,15 @@ unique.IDate = as.IDate(NextMethod()) } +`[<-.IDate` = function(x, i, value) { + if (!length(value)) return(x) + value = as.integer(as.IDate(value)) + setattr(x, 'class', NULL) + x[i] = value + setattr(x, 'class', c('IDate', 'Date')) + x +} + # fix for #1315 as.list.IDate = function(x, ...) NextMethod() From 6c0dd54fc9f91deb89b4f3e19aaddf60c1318f88 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Sun, 26 May 2019 17:50:02 +0800 Subject: [PATCH 3/7] add comment & restore simple as.IDate.IDate --- R/IDateTime.R | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/R/IDateTime.R b/R/IDateTime.R index 20a20332f8..de33fca823 100644 --- a/R/IDateTime.R +++ b/R/IDateTime.R @@ -40,7 +40,7 @@ as.IDate.POSIXct = function(x, tz = attr(x, "tzone"), ...) { as.IDate(as.Date(x, tz = tz, ...)) } -as.IDate.IDate = function(x, ...) as.integer(x) +as.IDate.IDate = function(x, ...) x as.Date.IDate = function(x, ...) { x = as.numeric(x) @@ -55,11 +55,13 @@ c.IDate = rep.IDate = split.IDate = unique.IDate = -`[<-.IDate` = function(x, ...) { as.IDate(NextMethod()) } +# for #2008 -- Internal rbind calls [<-, +# which winds up (1) converting to numeric and (2) keeping IDate class +# Define our own method to prevent value ever coercing to double `[<-.IDate` = function(x, i, value) { if (!length(value)) return(x) value = as.integer(as.IDate(value)) From d39e99a1bd59ca5a0f73d3252f1f7aeb90951324 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Sun, 26 May 2019 22:49:58 +0800 Subject: [PATCH 4/7] fixed test? --- inst/tests/tests.Rraw | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 7bd8f06983..aab2cf5246 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -1002,7 +1002,7 @@ test(344, DT[,(mycols):=NULL], data.table(NULL)) # Now, as from 1.12.4, the integer IDate in i is retained as integer rather than letting the double IDate persist. DF = data.frame(x=as.IDate(c("2010-01-01","2010-01-02")), y=1:6) DT = as.data.table(rbind(DF,DF)) -test(345, DT[,sum(y),by=x], {.x=as.IDate(c("2010-01-01","2010-01-02"));mode(.x)="double";data.table(x=.x,V1=c(18L,24L))}) +test(345, DT[,sum(y),by=x], data.table(x=as.IDate(c("2010-01-01","2010-01-02")),V1=c(18L,24L))}) test(346, setkey(DT,x)[J(as.IDate("2010-01-02"))], data.table(x=as.IDate(rep("2010-01-02",6L)), y=rep(c(2L,4L,6L),2), key="x")) # Test .N==0 with nomatch=NA|0, # tests for #963 added as well From 58cbf3c7a01446f151d5f08c857373fa3a6f843f Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Sun, 26 May 2019 23:04:16 +0800 Subject: [PATCH 5/7] comments addressing test change typo --- inst/tests/tests.Rraw | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index aab2cf5246..7a3cab7d90 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -993,16 +993,17 @@ test(343, DT[,(mynewcol):=21L], data.table(b=1:3,newname=21L)) mycols = 1:2 test(344, DT[,(mycols):=NULL], data.table(NULL)) - -# It seems that the .Internal rbind of two data.frame coerces IDate to numeric. Tried defining -# "[<-.IDate" as per Tom's suggestion, and c.IDate to no avail (maybe because the .Internal code -# in bind.c doesn't look up package methods?). Anyway, as from 1.8.1, double are allowed in keys, so -# these still work but for a different reason than before 1.8.1: the results are IDate stored as double, -# rather than before when is worked because by and setkey coerced double to integer. -# Now, as from 1.12.4, the integer IDate in i is retained as integer rather than letting the double IDate persist. +# this test originally tested that IDate would come back as +# double from rbind(DF, DF) where DF is a data.frame +# with IDate column. This is exactly the bug reported in #2008, +# but the bug is elusive to track down so it took a long time +# to fix and the resulting IDate being stored as double +# was kept as a test here. With #2008, now we check it +# in fact stays as an integer. DF = data.frame(x=as.IDate(c("2010-01-01","2010-01-02")), y=1:6) DT = as.data.table(rbind(DF,DF)) -test(345, DT[,sum(y),by=x], data.table(x=as.IDate(c("2010-01-01","2010-01-02")),V1=c(18L,24L))}) +test(345, DT[,sum(y),by=x], data.table(x=as.IDate(c("2010-01-01","2010-01-02")),V1=c(18L,24L))) +# Also, as from 1.12.4, the integer IDate in i is retained as integer here as well. test(346, setkey(DT,x)[J(as.IDate("2010-01-02"))], data.table(x=as.IDate(rep("2010-01-02",6L)), y=rep(c(2L,4L,6L),2), key="x")) # Test .N==0 with nomatch=NA|0, # tests for #963 added as well From 9ed60cef1d1961dca305e2651b9553ba9c8f26f3 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Mon, 27 May 2019 00:40:45 +0800 Subject: [PATCH 6/7] coverage typo --- inst/tests/tests.Rraw | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 7a3cab7d90..b0953cc522 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -14933,9 +14933,11 @@ test(2052, rbindlist(list(dt1, dt2), fill=TRUE), data.table(V1=c(dt1$V1, dt2$V1), V46=structure(c(3L,1L,1L,NA,3L,NA,NA,NA,NA,NA), .Label=c("Low","Typical","High",NA), class = c("ordered", "factor")))) +# [<-.IDate method solves #2008 -- # rbind.data.frame doesn't respect integer storage mode of IDate DF = data.frame(date = as.IDate(0L)) -test(2053, storage.mode(rbind(DF, DF)$date), 'integer') +test(2053.1, storage.mode(rbind(DF, DF)$date), 'integer') +test(2053.2, DF$date[1L] <- integer(), integer()) ################################### # Add new tests above this line # From 908701c881a11020f78c4ffd8fc6fecb8c0c97ce Mon Sep 17 00:00:00 2001 From: mattdowle Date: Tue, 28 May 2019 17:34:02 -0700 Subject: [PATCH 7/7] tidy --- R/IDateTime.R | 4 +--- inst/tests/tests.Rraw | 14 ++++---------- 2 files changed, 5 insertions(+), 13 deletions(-) diff --git a/R/IDateTime.R b/R/IDateTime.R index de33fca823..5c4d2dfde8 100644 --- a/R/IDateTime.R +++ b/R/IDateTime.R @@ -59,9 +59,7 @@ unique.IDate = as.IDate(NextMethod()) } -# for #2008 -- Internal rbind calls [<-, -# which winds up (1) converting to numeric and (2) keeping IDate class -# Define our own method to prevent value ever coercing to double +# define this [<- method to prevent base R's internal rbind coercing integer IDate to double, #2008 `[<-.IDate` = function(x, i, value) { if (!length(value)) return(x) value = as.integer(as.IDate(value)) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 0d0783aac6..a1ee7151ef 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -993,17 +993,11 @@ test(343, DT[,(mynewcol):=21L], data.table(b=1:3,newname=21L)) mycols = 1:2 test(344, DT[,(mycols):=NULL], data.table(NULL)) -# this test originally tested that IDate would come back as -# double from rbind(DF, DF) where DF is a data.frame -# with IDate column. This is exactly the bug reported in #2008, -# but the bug is elusive to track down so it took a long time -# to fix and the resulting IDate being stored as double -# was kept as a test here. With #2008, now we check it -# in fact stays as an integer. +# this originally tested that IDate would come back as double from rbind(DF, DF) +# Now #2008 is fixed in v1.12.4 it now checks it stays as an integer. DF = data.frame(x=as.IDate(c("2010-01-01","2010-01-02")), y=1:6) DT = as.data.table(rbind(DF,DF)) test(345, DT[,sum(y),by=x], data.table(x=as.IDate(c("2010-01-01","2010-01-02")),V1=c(18L,24L))) -# Also, as from 1.12.4, the integer IDate in i is retained as integer here as well. test(346, setkey(DT,x)[J(as.IDate("2010-01-02"))], data.table(x=as.IDate(rep("2010-01-02",6L)), y=rep(c(2L,4L,6L),2), key="x")) # Test .N==0 with nomatch=NA|0, # tests for #963 added as well @@ -14966,12 +14960,12 @@ test(2052, rbindlist(list(dt1, dt2), fill=TRUE), data.table(V1=c(dt1$V1, dt2$V1), V46=structure(c(3L,1L,1L,NA,3L,NA,NA,NA,NA,NA), .Label=c("Low","Typical","High",NA), class = c("ordered", "factor")))) -# [<-.IDate method solves #2008 -- -# rbind.data.frame doesn't respect integer storage mode of IDate +# rbind.data.frame didn't respect integer storage mode of IDate, #2008 DF = data.frame(date = as.IDate(0L)) test(2053.1, storage.mode(rbind(DF, DF)$date), 'integer') test(2053.2, DF$date[1L] <- integer(), integer()) + ################################### # Add new tests above this line # ###################################