From ee19734123609100e537c74694bfbcdbd56a0408 Mon Sep 17 00:00:00 2001 From: Tobias Schmidt Date: Sun, 7 May 2017 17:00:17 -0700 Subject: [PATCH 1/4] retain timezones on CJ() --- NEWS.md | 1 + R/setkey.R | 4 ++++ inst/tests/tests.Rraw | 4 ++++ 3 files changed, 9 insertions(+) diff --git a/NEWS.md b/NEWS.md index 2bc4a9398e..5bab7f76d4 100644 --- a/NEWS.md +++ b/NEWS.md @@ -30,6 +30,7 @@ 5. When `fread()` and `print()` see `integer64` columns are present but package `bit64` is not installed, the warning is now displayed as intended. Thanks to a question by Santosh on r-help and forwarded by Bill Dunlap. +6. `CJ()` no longer loses the timezone information in `POSIXct` vectors, [#2029](https://github.com/Rdatatable/data.table/issues/2029). Thanks to @MarkusBonsch for reporting and to @royalts for the fix. #### NOTES diff --git a/R/setkey.R b/R/setkey.R index c13a88fd31..41dd2d51bc 100644 --- a/R/setkey.R +++ b/R/setkey.R @@ -332,6 +332,7 @@ CJ <- function(..., sorted = TRUE, unique = FALSE) dups = FALSE # fix for #1513 # using rep.int instead of rep speeds things up considerably (but attributes are dropped). j = lapply(l, class) # changed "vapply" to avoid errors with "ordered" "factor" input + tzones = lapply(l, function(col) attr(col, 'tzone')) if (length(l)==1L && sorted && length(o <- forderv(l[[1L]]))) l[[1L]] = l[[1L]][o] else if (length(l) > 1L) { @@ -353,6 +354,9 @@ CJ <- function(..., sorted = TRUE, unique = FALSE) l[[i]] = rep.int(rep.int(y, times = rep.int(x[i], n[i])), times = nrow/(x[i]*n[i])) if (any(class(l[[i]]) != j[[i]])) setattr(l[[i]], 'class', j[[i]]) # reset "Date" class - rep.int coerces to integer + if (any(!sapply(tzones, is.null))) { + setattr(l[[i]], 'tzone', tzones[[i]]) # reset tzone for POSIX* columns - rep.int coerces to integer + } } } setattr(l, "row.names", .set_row_names(length(l[[1L]]))) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 600bc5476c..b123c75a33 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -9898,6 +9898,10 @@ unlink(f) test(1761.1, fread("1\n2\n3", fill=TRUE), data.table(V1=1:3)) test(1761.2, fread("1\n2\n3", fill=FALSE), data.table(V1=1:3)) +# CJ should retain timezone information, #2029 +df <- CJ(week=as.POSIXct('2016-01-01', tz = 'UTC'), id=1:10) +test(1762, attr(df$week, 'tzone'), 'UTC') + ########################## # TODO: Tests involving GForce functions needs to be run with optimisation level 1 and 2, so that both functions are tested all the time. From 375b8f66eed18454f5c9947e675a860983022ad4 Mon Sep 17 00:00:00 2001 From: Tobias Schmidt Date: Fri, 19 May 2017 10:30:22 -0700 Subject: [PATCH 2/4] updated patch due to MarkusBonsch --- NEWS.md | 2 +- R/setkey.R | 11 ++++------ inst/tests/tests.Rraw | 49 ++++++++++++++++++++++++++++++++++++++++--- 3 files changed, 51 insertions(+), 11 deletions(-) diff --git a/NEWS.md b/NEWS.md index 5bab7f76d4..b3960950a4 100644 --- a/NEWS.md +++ b/NEWS.md @@ -30,7 +30,7 @@ 5. When `fread()` and `print()` see `integer64` columns are present but package `bit64` is not installed, the warning is now displayed as intended. Thanks to a question by Santosh on r-help and forwarded by Bill Dunlap. -6. `CJ()` no longer loses the timezone information in `POSIXct` vectors, [#2029](https://github.com/Rdatatable/data.table/issues/2029). Thanks to @MarkusBonsch for reporting and to @royalts for the fix. +6. `CJ()` no longer loses attribute information, [#2029](https://github.com/Rdatatable/data.table/issues/2029). Thanks to @MarkusBonsch and @royalts for the fix. #### NOTES diff --git a/R/setkey.R b/R/setkey.R index 41dd2d51bc..48c53091bc 100644 --- a/R/setkey.R +++ b/R/setkey.R @@ -330,12 +330,11 @@ CJ <- function(..., sorted = TRUE, unique = FALSE) if (unique) l = lapply(l, unique) dups = FALSE # fix for #1513 - # using rep.int instead of rep speeds things up considerably (but attributes are dropped). - j = lapply(l, class) # changed "vapply" to avoid errors with "ordered" "factor" input - tzones = lapply(l, function(col) attr(col, 'tzone')) if (length(l)==1L && sorted && length(o <- forderv(l[[1L]]))) l[[1L]] = l[[1L]][o] else if (length(l) > 1L) { + # using rep.int instead of rep speeds things up considerably (but attributes are dropped). + attribs = lapply(l, attributes) # remember attributes for resetting after rep.int n = vapply(l, length, 0L) nrow = prod(n) x = c(rev(take(cumprod(rev(n)))), 1L) @@ -352,10 +351,8 @@ CJ <- function(..., sorted = TRUE, unique = FALSE) l[[i]] = rep.int(y, times = nrow/(x[i]*n[i])) else l[[i]] = rep.int(rep.int(y, times = rep.int(x[i], n[i])), times = nrow/(x[i]*n[i])) - if (any(class(l[[i]]) != j[[i]])) - setattr(l[[i]], 'class', j[[i]]) # reset "Date" class - rep.int coerces to integer - if (any(!sapply(tzones, is.null))) { - setattr(l[[i]], 'tzone', tzones[[i]]) # reset tzone for POSIX* columns - rep.int coerces to integer + if (!is.null(attribs[[i]])){ + attributes(l[[i]]) <- attribs[[i]] # reset all attributes that were destroyed by rep.int } } } diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index b123c75a33..c82424ab2d 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -9898,9 +9898,52 @@ unlink(f) test(1761.1, fread("1\n2\n3", fill=TRUE), data.table(V1=1:3)) test(1761.2, fread("1\n2\n3", fill=FALSE), data.table(V1=1:3)) -# CJ should retain timezone information, #2029 -df <- CJ(week=as.POSIXct('2016-01-01', tz = 'UTC'), id=1:10) -test(1762, attr(df$week, 'tzone'), 'UTC') +# CJ retains attributes and classes, #2150 + +l <- list(a = as.POSIXct(c("2016-01-01", "2017-01-01"), tz = "UTC"), + b = as.POSIXct(c("2016-01-01", "2017-01-01")), + c = as.Date("2015-01-01"), ## according to comment about CJ loosing date class + d = factor(c("a", "b", "c"), ordered = TRUE), ## according to comment about bug with ordered factors + e = factor(c("a", "b", "c"), ordered = FALSE), + f = c(1,2), + g = c("a", "b"), + h = c(TRUE, FALSE)) +setattr(l$g, "test", "testval")## add hand-made attribute + +test(1762.1, lapply(l, attributes), lapply(do.call(CJ, l), attributes)) +test(1762.2, lapply(l, class), lapply(do.call(CJ, l), class)) + +l <- list(a = factor(c("a", "b", "c"), ordered = TRUE), + b = as.POSIXct(c("2016-01-01", "2017-01-01")), + c = as.Date("2015-01-01"), + d = factor(c("a", "b", "c"), ordered = TRUE), + e = as.POSIXct(c("2016-01-01", "2017-01-01"), tz = "UTC"), + f = c(1,2), + g = c("a", "b"), + h = c(TRUE, FALSE)) + + +test(1762.3, lapply(l, attributes), lapply(do.call(CJ, l), attributes)) +test(1762.4, lapply(l, class), lapply(do.call(CJ, l), class)) + +l <- list(a = factor(c("a", "b", "c"), ordered = TRUE), + b = as.POSIXct(c("2016-01-01", "2017-01-01")), + c = as.Date("2015-01-01"), + d = factor(c("a", "b", "c"), ordered = TRUE), + e = c(TRUE, FALSE), + f = c(1,2), + g = c("a", "b"), + h = as.POSIXct(c("2016-01-01", "2017-01-01"), tz = "UTC")) + +test(1762.5, lapply(l, attributes), lapply(do.call(CJ, l), attributes)) +test(1762.6, lapply(l, class), lapply(do.call(CJ, l), class)) + +l <- list(a = NA, + c = c(1,2), + d = as.POSIXct("2016-01-01", tz = "UTC")) + +test(1762.7, lapply(l, attributes), lapply(do.call(CJ, l), attributes)) +test(1762.8, lapply(l, class), lapply(do.call(CJ, l), class)) ########################## From 7614d62480367cc2cd3530ddf00e7b363a94fd4c Mon Sep 17 00:00:00 2001 From: Matt Dowle Date: Fri, 4 Aug 2017 15:45:37 -0700 Subject: [PATCH 3/4] Moved news item to end to workaround diff --- NEWS.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/NEWS.md b/NEWS.md index 73d8fa505f..913feca6bd 100644 --- a/NEWS.md +++ b/NEWS.md @@ -30,17 +30,17 @@ 5. When `fread()` and `print()` see `integer64` columns are present but package `bit64` is not installed, the warning is now displayed as intended. Thanks to a question by Santosh on r-help and forwarded by Bill Dunlap. -6. `CJ()` no longer loses attribute information, [#2029](https://github.com/Rdatatable/data.table/issues/2029). Thanks to @MarkusBonsch and @royalts for the pull request. +6. Setting `j = {}` no longer results in an error, [#2142](https://github.com/Rdatatable/data.table/issues/2142). Thanks Michael Chirico for the pull request. -7. Setting `j = {}` no longer results in an error, [#2142](https://github.com/Rdatatable/data.table/issues/2142). Thanks Michael Chirico for the pull request. +7. Seg fault in `rbindlist()` when one or more items are empty, [#2019](https://github.com/Rdatatable/data.table/issues/2019). Thanks Michael Lang for the pull request. -8. Seg fault in `rbindlist()` when one or more items are empty, [#2019](https://github.com/Rdatatable/data.table/issues/2019). Thanks Michael Lang for the pull request. +8. Error printing 0-length `ITime` and `NA` objects, [#2032](https://github.com/Rdatatable/data.table/issues/2032) and [#2171](https://github.com/Rdatatable/data.table/issues/2171). Thanks Michael Chirico for the pull requests and @franknarf1 for pointing out a shortcoming of the initial fix. -9. Error printing 0-length `ITime` and `NA` objects, [#2032](https://github.com/Rdatatable/data.table/issues/2032) and [#2171](https://github.com/Rdatatable/data.table/issues/2171). Thanks Michael Chirico for the pull requests and @franknarf1 for pointing out a shortcoming of the initial fix. +9. `as.IDate.POSIXct` error with `NULL` timezone, [#1973](https://github.com/Rdatatable/data.table/issues/1973). Thanks @lbilli for reporting and Michael Chirico for the pull request. -10. `as.IDate.POSIXct` error with `NULL` timezone, [#1973](https://github.com/Rdatatable/data.table/issues/1973). Thanks @lbilli for reporting and Michael Chirico for the pull request. +10. Printing a null `data.table` with `print` no longer visibly outputs `NULL`, [#1852](https://github.com/Rdatatable/data.table/issues/1852). Thanks @aaronmcdaid for spotting and @MichaelChirico for the PR. -11. Printing a null `data.table` with `print` no longer visibly outputs `NULL`, [#1852](https://github.com/Rdatatable/data.table/issues/1852). Thanks @aaronmcdaid for spotting and @MichaelChirico for the PR. +11. `CJ()` no longer loses attribute information, [#2029](https://github.com/Rdatatable/data.table/issues/2029). Thanks to @MarkusBonsch and @royalts for the pull request. #### NOTES From 9288e7eee7b80e1a79a52b8cbc1e8b3b863041a6 Mon Sep 17 00:00:00 2001 From: Matt Dowle Date: Fri, 4 Aug 2017 16:09:20 -0700 Subject: [PATCH 4/4] Moved test to the end --- inst/tests/tests.Rraw | 106 +++++++++++++++++++++--------------------- 1 file changed, 53 insertions(+), 53 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 072ce6220b..780ffcfae2 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -9918,75 +9918,28 @@ unlink(f) test(1761.1, fread("1\n2\n3", fill=TRUE), data.table(V1=1:3)) test(1761.2, fread("1\n2\n3", fill=FALSE), data.table(V1=1:3)) -# CJ retains attributes and classes, #2150 - -l <- list(a = as.POSIXct(c("2016-01-01", "2017-01-01"), tz = "UTC"), - b = as.POSIXct(c("2016-01-01", "2017-01-01")), - c = as.Date("2015-01-01"), ## according to comment about CJ loosing date class - d = factor(c("a", "b", "c"), ordered = TRUE), ## according to comment about bug with ordered factors - e = factor(c("a", "b", "c"), ordered = FALSE), - f = c(1,2), - g = c("a", "b"), - h = c(TRUE, FALSE)) -setattr(l$g, "test", "testval")## add hand-made attribute - -test(1762.1, lapply(l, attributes), lapply(do.call(CJ, l), attributes)) -test(1762.2, lapply(l, class), lapply(do.call(CJ, l), class)) - -l <- list(a = factor(c("a", "b", "c"), ordered = TRUE), - b = as.POSIXct(c("2016-01-01", "2017-01-01")), - c = as.Date("2015-01-01"), - d = factor(c("a", "b", "c"), ordered = TRUE), - e = as.POSIXct(c("2016-01-01", "2017-01-01"), tz = "UTC"), - f = c(1,2), - g = c("a", "b"), - h = c(TRUE, FALSE)) - - -test(1762.3, lapply(l, attributes), lapply(do.call(CJ, l), attributes)) -test(1762.4, lapply(l, class), lapply(do.call(CJ, l), class)) - -l <- list(a = factor(c("a", "b", "c"), ordered = TRUE), - b = as.POSIXct(c("2016-01-01", "2017-01-01")), - c = as.Date("2015-01-01"), - d = factor(c("a", "b", "c"), ordered = TRUE), - e = c(TRUE, FALSE), - f = c(1,2), - g = c("a", "b"), - h = as.POSIXct(c("2016-01-01", "2017-01-01"), tz = "UTC")) - -test(1762.5, lapply(l, attributes), lapply(do.call(CJ, l), attributes)) -test(1762.6, lapply(l, class), lapply(do.call(CJ, l), class)) - -l <- list(a = NA, - c = c(1,2), - d = as.POSIXct("2016-01-01", tz = "UTC")) - -test(1762.7, lapply(l, attributes), lapply(do.call(CJ, l), attributes)) -test(1762.8, lapply(l, class), lapply(do.call(CJ, l), class)) - # non-error with non-empty empty j, #2142 DT = data.table(a = 1:5) -test(1763, DT[ , {}], NULL) +test(1762, DT[ , {}], NULL) # rbindlist empty items segfault, #2019 x = list(list(a = 1), list(), list(a = 2)) ans = data.table(id=c(1L,3L),a=c(1,2)) -for (i in 1:100) test(1764, rbindlist(x, idcol="id"), ans) +for (i in 1:100) test(1763, rbindlist(x, idcol="id"), ans) # as.ITime(character(0)) used to fail, #2032 -test(1765.1, format(as.ITime(character(0))), character(0)) +test(1764.1, format(as.ITime(character(0))), character(0)) # Edge case from #2171 -test(1765.2, format(structure(NA_integer_, class = "ITime")), NA_character_) +test(1764.2, format(structure(NA_integer_, class = "ITime")), NA_character_) # IDateTime error when tzone is NULL, #1973 x = as.POSIXct('2017-03-17') attr(x, 'tzone') = NULL -test(1766, print(IDateTime(x)), output=".*idate.*itime.*1: 2017-03-17 00:00:00") +test(1765, print(IDateTime(x)), output=".*idate.*itime.*1: 2017-03-17 00:00:00") # print(null.data.table()) should not output NULL as well, #1852 # use capture.output() in this case rather than output= to ensure NULL is not output -test(1767, capture.output(print(data.table(NULL))), "Null data.table (0 rows and 0 cols)") +test(1766, capture.output(print(data.table(NULL))), "Null data.table (0 rows and 0 cols)") # Bug on subset of 1-row data.table when expr returns a named logical vector #2152 op = options(datatable.auto.index=FALSE) @@ -10021,6 +9974,53 @@ test(1770.6, fread(f)[256,V64], "0ABC") test(1770.7, fread(f, skip="spam"), error="not found in input") unlink(f) +# CJ retains attributes and classes, #2029, PR#2150 + +l <- list(a = as.POSIXct(c("2016-01-01", "2017-01-01"), tz = "UTC"), + b = as.POSIXct(c("2016-01-01", "2017-01-01")), + c = as.Date("2015-01-01"), ## according to comment about CJ loosing date class + d = factor(c("a", "b", "c"), ordered = TRUE), ## according to comment about bug with ordered factors + e = factor(c("a", "b", "c"), ordered = FALSE), + f = c(1,2), + g = c("a", "b"), + h = c(TRUE, FALSE)) +setattr(l$g, "test", "testval")## add hand-made attribute + +test(1771.1, lapply(l, attributes), lapply(do.call(CJ, l), attributes)) +test(1771.2, lapply(l, class), lapply(do.call(CJ, l), class)) + +l <- list(a = factor(c("a", "b", "c"), ordered = TRUE), + b = as.POSIXct(c("2016-01-01", "2017-01-01")), + c = as.Date("2015-01-01"), + d = factor(c("a", "b", "c"), ordered = TRUE), + e = as.POSIXct(c("2016-01-01", "2017-01-01"), tz = "UTC"), + f = c(1,2), + g = c("a", "b"), + h = c(TRUE, FALSE)) + + +test(1771.3, lapply(l, attributes), lapply(do.call(CJ, l), attributes)) +test(1771.4, lapply(l, class), lapply(do.call(CJ, l), class)) + +l <- list(a = factor(c("a", "b", "c"), ordered = TRUE), + b = as.POSIXct(c("2016-01-01", "2017-01-01")), + c = as.Date("2015-01-01"), + d = factor(c("a", "b", "c"), ordered = TRUE), + e = c(TRUE, FALSE), + f = c(1,2), + g = c("a", "b"), + h = as.POSIXct(c("2016-01-01", "2017-01-01"), tz = "UTC")) + +test(1771.5, lapply(l, attributes), lapply(do.call(CJ, l), attributes)) +test(1771.6, lapply(l, class), lapply(do.call(CJ, l), class)) + +l <- list(a = NA, + c = c(1,2), + d = as.POSIXct("2016-01-01", tz = "UTC")) + +test(1771.7, lapply(l, attributes), lapply(do.call(CJ, l), attributes)) +test(1771.8, lapply(l, class), lapply(do.call(CJ, l), class)) + ##########################