diff --git a/NEWS.md b/NEWS.md index e70decaf0e..0b846e69fe 100644 --- a/NEWS.md +++ b/NEWS.md @@ -51,6 +51,9 @@ 14. `fread()`'s rare `Internal error: Sampling jump point 10 is before the last jump ended` has been fixed, [#2157](https://github.com/Rdatatable/data.table/issues/2157). Thanks to Frank Erickson and Artem Klevtsov for reporting with example files which are now added to the test suite. +15. `CJ()` no longer loses attribute information, [#2029](https://github.com/Rdatatable/data.table/issues/2029). Thanks to @MarkusBonsch and @royalts for the pull request. + + #### NOTES 1. `?data.table` makes explicit the option of using a `logical` vector in `j` to select columns, [#1978](https://github.com/Rdatatable/data.table/issues/1978). Thanks @Henrik-P for the note and @MichaelChirico for filing. diff --git a/R/setkey.R b/R/setkey.R index 4aada6d368..311c8aa50b 100644 --- a/R/setkey.R +++ b/R/setkey.R @@ -330,11 +330,11 @@ CJ <- function(..., sorted = TRUE, unique = FALSE) if (unique) l = lapply(l, unique) dups = FALSE # fix for #1513 - # using rep.int instead of rep speeds things up considerably (but attributes are dropped). - j = lapply(l, class) # changed "vapply" to avoid errors with "ordered" "factor" input if (length(l)==1L && sorted && length(o <- forderv(l[[1L]]))) l[[1L]] = l[[1L]][o] else if (length(l) > 1L) { + # using rep.int instead of rep speeds things up considerably (but attributes are dropped). + attribs = lapply(l, attributes) # remember attributes for resetting after rep.int n = vapply(l, length, 0L) nrow = prod(n) x = c(rev(take(cumprod(rev(n)))), 1L) @@ -351,8 +351,9 @@ CJ <- function(..., sorted = TRUE, unique = FALSE) l[[i]] = rep.int(y, times = nrow/(x[i]*n[i])) else l[[i]] = rep.int(rep.int(y, times = rep.int(x[i], n[i])), times = nrow/(x[i]*n[i])) - if (any(class(l[[i]]) != j[[i]])) - setattr(l[[i]], 'class', j[[i]]) # reset "Date" class - rep.int coerces to integer + if (!is.null(attribs[[i]])){ + attributes(l[[i]]) <- attribs[[i]] # reset all attributes that were destroyed by rep.int + } } } setattr(l, "row.names", .set_row_names(length(l[[1L]]))) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 182a2a39fa..780ffcfae2 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -9974,6 +9974,53 @@ test(1770.6, fread(f)[256,V64], "0ABC") test(1770.7, fread(f, skip="spam"), error="not found in input") unlink(f) +# CJ retains attributes and classes, #2029, PR#2150 + +l <- list(a = as.POSIXct(c("2016-01-01", "2017-01-01"), tz = "UTC"), + b = as.POSIXct(c("2016-01-01", "2017-01-01")), + c = as.Date("2015-01-01"), ## according to comment about CJ loosing date class + d = factor(c("a", "b", "c"), ordered = TRUE), ## according to comment about bug with ordered factors + e = factor(c("a", "b", "c"), ordered = FALSE), + f = c(1,2), + g = c("a", "b"), + h = c(TRUE, FALSE)) +setattr(l$g, "test", "testval")## add hand-made attribute + +test(1771.1, lapply(l, attributes), lapply(do.call(CJ, l), attributes)) +test(1771.2, lapply(l, class), lapply(do.call(CJ, l), class)) + +l <- list(a = factor(c("a", "b", "c"), ordered = TRUE), + b = as.POSIXct(c("2016-01-01", "2017-01-01")), + c = as.Date("2015-01-01"), + d = factor(c("a", "b", "c"), ordered = TRUE), + e = as.POSIXct(c("2016-01-01", "2017-01-01"), tz = "UTC"), + f = c(1,2), + g = c("a", "b"), + h = c(TRUE, FALSE)) + + +test(1771.3, lapply(l, attributes), lapply(do.call(CJ, l), attributes)) +test(1771.4, lapply(l, class), lapply(do.call(CJ, l), class)) + +l <- list(a = factor(c("a", "b", "c"), ordered = TRUE), + b = as.POSIXct(c("2016-01-01", "2017-01-01")), + c = as.Date("2015-01-01"), + d = factor(c("a", "b", "c"), ordered = TRUE), + e = c(TRUE, FALSE), + f = c(1,2), + g = c("a", "b"), + h = as.POSIXct(c("2016-01-01", "2017-01-01"), tz = "UTC")) + +test(1771.5, lapply(l, attributes), lapply(do.call(CJ, l), attributes)) +test(1771.6, lapply(l, class), lapply(do.call(CJ, l), class)) + +l <- list(a = NA, + c = c(1,2), + d = as.POSIXct("2016-01-01", tz = "UTC")) + +test(1771.7, lapply(l, attributes), lapply(do.call(CJ, l), attributes)) +test(1771.8, lapply(l, class), lapply(do.call(CJ, l), class)) + ##########################