From a3f9fb5e299c67067ce7c6676f60880b97895517 Mon Sep 17 00:00:00 2001 From: MarkusBonsch Date: Sun, 10 Dec 2017 22:22:27 +0100 Subject: [PATCH 1/2] Fixed bug in CJ with two empty columns. --- R/setkey.R | 12 +++++++++--- inst/tests/tests.Rraw | 10 ++++++++++ 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/R/setkey.R b/R/setkey.R index 59c6521c49..75c94eb719 100644 --- a/R/setkey.R +++ b/R/setkey.R @@ -334,12 +334,18 @@ CJ <- function(..., sorted = TRUE, unique = FALSE) # Cross Join will then produce a join table with the combination of all values (cross product). # The last vector is varied the quickest in the table, so dates should be last for roll for example l = list(...) - if (unique) l = lapply(l, unique) + emptyList <- FALSE ## fix for XXX + if(any(sapply(l, length) == 0)){ + ## at least one column is empty The whole thing will be empty in the end + emptyList <- TRUE + l <- lapply(l, "[", 0) + } + if (unique && !emptyList) l = lapply(l, unique) dups = FALSE # fix for #1513 - if (length(l)==1L && sorted && length(o <- forderv(l[[1L]]))) + if (length(l)==1L && !emptyList && sorted && length(o <- forderv(l[[1L]]))) l[[1L]] = l[[1L]][o] - else if (length(l) > 1L) { + else if (length(l) > 1L && !emptyList) { # using rep.int instead of rep speeds things up considerably (but attributes are dropped). 
attribs = lapply(l, attributes) # remember attributes for resetting after rep.int n = vapply(l, length, 0L) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 2f597061dc..835f881ed6 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -11187,6 +11187,16 @@ DT = setNames(data.frame(a = 1, b = 2, c = 3, d = 4), c(NA, "b", "c", NA)) setnames(DT, c('a', 'b', 'c', 'd')) test(1853, names(DT), c('a', 'b', 'c', 'd')) +# CJ bug with multiple empty vectors (#XXX) +test(1854.1, data.frame(CJ(x = integer(0))), setattr(expand.grid(x = integer(0)), "out.attrs", NULL)) +test(1854.2, data.frame(CJ(x = integer(0), y = character(0))), setattr(expand.grid(x = integer(0), y = character(0)), "out.attrs", NULL)) +test(1854.3, data.frame(CJ(x = integer(0), y = c("a", "b"))), setattr(expand.grid(x = integer(0), y = c("a", "b")), "out.attrs", NULL)) +test(1854.4, data.frame(CJ(x = integer(0), y = character(0), z = logical(0))), setattr(expand.grid(x = integer(0), y = character(0), z = logical(0)), "out.attrs", NULL)) +test(1854.5, data.frame(CJ(x = character(0), y = NA_real_)), setattr(expand.grid(x = character(0), y = NA_real_), "out.attrs", NULL)) +if ("package:bit64" %in% search()) { + test(1854.6, data.frame(CJ(x = integer64(0), y = as.integer64(2))), setattr(expand.grid(x = integer64(0), y = as.integer64(2)), "out.attrs", NULL)) +} + ########################## # TODO: Tests involving GForce functions needs to be run with optimisation level 1 and 2, so that both functions are tested all the time. From 5d4432de356c4f73333bed50a58da6bfa19a1cef Mon Sep 17 00:00:00 2001 From: MarkusBonsch Date: Sun, 10 Dec 2017 22:29:41 +0100 Subject: [PATCH 2/2] Added NEWS and references to issue. --- NEWS.md | 2 ++ R/setkey.R | 2 +- inst/tests/tests.Rraw | 2 +- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/NEWS.md b/NEWS.md index b68fe26269..791accdbdb 100644 --- a/NEWS.md +++ b/NEWS.md @@ -115,6 +115,8 @@ 27. 
`setnames` of whole table when original table had `NA` names skipped replacing those, [#2475](https://github.com/Rdatatable/data.table/issues/2475). Thanks to @franknarf1 and [BenoitLondon on StackOverflow](https://stackoverflow.com/questions/47228836/) for the report and @MichaelChirico for fixing. +28. `CJ()` now works with multiple empty vectors, [#2511](https://github.com/Rdatatable/data.table/issues/2511). Thanks to @MarkusBonsch for fixing. + #### NOTES diff --git a/R/setkey.R b/R/setkey.R index 75c94eb719..9f935ef505 100644 --- a/R/setkey.R +++ b/R/setkey.R @@ -334,7 +334,7 @@ CJ <- function(..., sorted = TRUE, unique = FALSE) # Cross Join will then produce a join table with the combination of all values (cross product). # The last vector is varied the quickest in the table, so dates should be last for roll for example l = list(...) - emptyList <- FALSE ## fix for XXX + emptyList <- FALSE ## fix for #2511 if(any(sapply(l, length) == 0)){ ## at least one column is empty The whole thing will be empty in the end emptyList <- TRUE diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 835f881ed6..d4497d2d58 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -11187,7 +11187,7 @@ DT = setNames(data.frame(a = 1, b = 2, c = 3, d = 4), c(NA, "b", "c", NA)) setnames(DT, c('a', 'b', 'c', 'd')) test(1853, names(DT), c('a', 'b', 'c', 'd')) -# CJ bug with multiple empty vectors (#XXX) +# CJ bug with multiple empty vectors (#2511) test(1854.1, data.frame(CJ(x = integer(0))), setattr(expand.grid(x = integer(0)), "out.attrs", NULL)) test(1854.2, data.frame(CJ(x = integer(0), y = character(0))), setattr(expand.grid(x = integer(0), y = character(0)), "out.attrs", NULL)) test(1854.3, data.frame(CJ(x = integer(0), y = c("a", "b"))), setattr(expand.grid(x = integer(0), y = c("a", "b")), "out.attrs", NULL))