From e5f1390aba2d1f8a701b8b81a378e5a82e428ede Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Tue, 3 Mar 2020 13:56:28 +0800 Subject: [PATCH 1/3] detect new edge case in auto-naming j --- NEWS.md | 2 ++ R/data.table.R | 14 +++++++++++--- inst/tests/tests.Rraw | 5 +++++ 3 files changed, 18 insertions(+), 3 deletions(-) diff --git a/NEWS.md b/NEWS.md index 71fd76aa65..d1ed5494ab 100644 --- a/NEWS.md +++ b/NEWS.md @@ -107,6 +107,8 @@ unit = "s") 12. `rbindlist` no longer errors when coercing complex vectors to character vectors, [#4202](https://github.com/Rdatatable/data.table/issues/4202). Thanks to @sritchie73 for reporting and the PR. +13. The auto-naming logic in now detects when different branches of `j` return a different number of columns, which is either an error or inefficient, [#4274](https://github.com/Rdatatable/data.table/issues/4274). Thanks to @robitalec for the bug report. + ## NOTES 0. Retrospective license change permission was sought from and granted by 4 contributors who were missed in [PR#2456](https://github.com/Rdatatable/data.table/pull/2456), [#4140](https://github.com/Rdatatable/data.table/pull/4140). We had used [GitHub's contributor page](https://github.com/Rdatatable/data.table/graphs/contributors) which omits 3 of these due to invalid email addresses, unlike GitLab's contributor page which includes the ids. The 4th omission was a PR to a script which should not have been excluded; a script is code too. We are sorry these contributors were not properly credited before. They have now been added to the contributors list as displayed on CRAN. All the contributors of code to data.table hold its copyright jointly; your contributions belong to you. You contributed to data.table when it had a particular license at that time, and you contributed on that basis. This is why in the last license change, all contributors of code were consulted and each had a veto. diff --git a/R/data.table.R b/R/data.table.R index 9292ee940b..6d2e4b6e5b 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -883,8 +883,12 @@ replace_dot_alias = function(e) { if (is.name(thisq)) nm[jj] = drop_dot(thisq) # TO DO: if call to a[1] for example, then call it 'a' too } - if (!is.null(jvnames) && any(idx <- nm != jvnames)) - warning("Different branches of j expression produced different auto-named columns: ", brackify(sprintf('%s!=%s', nm[idx], jvnames[idx])), '; using the most "last" names', call. = FALSE) + if (!is.null(jvnames)) { + if (length(nm) != length(jvnames)) + warning("j may not evaluate to the same number of columns for each group; if you're sure this warning is in error, please put the branching logic outside of [ for efficiency") + else if (any(idx <- nm != jvnames)) + warning("Different branches of j expression produced different auto-named columns: ", brackify(sprintf('%s!=%s', nm[idx], jvnames[idx])), '; using the most "last" names', call. = FALSE) + } jvnames <<- nm # TODO: handle if() list(a, b) else list(b, a) better setattr(q, "names", NULL) # drops the names from the list so it's faster to eval the j for each group; reinstated at the end on the result. } @@ -1330,7 +1334,11 @@ replace_dot_alias = function(e) { setattr(jval,"names",NULL) # discard names of named vectors otherwise each cell in the column would have a name jval = list(jval) } - if (!is.null(jvnames) && !all(jvnames=="")) setattr(jval, 'names', jvnames) # e.g. jvnames=="N" for DT[,.N,] + if (!is.null(jvnames) && any(nzchar(jvnames))) { + # see test 2139 + if (length(jvnames) > length(jval)) jvnames = jvnames[seq_along(jval)] + setattr(jval, 'names', jvnames[seq_along(jval)]) # e.g. jvnames=="N" for DT[,.N,] + } jval = as.data.table.list(jval, .named=NULL) } diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 7cc6819e8f..79a1dc75c2 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -16846,3 +16846,8 @@ A = data.table(A=c(complex(real = 1:3, imaginary=c(0, -1, 1)), NaN)) test(2138.3, rbind(A,B), data.table(A=c(as.character(A$A), B$A))) A = data.table(A=as.complex(rep(NA, 5))) test(2138.4, rbind(A,B), data.table(A=c(as.character(A$A), B$A))) + +# compatible branches might seem incompatible if the condition is global: #4274 +DT = data.table(a=1L) +test(2139, DT[ , if (TRUE) .(a=1) else .(a=1, b=2)], DT, + warning='j may not evaluate to the same number of columns for each group') From 1d2e8a0d3514e4c5bda9614871ebe21718d22c48 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Wed, 4 Mar 2020 10:17:42 +0800 Subject: [PATCH 2/3] type int in tests --- inst/tests/tests.Rraw | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 79a1dc75c2..287df20f04 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -16849,5 +16849,5 @@ test(2138.4, rbind(A,B), data.table(A=c(as.character(A$A), B$A))) # compatible branches might seem incompatible if the condition is global: #4274 DT = data.table(a=1L) -test(2139, DT[ , if (TRUE) .(a=1) else .(a=1, b=2)], DT, +test(2139, DT[ , if (TRUE) .(a=1L) else .(a=1L, b=2L)], DT, warning='j may not evaluate to the same number of columns for each group') From bd7fe54b77cf6ffd46f8ebe5f8b9bd5f2b0fdb27 Mon Sep 17 00:00:00 2001 From: Matt Dowle Date: Thu, 13 May 2021 19:14:47 -0600 Subject: [PATCH 3/3] comment link to issue rather than test number; can get to the test number from the PR linked to issue, but it's the issue that we usually want to look at first --- NEWS.md | 2 +- R/data.table.R | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/NEWS.md b/NEWS.md index c95f332397..fe861cebcf 100644 --- a/NEWS.md +++ b/NEWS.md @@ -87,7 +87,7 @@ )] ``` -11. `DT[, if (...) .(a=1L) else .(a=1L, b=2L), by=group]` now returns a 1-column result with warning `j may not evaluate to the same number of columns for each group`, rather than the error `'names' attribute [2] must be the same length as the vector`, [#4274](https://github.com/Rdatatable/data.table/issues/4274). Thanks to @robitalec for reporting, and Michael Chirico for the PR. +11. `DT[, if (...) .(a=1L) else .(a=1L, b=2L), by=group]` now returns a 1-column result with warning `j may not evaluate to the same number of columns for each group`, rather than error `'names' attribute [2] must be the same length as the vector`, [#4274](https://github.com/Rdatatable/data.table/issues/4274). Thanks to @robitalec for reporting, and Michael Chirico for the PR. ## BUG FIXES diff --git a/R/data.table.R b/R/data.table.R index f419ff136d..3a3cf1f29d 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -1372,8 +1372,7 @@ replace_dot_alias = function(e) { jval = list(jval) } if (!is.null(jvnames) && any(nzchar(jvnames))) { - # see test 2139 - if (length(jvnames) > length(jval)) jvnames = jvnames[seq_along(jval)] + if (length(jvnames) > length(jval)) jvnames = jvnames[seq_along(jval)] #4274 setattr(jval, 'names', jvnames[seq_along(jval)]) # e.g. jvnames=="N" for DT[,.N,] } jval = as.data.table.list(jval, .named=NULL)