diff --git a/NEWS.md b/NEWS.md index 6b6999c7a6..0a127fae71 100644 --- a/NEWS.md +++ b/NEWS.md @@ -232,6 +232,8 @@ 29. `integer64` defined on a subset of a new column would leave "gibberish" on the remaining rows, [#3723](https://github.com/Rdatatable/data.table/issues/3723). A bug in `rbindlist` with the same root cause was also fixed, [#1459](https://github.com/Rdatatable/data.table/issues/1459). Thanks @shrektan and @jangorecki for the reports. +30. `groupingsets` functions now properly handle a special symbol (such as `.N`, `.GRP` or `.I`) used alone as `j` when grouping by an empty set, [#3653](https://github.com/Rdatatable/data.table/issues/3653). Thanks to @Henrik-P for the report. + #### NOTES 1. `rbindlist`'s `use.names="check"` now emits its message for automatic column names (`"V[0-9]+"`) too, [#3484](https://github.com/Rdatatable/data.table/pull/3484). See news item 5 of v1.12.2 below. diff --git a/R/groupingsets.R b/R/groupingsets.R index a13d71cd50..dadcbb0fe8 100644 --- a/R/groupingsets.R +++ b/R/groupingsets.R @@ -72,15 +72,7 @@ groupingsets.data.table = function(x, j, by, sets, .SDcols, id = FALSE, jj, ...) 
if (missing(.SDcols)) .SDcols = if (".SD" %chin% av) setdiff(names(x), by) else NULL # 0 rows template data.table to keep colorder and type - if (length(by)) { - empty = if (length(.SDcols)) x[0L, eval(jj), by, .SDcols=.SDcols] else x[0L, eval(jj), by] - } else { - empty = if (length(.SDcols)) x[0L, eval(jj), .SDcols=.SDcols] else x[0L, eval(jj)] - if (!is.data.table(empty)) { - if (length(empty)>0) empty = empty[0L] # fix for #3173 when no grouping and j constant - empty = setDT(list(empty)) # improve after #648, see comment in aggregate.set - } - } + empty = if (length(.SDcols)) x[0L, eval(jj), by, .SDcols=.SDcols] else x[0L, eval(jj), by] if (id && "grouping" %chin% names(empty)) # `j` could have been evaluated to `grouping` field stop("When using `id=TRUE` the 'j' expression must not evaluate to a column named 'grouping'.") if (anyDuplicated(names(empty)) > 0L) @@ -98,13 +90,7 @@ groupingsets.data.table = function(x, j, by, sets, .SDcols, id = FALSE, jj, ...) int64.by.cols = intersect(int64.cols, by) # aggregate function called for each grouping set aggregate.set = function(by.set) { - if (length(by.set)) { - r = if (length(.SDcols)) x[, eval(jj), by.set, .SDcols=.SDcols] else x[, eval(jj), by.set] - } else { - r = if (length(.SDcols)) x[, eval(jj), .SDcols=.SDcols] else x[, eval(jj)] - # workaround for grand total single var as data.table too, change to drop=FALSE after #648 solved - if (!is.data.table(r)) r = setDT(list(r)) - } + r = if (length(.SDcols)) x[, eval(jj), by.set, .SDcols=.SDcols] else x[, eval(jj), by.set] if (id) { # integer bit mask of aggregation levels: http://www.postgresql.org/docs/9.5/static/functions-aggregate.html#FUNCTIONS-GROUPING-TABLE # 3267: strtoi("", base = 2L) output apparently unstable across platforms diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index aade8300e6..af01a1addd 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -15738,6 +15738,15 @@ if (test_bit64) { test(2083.2, rbind(data.table(a=1:2, 
b=as.integer64(c(1,NA))), data.table(a=3L), fill=TRUE)$b, as.integer64(c(1, NA, NA))) } +# groupingsets j=.N by character(0) set #3653 +d = data.table(x = c("a", "a", "b")) +test(2084.01, groupingsets(d, j = .N, by = "x", sets = list("x", character())), data.table(x=c("a","b",NA_character_), N=c(2L,1L,3L))) +test(2084.02, groupingsets(d, j = .N, by = "x", sets = list(character())), data.table(x=NA_character_, N=3L)) +test(2084.03, groupingsets(d, j = .GRP, by = "x", sets = list("x", character())), data.table(x=c("a","b",NA_character_), GRP=c(1L,2L,1L))) +test(2084.04, groupingsets(d, j = .GRP, by = "x", sets = list(character())), data.table(x=NA_character_, GRP=1L)) +test(2084.05, groupingsets(d, j = .I, by = "x", sets = list("x", character())), data.table(x=c("a","a","b",rep(NA_character_,3L)), I=c(1:3,1:3))) +test(2084.06, groupingsets(d, j = .I, by = "x", sets = list(character())), data.table(x=rep(NA_character_,3L), I=1:3)) + ################################### # Add new tests above this line #