diff --git a/NEWS.md b/NEWS.md index 834cf3ca34..7a559dc5f8 100644 --- a/NEWS.md +++ b/NEWS.md @@ -302,6 +302,8 @@ 41. Using `get`/`mget` in `j` could cause `.SDcols` to be ignored or reordered, [#1744](https://github.com/Rdatatable/data.table/issues/1744), [#1965](https://github.com/Rdatatable/data.table/issues/1965), [#2036](https://github.com/Rdatatable/data.table/issues/2036), and [#2946](https://github.com/Rdatatable/data.table/issues/2946). Thanks @franknarf1, @MichaelChirico, @TonyBonen, and Steffen J. (StackOverflow) for the reports. +42. `DT[...,by={...}]` now handles expressions in `{`, [#3156](https://github.com/Rdatatable/data.table/issues/3156). Thanks to @tdhock for the report. + #### NOTES 1. `rbindlist`'s `use.names="check"` now emits its message for automatic column names (`"V[0-9]+"`) too, [#3484](https://github.com/Rdatatable/data.table/pull/3484). See news item 5 of v1.12.2 below. diff --git a/R/data.table.R b/R/data.table.R index 8b4507daf1..a8498899f5 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -818,28 +818,28 @@ replace_dot_alias = function(e) { tt = vapply_1i(byval,length) if (any(tt!=xnrow)) stop("The items in the 'by' or 'keyby' list are length (",paste(tt,collapse=","),"). Each must be length ", xnrow, "; the same length as there are rows in x (after subsetting if i is provided).") if (is.null(bynames)) bynames = rep.int("",length(byval)) - if (any(bynames=="") && !bynull) { - for (jj in seq_along(bynames)) { - if (bynames[jj]=="") { - # Best guess. Use "month" in the case of by=month(date), use "a" in the case of by=a%%2 - byvars = all.vars(bysubl[[jj+1L]], functions = TRUE) - if (length(byvars) == 1L) tt = byvars - else { - # take the first variable that is (1) not eval (#3758) and (2) starts with a character that can't start a variable name - tt = grep("^eval$|^[^[:alpha:]. ]", byvars, invert=TRUE, value=TRUE) - # byvars but exclude functions or `0`+`1` becomes `+` - tt = if (length(tt)) tt[1L] else all.vars(bysubl[[jj+1L]])[1L] - } - # fix for #497 - if (length(byvars) > 1L && tt %chin% all.vars(jsub, FALSE)) { - bynames[jj] = deparse(bysubl[[jj+1L]]) - if (verbose) - cat("by-expression '", bynames[jj], "' is not named, and the auto-generated name '", tt, - "' clashed with variable(s) in j. Therefore assigning the entire by-expression as name.\n", sep="") - } - else bynames[jj] = tt - # if user doesn't like this inferred name, user has to use by=list() to name the column + if (length(idx <- which(!nzchar(bynames))) && !bynull) { + # TODO: improve this and unify auto-naming of jsub and bysub + if (is.name(bysubl[[1L]]) && bysubl[[1L]] == '{') bysubl = bysubl[[length(bysubl)]] # fix for #3156 + for (jj in idx) { + # Best guess. Use "month" in the case of by=month(date), use "a" in the case of by=a%%2 + byvars = all.vars(bysubl[[jj+1L]], functions = TRUE) + if (length(byvars) == 1L) tt = byvars + else { + # take the first variable that is (1) not eval (#3758) and (2) starts with a character that can't start a variable name + tt = grep("^eval$|^[^[:alpha:]. ]", byvars, invert=TRUE, value=TRUE) + # byvars but exclude functions or `0`+`1` becomes `+` + tt = if (length(tt)) tt[1L] else all.vars(bysubl[[jj+1L]])[1L] + } + # fix for #497 + if (length(byvars) > 1L && tt %chin% all.vars(jsub, FALSE)) { + bynames[jj] = deparse(bysubl[[jj+1L]]) + if (verbose) + cat("by-expression '", bynames[jj], "' is not named, and the auto-generated name '", tt, + "' clashed with variable(s) in j. Therefore assigning the entire by-expression as name.\n", sep="") } + else bynames[jj] = tt + # if user doesn't like this inferred name, user has to use by=list() to name the column } # Fix for #1334 if (any(duplicated(bynames))) { diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index b2bd1cf140..8aa14c6cec 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -16060,6 +16060,10 @@ test(2108.11, first(df), df, notOutput="xts") test(2108.12, tail(df), df, notOutput="xts") options(old) +# error in autonaming by={...}, #3156 +DT = data.table(State=c("ERROR", "COMPLETED", "ERROR"), ExitCode=c(1, 0, 2)) +test(2109, DT[ , list(count=.N), by={list(State, ExitCode)}], DT[ , count := 1L]) + ################################### # Add new tests above this line #