diff --git a/NEWS.md b/NEWS.md index 6fc96a8d62..74fc5f00e7 100644 --- a/NEWS.md +++ b/NEWS.md @@ -18,6 +18,8 @@ 5. `foverlaps()` returned incorrect results overlapping on POSIXct objects which were <= `1970-01-01`, i.e., datetime values that were represented internally as -ve numeric values. This is now fixed. Closes [#3349](https://github.com/Rdatatable/data.table/issues/3349). Thanks to @lux5 for reporting. +6. Several issues were filed regarding limitations of `dcast.data.table` in handling `fun.aggregate` argument when the functions are not directly provided to the argument as `fun.aggregate <- list(sum, mean)` and instead are stored in a variable, e.g., `funs <- list(sum, mean)` and referred to as `fun.aggregate=funs`. This fix closes several issues [#1974](https://github.com/Rdatatable/data.table/issues/1974), [#1369](https://github.com/Rdatatable/data.table/issues/1369), [#2064](https://github.com/Rdatatable/data.table/issues/2064) and [#2949](https://github.com/Rdatatable/data.table/issues/2949). Thanks to @sunbee, @Ping2016, @smidelius and @d0rg0ld for reporting. + #### NOTES 1. When upgrading to 1.12.0 some Windows users might have seen `CdllVersion not found` in some circumstances. We found a way to catch that so the [helpful message](https://twitter.com/MattDowle/status/1084528873549705217) now occurs for those upgrading from versions prior to 1.12.0 too, as well as those upgrading from 1.12.0 to a later version. See item 1 in notes section of 1.12.0 below for more background. diff --git a/R/data.table.R b/R/data.table.R index c119e6eb68..96b581b968 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -1724,7 +1724,8 @@ chmatch2 <- function(x, table, nomatch=NA_integer_) { gfuns = c("sum", "prod", "mean", "median", "var", "sd", ".N", "min", "max", "head", "last", "first", "tail", "[") # added .N for #5760 .ok <- function(q) { if (dotN(q)) return(TRUE) # For #5760 - cond = is.call(q) && length(q1c <- as.character(q[[1L]]))==1L && q1c %chin% gfuns && !is.call(q[[2L]]) + # Need is.symbol() check. See #1369, #1974 or #2949 issues and explanation below by searching for one of these issues. + cond = is.call(q) && is.symbol(q[[1]]) && (q1c <- as.character(q[[1]])) %chin% gfuns && !is.call(q[[2L]]) # run GForce for simple f(x) calls and f(x, na.rm = TRUE)-like calls ans = cond && (length(q)==2L || identical("na",substring(names(q)[3L], 1L, 2L))) if (identical(ans, TRUE)) return(ans) @@ -1761,9 +1762,11 @@ chmatch2 <- function(x, table, nomatch=NA_integer_) { oldjsub = jsub if (jsub[[1L]]=="list") { for (ii in seq_along(jsub)[-1L]) { - if (dotN(jsub[[ii]])) next; # For #5760 - if (is.call(jsub[[ii]]) && jsub[[ii]][[1L]]=="mean") - jsub[[ii]] = .optmean(jsub[[ii]]) + this_jsub = jsub[[ii]] + if (dotN(this_jsub)) next; # For #5760 + # Addressing #1369, #2949 and #1974. Added is.symbol() check to handle cases where expanded function definition is used insead of function names. #1369 results in (function(x) sum(x)) as jsub[[.]] from dcast.data.table. + if (is.call(this_jsub) && is.symbol(this_jsub[[1L]]) && this_jsub[[1L]]=="mean") + jsub[[ii]] = .optmean(this_jsub) } } else if (jsub[[1L]]=="mean") { jsub = .optmean(jsub) diff --git a/R/fcast.R b/R/fcast.R index 087f65fce7..dde6b8402c 100644 --- a/R/fcast.R +++ b/R/fcast.R @@ -65,11 +65,12 @@ value_vars <- function(value.var, varnames) { aggregate_funs <- function(funs, vals, sep="_", ...) { if (is.call(funs) && funs[[1L]] == "eval") funs = eval(funs[[2L]], parent.frame(2L), parent.frame(2L)) - if (is.call(funs) && as.character(funs[[1L]]) %chin% c("c", "list")) + if (is.call(funs) && as.character(funs[[1L]]) %chin% c("c", "list")) { funs = lapply(as.list(funs)[-1L], function(x) { if (is.call(x) && as.character(x[[1L]]) %chin% c("c", "list")) as.list(x)[-1L] else x }) - else funs = list(funs) + } else funs = eval(funs, parent.frame(2L), parent.frame(2L)) + if(is.function(funs)) funs = list(funs) # needed for cases as shown in test#1700.1 if (length(funs) != length(vals)) { if (length(vals) == 1L) vals = replicate(length(funs), vals) @@ -77,8 +78,7 @@ aggregate_funs <- function(funs, vals, sep="_", ...) { } only_one_fun = length(unlist(funs)) == 1L dots = list(...) - construct_funs <- function(fun, val) { - if (!is.list(fun)) fun = list(fun) + construct_funs <- function(fun, nm, val) { ans = vector("list", length(fun)*length(val)) nms = vector("character", length(ans)) k = 1L @@ -89,14 +89,20 @@ aggregate_funs <- function(funs, vals, sep="_", ...) { expr = c(expr, dots) ans[[k]] = as.call(expr) # changed order of arguments here, #1153 - nms[k] = if (only_one_fun) j else - paste(j, all.names(i, max.names=1L, functions=TRUE), sep=sep) + nms[k] = if (only_one_fun) j else paste(j, nm, sep=sep) k = k+1L; } } setattr(ans, 'names', nms) } - ans = mapply(construct_funs, funs, vals, SIMPLIFY=FALSE) + ans = lapply(seq_along(funs), function(i) { + nm <- names(funs[i]) + if (is.null(nm) || !nzchar(nm)) { + nm <- all.names(funs[[i]], max.names=1L, functions=TRUE) + } + if (!length(nm)) nm <- paste0("fun", i) + construct_funs(funs[i], nm, vals[[i]]) + }) as.call(c(quote(list), unlist(ans))) } diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index e58e92e357..2d5cff082f 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -13427,6 +13427,38 @@ DT = as.data.table(mtcars)[1] test(1986.3, DT[, colMeans(.SD), by=gear], data.table(gear=4, V1=c(21,6,160,110,3.9,2.62,16.46,0,1,4))) test(1986.4, DT[, as.list(colMeans(.SD)), by=gear], cbind(DT[,"gear"],DT[,-"gear"])) +# tests for #2949, #1974 and #1369 - dcast not able to handle functions referred to by a variable +dt = data.table( + x=sample(5,20,TRUE), + y=sample(2,20,TRUE), + z=sample(letters[1:2], 20,TRUE), + d1 = runif(20), + d2=1L +) +myFun1 <- function(data, vars) { + mySum <- function(x) sum(x) + dcast.data.table(data, "x + y ~ z", value.var=vars, fun.aggregate=mySum) +} +myFun2 <- function(data, vars) { + myFuns <- list(f1=sum, first=function(x) x[1L]) + dcast.data.table(data, "x + y ~ z", value.var=vars, fun.aggregate=myFuns) +} +funs = list(sum, mean) +vars = list("d1", "d2") +test(1987.1, + names(dcast.data.table(dt, x + y ~ z, fun=funs, value.var=vars)), + c("x", "y", "d1_fun1_a", "d1_fun1_b", "d2_fun2_a", "d2_fun2_b") +) +test(1987.2, + dcast.data.table(dt, x + y ~ z, fun=sum, value.var=vars[[1]]), + myFun1(dt, vars[[1]]) +) +test(1987.3, + dcast.data.table(dt, x + y ~ z, fun=list(f1=sum, first=function(x) x[1L]), value.var=vars), + myFun2(dt, vars) +) + + ################################### # Add new tests above this line #