Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@

5. `foverlaps()` returned incorrect results overlapping on POSIXct objects which were <= `1970-01-01`, i.e., datetime values that were represented internally as -ve numeric values. This is now fixed. Closes [#3349](https://github.com/Rdatatable/data.table/issues/3349). Thanks to @lux5 for reporting.

6. Several issues were filed regarding limitations of `dcast.data.table` in handling `fun.aggregate` argument when the functions are not directly provided to the argument as `fun.aggregate <- list(sum, mean)` and instead are stored in a variable, e.g., `funs <- list(sum, mean)` and referred to as `fun.aggregate=funs`. This fix closes several issues [#1974](https://github.com/Rdatatable/data.table/issues/1974), [#1369](https://github.com/Rdatatable/data.table/issues/1369), [#2064](https://github.com/Rdatatable/data.table/issues/2064) and [#2949](https://github.com/Rdatatable/data.table/issues/2949). Thanks to @sunbee, @Ping2016, @smidelius and @d0rg0ld for reporting.

#### NOTES

1. When upgrading to 1.12.0 some Windows users might have seen `CdllVersion not found` in some circumstances. We found a way to catch that so the [helpful message](https://twitter.com/MattDowle/status/1084528873549705217) now occurs for those upgrading from versions prior to 1.12.0 too, as well as those upgrading from 1.12.0 to a later version. See item 1 in notes section of 1.12.0 below for more background.
Expand Down
11 changes: 7 additions & 4 deletions R/data.table.R
Original file line number Diff line number Diff line change
Expand Up @@ -1724,7 +1724,8 @@ chmatch2 <- function(x, table, nomatch=NA_integer_) {
gfuns = c("sum", "prod", "mean", "median", "var", "sd", ".N", "min", "max", "head", "last", "first", "tail", "[") # added .N for #5760
.ok <- function(q) {
if (dotN(q)) return(TRUE) # For #5760
cond = is.call(q) && length(q1c <- as.character(q[[1L]]))==1L && q1c %chin% gfuns && !is.call(q[[2L]])
# Need is.symbol() check. See #1369, #1974 or #2949 issues and explanation below by searching for one of these issues.
cond = is.call(q) && is.symbol(q[[1]]) && (q1c <- as.character(q[[1]])) %chin% gfuns && !is.call(q[[2L]])
# run GForce for simple f(x) calls and f(x, na.rm = TRUE)-like calls
ans = cond && (length(q)==2L || identical("na",substring(names(q)[3L], 1L, 2L)))
if (identical(ans, TRUE)) return(ans)
Expand Down Expand Up @@ -1761,9 +1762,11 @@ chmatch2 <- function(x, table, nomatch=NA_integer_) {
oldjsub = jsub
if (jsub[[1L]]=="list") {
for (ii in seq_along(jsub)[-1L]) {
if (dotN(jsub[[ii]])) next; # For #5760
if (is.call(jsub[[ii]]) && jsub[[ii]][[1L]]=="mean")
jsub[[ii]] = .optmean(jsub[[ii]])
this_jsub = jsub[[ii]]
if (dotN(this_jsub)) next; # For #5760
# Addressing #1369, #2949 and #1974. Added is.symbol() check to handle cases where expanded function definition is used insead of function names. #1369 results in (function(x) sum(x)) as jsub[[.]] from dcast.data.table.
if (is.call(this_jsub) && is.symbol(this_jsub[[1L]]) && this_jsub[[1L]]=="mean")
jsub[[ii]] = .optmean(this_jsub)
}
} else if (jsub[[1L]]=="mean") {
jsub = .optmean(jsub)
Expand Down
20 changes: 13 additions & 7 deletions R/fcast.R
Original file line number Diff line number Diff line change
Expand Up @@ -65,20 +65,20 @@ value_vars <- function(value.var, varnames) {
aggregate_funs <- function(funs, vals, sep="_", ...) {
if (is.call(funs) && funs[[1L]] == "eval")
funs = eval(funs[[2L]], parent.frame(2L), parent.frame(2L))
if (is.call(funs) && as.character(funs[[1L]]) %chin% c("c", "list"))
if (is.call(funs) && as.character(funs[[1L]]) %chin% c("c", "list")) {
funs = lapply(as.list(funs)[-1L], function(x) {
if (is.call(x) && as.character(x[[1L]]) %chin% c("c", "list")) as.list(x)[-1L] else x
})
else funs = list(funs)
} else funs = eval(funs, parent.frame(2L), parent.frame(2L))
if(is.function(funs)) funs = list(funs) # needed for cases as shown in test#1700.1
if (length(funs) != length(vals)) {
if (length(vals) == 1L)
vals = replicate(length(funs), vals)
else stop("When 'fun.aggregate' and 'value.var' are both lists, 'value.var' must be either of length =1 or =length(fun.aggregate).")
}
only_one_fun = length(unlist(funs)) == 1L
dots = list(...)
construct_funs <- function(fun, val) {
if (!is.list(fun)) fun = list(fun)
construct_funs <- function(fun, nm, val) {
ans = vector("list", length(fun)*length(val))
nms = vector("character", length(ans))
k = 1L
Expand All @@ -89,14 +89,20 @@ aggregate_funs <- function(funs, vals, sep="_", ...) {
expr = c(expr, dots)
ans[[k]] = as.call(expr)
# changed order of arguments here, #1153
nms[k] = if (only_one_fun) j else
paste(j, all.names(i, max.names=1L, functions=TRUE), sep=sep)
nms[k] = if (only_one_fun) j else paste(j, nm, sep=sep)
k = k+1L;
}
}
setattr(ans, 'names', nms)
}
ans = mapply(construct_funs, funs, vals, SIMPLIFY=FALSE)
ans = lapply(seq_along(funs), function(i) {
nm <- names(funs[i])
if (is.null(nm) || !nzchar(nm)) {
nm <- all.names(funs[[i]], max.names=1L, functions=TRUE)
}
if (!length(nm)) nm <- paste0("fun", i)
construct_funs(funs[i], nm, vals[[i]])
})
as.call(c(quote(list), unlist(ans)))
}

Expand Down
32 changes: 32 additions & 0 deletions inst/tests/tests.Rraw
Original file line number Diff line number Diff line change
Expand Up @@ -13427,6 +13427,38 @@ DT = as.data.table(mtcars)[1]
test(1986.3, DT[, colMeans(.SD), by=gear], data.table(gear=4, V1=c(21,6,160,110,3.9,2.62,16.46,0,1,4)))
test(1986.4, DT[, as.list(colMeans(.SD)), by=gear], cbind(DT[,"gear"],DT[,-"gear"]))

# tests for #2949, #1974 and #1369 - dcast not able to handle functions referred to by a variable
dt = data.table(
x=sample(5,20,TRUE),
y=sample(2,20,TRUE),
z=sample(letters[1:2], 20,TRUE),
d1 = runif(20),
d2=1L
)
myFun1 <- function(data, vars) {
mySum <- function(x) sum(x)
dcast.data.table(data, "x + y ~ z", value.var=vars, fun.aggregate=mySum)
}
myFun2 <- function(data, vars) {
myFuns <- list(f1=sum, first=function(x) x[1L])
dcast.data.table(data, "x + y ~ z", value.var=vars, fun.aggregate=myFuns)
}
funs = list(sum, mean)
vars = list("d1", "d2")
test(1987.1,
names(dcast.data.table(dt, x + y ~ z, fun=funs, value.var=vars)),
c("x", "y", "d1_fun1_a", "d1_fun1_b", "d2_fun2_a", "d2_fun2_b")
)
test(1987.2,
dcast.data.table(dt, x + y ~ z, fun=sum, value.var=vars[[1]]),
myFun1(dt, vars[[1]])
)
test(1987.3,
dcast.data.table(dt, x + y ~ z, fun=list(f1=sum, first=function(x) x[1L]), value.var=vars),
myFun2(dt, vars)
)



###################################
# Add new tests above this line #
Expand Down