From f2d037fc7dbee5e0322d044482985497f1b560a2 Mon Sep 17 00:00:00 2001 From: Ofek Shilon Date: Sat, 8 May 2021 20:42:01 +0300 Subject: [PATCH 1/2] Fix #4981: don't trust all.vars when its arguments contains get/eval and siblings. --- R/data.table.R | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/R/data.table.R b/R/data.table.R index 9ac5ae7d6c..f0e2508e86 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -756,7 +756,14 @@ replace_dot_alias = function(e) { bysub = parse(text=paste0("list(",paste(bysub,collapse=","),")"))[[1L]] bysubl = as.list.default(bysub) } - allbyvars = intersect(all.vars(bysub), names_x) + # Fix 4981: when the 'by' expression includes get/mget/eval, all.vars + # cannot be trusted to infer all used columns + bysub.elems <- rapply(as.list(bysub), as.character) + if (any(c("eval","evalq","eval.parent","local","get","mget","dynGet") %chin% bysub.elems)) + allbyvars = NULL + else + allbyvars = intersect(all.vars(bysub), names_x) + orderedirows = .Call(CisOrderedSubset, irows, nrow(x)) # TRUE when irows is NULL (i.e. no i clause). Similar but better than is.sorted(f__) bysameorder = byindex = FALSE if (!bysub %iscall% ":" && ##Fix #4285 From df74b8a0684a09a0455af17bb1a6e8b92e17c1f2 Mon Sep 17 00:00:00 2001 From: Ofek Shilon Date: Sun, 9 May 2021 09:14:51 +0300 Subject: [PATCH 2/2] Add unit tests for #4981 --- inst/tests/tests.Rraw | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index fb69d4d8b9..0121e0e518 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -17410,3 +17410,14 @@ x = data.table(id = 1:4, key = 'id') y = data.table(id = 2:5, key = 'id') z = data.table(c=c(2L, 2L, 1L, 1L), id=c(2L, 4L, 3L, NA)) test(2178, x[y, .SD, by=.(c(2L, 1L, 2L, 1L))], z) + +# `keyby` allows mixing eval/get with direct columns, #4981 +dt <- data.table(a=c(1,2), b=c(3,4), c=c(1,0)) +dt2 <- dt[,.(suma=sum(a)), keyby=.(b=get("b"),c)] +test(2179.1, dt2[1, suma], 1) +dt2 <- dt[2,.(suma=sum(a)), keyby=.(b=b,c)] +test(2179.2, dt2[1, suma], 2) +dt2 <- dt[2,.(suma=sum(a)), keyby=.(b=get("b"))] +test(2179.3, dt2[1, suma], 2) +dt2 <- dt[2,.(suma=sum(a)), keyby=.(b=get("b"),c)] +test(2179.4, dt2[1, suma], 2)