From 6ce6020b8499fe99deab9c1ce1c295d057b373d5 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Sun, 2 Feb 2020 17:39:51 +0800 Subject: [PATCH 1/6] steps towards unifying interface to NSE internally --- R/between.R | 4 +-- R/data.table.R | 66 ++++++++++++++++++++++++-------------------------- R/fcast.R | 20 ++++++--------- R/fmelt.R | 2 +- R/nse_utils.R | 7 ++++++ R/setkey.R | 4 +-- 6 files changed, 52 insertions(+), 51 deletions(-) create mode 100644 R/nse_utils.R diff --git a/R/between.R b/R/between.R index 4e358c3310..054f9cce66 100644 --- a/R/between.R +++ b/R/between.R @@ -55,13 +55,13 @@ between = function(x, lower, upper, incbounds=TRUE, NAbounds=TRUE, check=FALSE) # %between% is vectorised, #534. "%between%" = function(x, y) { ysub = substitute(y) - if (is.call(ysub) && ysub[[1L]]==".") { + if (sub_is_fun(ysub, ".")) { ysub[[1L]]=quote(list) y = eval.parent(ysub) } if ((l <- length(y)) != 2L) { stop("RHS has length() ", l, "; expecting length 2. ", - if (is.call(ysub) && ysub[[1L]] == 'c') + if (sub_is_fun(ysub, 'c')) sprintf("Perhaps you meant %s? ", capture.output(print(`[[<-`(ysub, 1L, quote(list))))), "The first element should be the lower bound(s); ", diff --git a/R/data.table.R b/R/data.table.R index af4f14de0b..16287d3b3e 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -102,7 +102,7 @@ replace_dot_alias = function(e) { .massagei = function(x) { # J alias for list as well in i, just if the first symbol # if x = substitute(base::order) then as.character(x[[1L]]) == c("::", "base", "order") - if (is.call(x) && as.character(x[[1L]])[[1L]] %chin% c("J",".")) + if (sub_in_funs(x, c("J","."))) x[[1L]] = quote(list) x } @@ -216,7 +216,7 @@ replace_dot_alias = function(e) { jsub = replace_dot_alias(substitute(j)) root = if (is.call(jsub)) as.character(jsub[[1L]])[1L] else "" if (root == ":" || - (root %chin% c("-","!") && is.call(jsub[[2L]]) && jsub[[2L]][[1L]]=="(" && is.call(jsub[[2L]][[2L]]) && jsub[[2L]][[2L]][[1L]]==":") || + (root %chin% c("-","!") && sub_is_fun(jsub[[2L]], '(') && sub_is_fun(jsub[[2L]][[2L]], ':')) || ( (!length(av<-all.vars(jsub)) || all(substring(av,1L,2L)=="..")) && root %chin% c("","c","paste","paste0","-","!") && missingby )) { # test 763. TODO: likely that !missingby iff with==TRUE (so, with can be removed) @@ -259,7 +259,7 @@ replace_dot_alias = function(e) { if (length(jsub) == 2L) { jsub = jsub[[2L]] # to allow {} wrapping of := e.g. [,{`:=`(...)},] [#376] root = if (is.call(jsub)) as.character(jsub[[1L]])[1L] else "" - } else if (length(jsub) > 2L && is.call(jsub[[2L]]) && jsub[[2L]][[1L]] == ":=") { + } else if (length(jsub) > 2L && sub_is_fun(jsub[[2L]], ":=")) { #2142 -- j can be {} and have length 1 stop("You have wrapped := with {} which is ok but then := must be the only thing inside {}. You have something else inside {} as well. Consider placing the {} on the RHS of := instead; e.g. DT[,someCol:={tmpVar1<-...;tmpVar2<-...;tmpVar1*tmpVar2}") } @@ -323,17 +323,17 @@ replace_dot_alias = function(e) { assign(".N", nrow(x), envir=parent.frame(), inherits=FALSE) remove.N = TRUE } - if (is.call(isub) && isub[[1L]]=="eval") { # TO DO: or ..() + if (sub_is_fun(isub, "eval")) { # TO DO: or ..() isub = eval(.massagei(isub[[2L]]), parent.frame(), parent.frame()) if (is.expression(isub)) isub=isub[[1L]] } - if (is.call(isub) && isub[[1L]] == as.name("!")) { + if (sub_is_fun(isub, "!")) { notjoin = TRUE if (!missingnomatch) stop("not-join '!' prefix is present on i but nomatch is provided. Please remove nomatch."); nomatch = 0L isub = isub[[2L]] # #932 related so that !(v1 == 1) becomes v1 == 1 instead of (v1 == 1) after removing "!" - if (is.call(isub) && isub[[1L]] == "(" && !is.name(isub[[2L]])) + if (sub_is_fun(isub, "(") && !is.name(isub[[2L]])) isub = isub[[2L]] } @@ -627,7 +627,7 @@ replace_dot_alias = function(e) { # j was substituted before dealing with i so that := can set allow.cartesian=FALSE (#800) (used above in i logic) if (is.null(jsub)) return(NULL) - if (!with && is.call(jsub) && jsub[[1L]]==":=") { + if (!with && sub_is_fun(jsub, ":=")) { # TODO: make these both errors (or single long error in both cases) in next release. # i.e. using with=FALSE together with := at all will become an error. Eventually with will be removed. if (is.null(names(jsub)) && is.name(jsub[[2L]])) { @@ -641,13 +641,13 @@ replace_dot_alias = function(e) { if (!with) { # missingby was already checked above before dealing with i - if (is.call(jsub) && length(jsub)==2L && as.character(jsub[[1L]]) %chin% c("!", "-")) { # length 2 to only match unary, #2109 + if (sub_in_funs(jsub, c("!", "-")) && length(jsub)==2L) { # length 2 to only match unary, #2109 notj = TRUE jsub = jsub[[2L]] } else notj = FALSE # fix for #1216, make sure the parentheses are peeled from expr of the form (((1:4))) - while (is.call(jsub) && jsub[[1L]] == "(") jsub = as.list(jsub)[[-1L]] - if (is.call(jsub) && length(jsub) == 3L && jsub[[1L]] == ":") { + while (sub_is_fun(jsub, "(")) jsub = as.list(jsub)[[-1L]] + if (sub_is_fun_length(jsub, ':', 3L)) { j = eval(jsub, setattr(as.list(seq_along(x)), 'names', names_x), parent.frame()) # else j will be evaluated for the first time on next line } else { names(..syms) = ..syms @@ -713,7 +713,7 @@ replace_dot_alias = function(e) { bysub = replace_dot_alias(bysub) # fix for #1298 if (is.expression(bysub)) bysub=bysub[[1L]] bysubl = as.list.default(bysub) - } else if (is.call(bysub) && as.character(bysub[[1L]]) %chin% c("c","key","names", "intersect", "setdiff")) { + } else if (sub_in_funs(bysub, c("c","key","names", "intersect", "setdiff"))) { # catch common cases, so we don't have to copy x[irows] for all columns # *** TO DO ***: try() this eval first (as long as not list() or .()) and see if it evaluates to column names # to avoid the explicit c,key,names which already misses paste("V",1:10) for example @@ -723,11 +723,11 @@ replace_dot_alias = function(e) { tt = eval(bysub, parent.frame(), parent.frame()) if (!is.character(tt)) stop("by=c(...), key(...) or names(...) must evaluate to 'character'") bysub=tt - } else if (is.call(bysub) && !as.character(bysub[[1L]]) %chin% c("list", "as.list", "{", ".", ":")) { + } else if (sub_not_funs(bysub, c("list", "as.list", "{", ".", ":"))) { # potential use of function, ex: by=month(date). catch it and wrap with "(", because we need to set "bysameorder" to FALSE as we don't know if the function will return ordered results just because "date" is ordered. Fixes #2670. bysub = as.call(c(as.name('('), list(bysub))) bysubl = as.list.default(bysub) - } else if (is.call(bysub) && bysub[[1L]] == ".") bysub[[1L]] = quote(list) + } else if (sub_is_fun(bysub, ".")) bysub[[1L]] = quote(list) if (mode(bysub) == "character") { if (length(grep(",", bysub, fixed = TRUE))) { @@ -763,7 +763,7 @@ replace_dot_alias = function(e) { } if (is.null(irows)) { - if (is.call(bysub) && length(bysub) == 3L && bysub[[1L]] == ":" && is.name(bysub[[2L]]) && is.name(bysub[[3L]])) { + if (sub_is_fun_length(bysub, ':', 3L) && is.name(bysub[[2L]]) && is.name(bysub[[3L]])) { byval = eval(bysub, setattr(as.list(seq_along(x)), 'names', names_x), parent.frame()) byval = as.list(x)[byval] } else byval = eval(bysub, x, parent.frame()) @@ -784,7 +784,7 @@ replace_dot_alias = function(e) { if (verbose) cat("i clause present but columns used in by not detected. Having to subset all columns before evaluating 'by': '",deparse(by),"'\n",sep="") xss = x[irows,nomatch=nomatch,mult=mult,roll=roll,rollends=rollends] } - if (is.call(bysub) && length(bysub) == 3L && bysub[[1L]] == ":") { + if (sub_is_fun_length(bysub, ':', 3L)) { byval = eval(bysub, setattr(as.list(seq_along(xss)), 'names', names(xss)), parent.frame()) byval = as.list(xss)[byval] } else byval = eval(bysub, xss, parent.frame()) @@ -920,17 +920,17 @@ replace_dot_alias = function(e) { # FR #4979 - negative numeric and character indices for SDcols colsub = substitute(.SDcols) # fix for #5190. colsub[[1L]] gave error when it's a symbol. - if (is.call(colsub) && deparse(colsub[[1L]], 500L, backtick=FALSE) %chin% c("!", "-")) { + if (sub_in_funs(colsub, c("!", "-"))) { negate_sdcols = TRUE colsub = colsub[[2L]] } else negate_sdcols = FALSE # fix for #1216, make sure the parentheses are peeled from expr of the form (((1:4))) - while(is.call(colsub) && colsub[[1L]] == "(") colsub = as.list(colsub)[[-1L]] - if (is.call(colsub) && length(colsub) == 3L && colsub[[1L]] == ":") { + while(sub_is_fun(colsub, "(")) colsub = as.list(colsub)[[-1L]] + if (sub_is_fun_length(colsub, ':', 3L)) { # .SDcols is of the format a:b .SDcols = eval(colsub, setattr(as.list(seq_along(x)), 'names', names_x), parent.frame()) } else { - if (is.call(colsub) && colsub[[1L]] == "patterns") { + if (sub_is_fun(colsub, 'patterns')) { # each pattern gives a new filter condition, intersect the end result .SDcols = Reduce(intersect, do_patterns(colsub, names_x)) } else { @@ -998,7 +998,7 @@ replace_dot_alias = function(e) { } # Do not include z in .SD when dt[, z := {.SD; get("x")}, .SDcols = "y"] (#2326, #2338) - if (is.call(jsub) && length(jsub[[1L]]) == 1L && jsub[[1L]] == ":=" && is.symbol(jsub[[2L]])) { + if (sub_is_fun(jsub, ':=') && is.symbol(jsub[[2L]])) { jsub_lhs_symbol = as.character(jsub[[2L]]) if (jsub_lhs_symbol %chin% non_sdvars) { sdvars = setdiff(sdvars, jsub_lhs_symbol) @@ -1105,7 +1105,7 @@ replace_dot_alias = function(e) { setalloccol(x, n, verbose=verbose) # always assigns to calling scope; i.e. this scope if (is.name(name)) { assign(as.character(name),x,parent.frame(),inherits=TRUE) - } else if (is.call(name) && (name[[1L]] == "$" || name[[1L]] == "[[") && is.name(name[[2L]])) { + } else if (sub_in_funs(name, c('$', '[[')) && is.name(name[[2L]])) { k = eval(name[[2L]], parent.frame(), parent.frame()) if (is.list(k)) { origj = j = if (name[[1L]] == "$") as.character(name[[3L]]) else eval(name[[3L]], parent.frame(), parent.frame()) @@ -1295,7 +1295,7 @@ replace_dot_alias = function(e) { jval = copy(jval) } else if ( length(jcpy <- which(vapply_1c(jval, address) %chin% vapply_1c(SDenv, address))) ) { for (jidx in jcpy) jval[[jidx]] = copy(jval[[jidx]]) - } else if (is.call(jsub) && jsub[[1L]] == "get") { + } else if (sub_is_fun(jsub, 'get')) { jval = copy(jval) # fix for #1212 } } @@ -1305,7 +1305,7 @@ replace_dot_alias = function(e) { .Call(Cassign,x,irows,cols,newnames,jval) return(suppPrint(x)) } - if ((is.call(jsub) && is.list(jval) && jsub[[1L]] != "get" && !is.object(jval)) || !missingby) { + if ((sub_not_fun(jsub, 'get') && is.list(jval) && !is.object(jval)) || !missingby) { # is.call: selecting from a list column should return list # is.object: for test 168 and 168.1 (S4 object result from ggplot2::qplot). Just plain list results should result in data.table @@ -1482,7 +1482,7 @@ replace_dot_alias = function(e) { txt = as.list(jsub)[-1L] if (length(names(txt))>1L) .Call(Csetcharvec, names(txt), 2L, "") # fixes bug #4839 fun = txt[[2L]] - if (is.call(fun) && fun[[1L]]=="function") { + if (sub_is_fun(fun, "function")) { # Fix for #2381: added SDenv$.SD to 'eval' to take care of cases like: lapply(.SD, function(x) weighted.mean(x, bla)) where "bla" is a column in DT # http://stackoverflow.com/questions/13441868/data-table-and-stratified-means # adding this does not compromise in speed (that is, not any lesser than without SDenv$.SD) @@ -1589,7 +1589,7 @@ replace_dot_alias = function(e) { jvnames = c(jvnames, jn__) jsubl[[i_]] = jl__ } - } else if (is.call(this) && length(this) > 1L && as.character(this[[1L]]) %chin% optfuns) { + } else if (sub_in_funs_length(this, optfuns, 1L, `>`)) { jvnames = c(jvnames, if (is.null(names(jsubl))) "" else names(jsubl)[i_]) } else if ( length(this) == 3L && (this[[1L]] == "[" || this[[1L]] == "head") && this[[2L]] == ".SD" && (is.numeric(this[[3L]]) || this[[3L]] == ".N") ) { @@ -1636,7 +1636,7 @@ replace_dot_alias = function(e) { if (getOption("datatable.optimize")>=2L && !is.data.table(i) && !byjoin && length(f__) && !length(lhs)) { if (!length(ansvars) && !use.I) { GForce = FALSE - if ( (is.name(jsub) && jsub == ".N") || (is.call(jsub) && length(jsub)==2L && jsub[[1L]]== "list" && jsub[[2L]] == ".N") ) { + if ( (is.name(jsub) && jsub == ".N") || (sub_is_fun_length(jsub, 'list', 2L) && jsub[[2L]] == ".N") ) { GForce = TRUE if (verbose) cat("GForce optimized j to '",deparse(jsub, width.cutoff=200L, nlines=1L),"'\n",sep="") } @@ -1682,11 +1682,9 @@ replace_dot_alias = function(e) { oldjsub = jsub if (jsub[[1L]]=="list") { # Addressing #1369, #2949 and #1974. This used to be 30s (vs 0.5s) with 30K elements items in j, #1470. Could have been dotN() and/or the for-looped if() - todo = sapply(jsub, function(x) { - is.call(x) && is.symbol(x[[1L]]) && x[[1L]]=="mean" - # jsub[[1]]=="list" so the first item will always be FALSE - # is.symbol() for when expanded function definition is used instead of function names; #1369 results in (function(x) sum(x)) as jsub[[.]] from dcast.data.table - }) + # jsub[[1]]=="list" so the first item will always be FALSE + # is.symbol() for when expanded function definition is used instead of function names; #1369 results in (function(x) sum(x)) as jsub[[.]] from dcast.data.table + todo = sapply(jsub, sub_is_fun, 'mean') if (any(todo)) { w = which(todo) jsub[w] = lapply(jsub[w], .optmean) @@ -2745,7 +2743,7 @@ setDT = function(x, keep.rownames=FALSE, key=NULL, check.names=FALSE) { if (is.name(name)) { name = as.character(name) assign(name, x, parent.frame(), inherits=TRUE) - } else if (is.call(name) && (name[[1L]] == "$" || name[[1L]] == "[[") && is.name(name[[2L]])) { + } else if (sub_in_funs(name, c('$', '[[')) && is.name(name[[2L]])) { # common case is call from 'lapply()' k = eval(name[[2L]], parent.frame(), parent.frame()) if (is.list(k)) { @@ -3012,10 +3010,10 @@ isReallyReal = function(x) { #' 'ops': integer vector. Gives the indices of the operators that connect the columns in x and i. ops = c("==", "<=", "<", ">=", ">", "!=") pat = paste0("(", ops, ")", collapse="|") - if (is.call(onsub) && onsub[[1L]] == "eval") { + if (sub_is_fun(onsub, 'eval')) { onsub = eval(onsub[[2L]], parent.frame(2L), parent.frame(2L)) } - if (is.call(onsub) && as.character(onsub[[1L]]) %chin% c("list", ".")) { + if (sub_in_funs(onsub, c('list', '.'))) { spat = paste0("[ ]+(", pat, ")[ ]+") onsub = lapply(as.list(onsub)[-1L], function(x) gsub(spat, "\\1", deparse(x, width.cutoff=500L))) onsub = as.call(c(quote(c), onsub)) diff --git a/R/fcast.R b/R/fcast.R index a619014b04..8efcd8779c 100644 --- a/R/fcast.R +++ b/R/fcast.R @@ -39,16 +39,12 @@ check_formula = function(formula, varnames, valnames) { deparse_formula = function(expr, varnames, allvars) { lvars = lapply(expr, function(this) { - if (is.call(this)) { - if (this[[1L]] == quote(`+`)) + if (sub_is_fun(this, '+')) { unlist(deparse_formula(as.list(this)[-1L], varnames, allvars)) - else this - } else if (is.name(this)) { - if (this == quote(`...`)) { - subvars = setdiff(varnames, allvars) - lapply(subvars, as.name) - } else this - } + } else if (is.name(this) && this == quote(`...`)) { + subvars = setdiff(varnames, allvars) + lapply(subvars, as.name) + } else this }) lvars = lapply(lvars, function(x) if (length(x) && !is.list(x)) list(x) else x) } @@ -65,11 +61,11 @@ value_vars = function(value.var, varnames) { } aggregate_funs = function(funs, vals, sep="_", ...) { - if (is.call(funs) && funs[[1L]] == "eval") + if (sub_is_fun(funs, 'eval')) funs = eval(funs[[2L]], parent.frame(2L), parent.frame(2L)) - if (is.call(funs) && as.character(funs[[1L]]) %chin% c("c", "list")) { + if (sub_in_funs(funs, c('c', 'list'))) { funs = lapply(as.list(funs)[-1L], function(x) { - if (is.call(x) && as.character(x[[1L]]) %chin% c("c", "list")) as.list(x)[-1L] else x + if (sub_in_funs(x, c('c', 'list'))) as.list(x)[-1L] else x }) } else funs = eval(funs, parent.frame(2L), parent.frame(2L)) if(is.function(funs)) funs = list(funs) # needed for cases as shown in test#1700.1 diff --git a/R/fmelt.R b/R/fmelt.R index 32e023f642..e07d87e28d 100644 --- a/R/fmelt.R +++ b/R/fmelt.R @@ -35,7 +35,7 @@ melt.data.table = function(data, id.vars, measure.vars, variable.name = "variabl if (missing(id.vars)) id.vars=NULL if (missing(measure.vars)) measure.vars = NULL measure.sub = substitute(measure.vars) - if (is.call(measure.sub) && measure.sub[[1L]] == "patterns") { + if (sub_is_fun(measure.sub, "patterns")) { measure.vars = do_patterns(measure.sub, names(data)) } if (is.list(measure.vars) && length(measure.vars) > 1L) { diff --git a/R/nse_utils.R b/R/nse_utils.R new file mode 100644 index 0000000000..8d4bcfd1ee --- /dev/null +++ b/R/nse_utils.R @@ -0,0 +1,7 @@ +sub_is_fun = function(e, fun) is.call(e) && is.symbol(e[[1L]]) && e[[1L]] == fun +sub_is_fun_length = function(e, fun, n, compare = `==`) is.call(e) && compare(length(e), n) && is.symbol(e[[1L]]) && e[[1L]] == fun +sub_not_fun = function(e, fun) is.call(e) && is.symbol(e[[1L]]) && e[[1L]] != fun +# TODO: chmatch could work on expressions directly for conciseness here +sub_in_funs = function(e, funs) is.call(e) && is.symbol(e[[1L]]) && as.character(e[[1L]]) %chin% funs +sub_in_funs_length = function(e, funs, n, compare = `==`) is.call(e) && compare(length(e), n) && is.symbol(e[[1L]]) && as.character(e[[1L]]) %chin% funs +sub_not_funs = function(e, funs) is.call(e) && is.symbol(e[[1L]]) && !as.character(e[[1L]]) %chin% funs \ No newline at end of file diff --git a/R/setkey.R b/R/setkey.R index 63c6155f68..f005e3bcbd 100644 --- a/R/setkey.R +++ b/R/setkey.R @@ -200,8 +200,8 @@ forder = function(..., na.last=TRUE, decreasing=FALSE) # We intercept the unevaluated expressions and massage them before evaluating in with(DT) scope or not depending on the first item. for (i in seq.int(2L, length(sub))) { v = sub[[i]] - while (is.call(v) && length(v)==2L && ((s<-v[[1L]])=="-" || s=="+")) { - if (s=="-") asc[i-1L] = -asc[i-1L] + while (sub_in_funs_length(v, c('-', '+'), 2L)) { + if (v[[1L]] == "-") asc[i-1L] = -asc[i-1L] sub[[i]] = v = v[[2L]] # remove the leading +/- which is the 2nd item since length(v)==2; i.e. monadic +/- } } From f2b651048715c40449394e935794c97ee9219e10 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Sun, 2 Feb 2020 17:40:07 +0800 Subject: [PATCH 2/6] trailing newline --- R/nse_utils.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/nse_utils.R b/R/nse_utils.R index 8d4bcfd1ee..1c39bc0d48 100644 --- a/R/nse_utils.R +++ b/R/nse_utils.R @@ -4,4 +4,4 @@ sub_not_fun = function(e, fun) is.call(e) && is.symbol(e[[1L]]) && e[[1L]] != fu # TODO: chmatch could work on expressions directly for conciseness here sub_in_funs = function(e, funs) is.call(e) && is.symbol(e[[1L]]) && as.character(e[[1L]]) %chin% funs sub_in_funs_length = function(e, funs, n, compare = `==`) is.call(e) && compare(length(e), n) && is.symbol(e[[1L]]) && as.character(e[[1L]]) %chin% funs -sub_not_funs = function(e, funs) is.call(e) && is.symbol(e[[1L]]) && !as.character(e[[1L]]) %chin% funs \ No newline at end of file +sub_not_funs = function(e, funs) is.call(e) && is.symbol(e[[1L]]) && !as.character(e[[1L]]) %chin% funs From 3ead0f73e9c6d1b015ca1b0849a6e2125eca48a3 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Mon, 3 Feb 2020 00:24:01 +0800 Subject: [PATCH 3/6] %chin% accepts SYMSXP --- R/data.table.R | 16 ++++++++-------- R/nse_utils.R | 6 +++--- src/chmatch.c | 8 +++++++- 3 files changed, 18 insertions(+), 12 deletions(-) diff --git a/R/data.table.R b/R/data.table.R index 16287d3b3e..5cd4f790be 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -701,7 +701,7 @@ replace_dot_alias = function(e) { # may evaluate to NULL | character() | "" | list(), likely a result of a user expression where no-grouping is one case being loop'd through bysubl = as.list.default(bysub) bysuborig = bysub - if (is.name(bysub) && !(as.character(bysub) %chin% names_x)) { # TO DO: names(x),names(i),and i. and x. prefixes + if (is.name(bysub) && !(bysub %chin% names_x)) { # TO DO: names(x),names(i),and i. and x. prefixes bysub = eval(bysub, parent.frame(), parent.frame()) # fix for # 5106 - http://stackoverflow.com/questions/19983423/why-by-on-a-vector-not-from-a-data-table-column-is-very-slow # case where by=y where y is not a column name, and not a call/symbol/expression, but an atomic vector outside of DT. @@ -802,7 +802,7 @@ replace_dot_alias = function(e) { # the rest now fall through } else bynames = names(byval) if (is.atomic(byval)) { - if (is.character(byval) && length(byval)<=ncol(x) && !(is.name(bysub) && as.character(bysub) %chin% names_x) ) { + if (is.character(byval) && length(byval)<=ncol(x) && !(is.name(bysub) && bysub %chin% names_x) ) { stop("'by' appears to evaluate to column names but isn't c() or key(). Use by=list(...) if you can. Otherwise, by=eval",deparse(bysub)," should work. This is for efficiency so data.table can detect which columns are needed.") } else { # by may be a single unquoted column name but it must evaluate to list so this is a convenience to users. Could also be a single expression here such as DT[,sum(v),by=colA%%2] @@ -865,7 +865,7 @@ replace_dot_alias = function(e) { # e.g. DT[, .(a=sum(v), v, .N), by=] should create columns named a, v, N do_j_names = function(q) { if (!is.call(q) || !is.name(q[[1L]])) return(q) - if (as.character(q[[1L]]) %chin% c('list', '.')) { + if (q[[1L]] %chin% c('list', '.')) { q[[1L]] = quote(list) qlen = length(q) if (qlen>1L) { @@ -1473,7 +1473,7 @@ replace_dot_alias = function(e) { lockBinding(".iSD",SDenv) GForce = FALSE - if ( getOption("datatable.optimize")>=1L && (is.call(jsub) || (is.name(jsub) && as.character(jsub)[1L] %chin% c(".SD", ".N"))) ) { # Ability to turn off if problems or to benchmark the benefit + if ( getOption("datatable.optimize")>=1L && (is.call(jsub) || (is.name(jsub) && jsub[[1L]] %chin% c(".SD", ".N"))) ) { # Ability to turn off if problems or to benchmark the benefit # Optimization to reduce overhead of calling lapply over and over for each group oldjsub = jsub funi = 1L # Fix for #985 @@ -1646,14 +1646,14 @@ replace_dot_alias = function(e) { if (dotN(q)) return(TRUE) # For #5760 # run GForce for simple f(x) calls and f(x, na.rm = TRUE)-like calls where x is a column of .SD # is.symbol() is for #1369, #1974 and #2949 - if (!(is.call(q) && is.symbol(q[[1L]]) && is.symbol(q[[2L]]) && (q1c <- as.character(q[[1L]])) %chin% gfuns)) return(FALSE) - if (!(q2c<-as.character(q[[2L]])) %chin% names(SDenv$.SDall) && q2c!=".I") return(FALSE) # 875 - if ((length(q)==2L || identical("na",substring(names(q)[3L], 1L, 2L))) && (!q1c %chin% c("head","tail"))) return(TRUE) + if (!(is.call(q) && is.symbol(q[[1L]]) && is.symbol(q[[2L]]) && (q1 <- q[[1L]]) %chin% gfuns)) return(FALSE) + if (!(q2 <- q[[2L]]) %chin% names(SDenv$.SDall) && q2 != ".I") return(FALSE) # 875 + if ((length(q)==2L || identical("na",substring(names(q)[3L], 1L, 2L))) && (!q1 %chin% c("head","tail"))) return(TRUE) # ... head-tail uses default value n=6 which as of now should not go gforce ^^ # otherwise there must be three arguments, and only in two cases: # 1) head/tail(x, 1) or 2) x[n], n>0 length(q)==3L && length(q3 <- q[[3L]])==1L && is.numeric(q3) && - ( (q1c %chin% c("head", "tail") && q3==1L) || ((q1c == "[" || (q1c == "[[" && eval(call('is.atomic', q[[2L]]), envir=x))) && q3>0L) ) + ( (q1 %chin% c("head", "tail") && q3==1L) || ((q1 == "[" || (q1 = "[[" && eval(call('is.atomic', q[[2L]]), envir=x))) && q3>0L) ) } if (jsub[[1L]]=="list") { GForce = TRUE diff --git a/R/nse_utils.R b/R/nse_utils.R index 1c39bc0d48..87a7e3577f 100644 --- a/R/nse_utils.R +++ b/R/nse_utils.R @@ -2,6 +2,6 @@ sub_is_fun = function(e, fun) is.call(e) && is.symbol(e[[1L]]) && e[[1L]] == fun sub_is_fun_length = function(e, fun, n, compare = `==`) is.call(e) && compare(length(e), n) && is.symbol(e[[1L]]) && e[[1L]] == fun sub_not_fun = function(e, fun) is.call(e) && is.symbol(e[[1L]]) && e[[1L]] != fun # TODO: chmatch could work on expressions directly for conciseness here -sub_in_funs = function(e, funs) is.call(e) && is.symbol(e[[1L]]) && as.character(e[[1L]]) %chin% funs -sub_in_funs_length = function(e, funs, n, compare = `==`) is.call(e) && compare(length(e), n) && is.symbol(e[[1L]]) && as.character(e[[1L]]) %chin% funs -sub_not_funs = function(e, funs) is.call(e) && is.symbol(e[[1L]]) && !as.character(e[[1L]]) %chin% funs +sub_in_funs = function(e, funs) is.call(e) && is.symbol(e[[1L]]) && e[[1L]] %chin% funs +sub_in_funs_length = function(e, funs, n, compare = `==`) is.call(e) && compare(length(e), n) && is.symbol(e[[1L]]) && e[[1L]] %chin% funs +sub_not_funs = function(e, funs) is.call(e) && is.symbol(e[[1L]]) && !e[[1L]] %chin% funs diff --git a/src/chmatch.c b/src/chmatch.c index 8284a5677a..6fb3fbec2a 100644 --- a/src/chmatch.c +++ b/src/chmatch.c @@ -1,7 +1,13 @@ #include "data.table.h" static SEXP chmatchMain(SEXP x, SEXP table, int nomatch, bool chin, bool chmatchdup) { - if (!isString(x) && !isNull(x)) error(_("x is type '%s' (must be 'character' or NULL)"), type2char(TYPEOF(x))); + if (!isString(x) && !isNull(x)) { + // for use in sub_*_funs from nse_utils.R + if (TYPEOF(x) == SYMSXP) { + return chmatchMain(coerceVector(x, STRSXP), table, nomatch, chin, chmatchdup); + } + error(_("x is type '%s' (must be 'character' or NULL)"), type2char(TYPEOF(x))); + } if (!isString(table) && !isNull(table)) error(_("table is type '%s' (must be 'character' or NULL)"), type2char(TYPEOF(table))); if (chin && chmatchdup) error(_("Internal error: either chin or chmatchdup should be true not both")); // # nocov const int xlen = length(x); From 5c7c272c1adb26c7b1cc2c328c84e895793a33ef Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Mon, 3 Feb 2020 00:29:46 +0800 Subject: [PATCH 4/6] typos --- R/data.table.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/data.table.R b/R/data.table.R index 5cd4f790be..9f0b238c18 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -1473,7 +1473,7 @@ replace_dot_alias = function(e) { lockBinding(".iSD",SDenv) GForce = FALSE - if ( getOption("datatable.optimize")>=1L && (is.call(jsub) || (is.name(jsub) && jsub[[1L]] %chin% c(".SD", ".N"))) ) { # Ability to turn off if problems or to benchmark the benefit + if ( getOption("datatable.optimize")>=1L && (is.call(jsub) || (is.name(jsub) && jsub %chin% c(".SD", ".N"))) ) { # Ability to turn off if problems or to benchmark the benefit # Optimization to reduce overhead of calling lapply over and over for each group oldjsub = jsub funi = 1L # Fix for #985 @@ -1653,7 +1653,7 @@ replace_dot_alias = function(e) { # otherwise there must be three arguments, and only in two cases: # 1) head/tail(x, 1) or 2) x[n], n>0 length(q)==3L && length(q3 <- q[[3L]])==1L && is.numeric(q3) && - ( (q1 %chin% c("head", "tail") && q3==1L) || ((q1 == "[" || (q1 = "[[" && eval(call('is.atomic', q[[2L]]), envir=x))) && q3>0L) ) + ( (q1 %chin% c("head", "tail") && q3==1L) || ((q1 == "[" || (q1 == "[[" && eval(call('is.atomic', q[[2L]]), envir=x))) && q3>0L) ) } if (jsub[[1L]]=="list") { GForce = TRUE From 9f7b0399d0c1924a9f7d1dc60d0074808d34cabf Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Mon, 3 Feb 2020 00:34:08 +0800 Subject: [PATCH 5/6] add whitespace for codecov --- src/chmatch.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/src/chmatch.c b/src/chmatch.c index 6fb3fbec2a..0ebea9a4c3 100644 --- a/src/chmatch.c +++ b/src/chmatch.c @@ -8,15 +8,25 @@ static SEXP chmatchMain(SEXP x, SEXP table, int nomatch, bool chin, bool chmatch } error(_("x is type '%s' (must be 'character' or NULL)"), type2char(TYPEOF(x))); } - if (!isString(table) && !isNull(table)) error(_("table is type '%s' (must be 'character' or NULL)"), type2char(TYPEOF(table))); - if (chin && chmatchdup) error(_("Internal error: either chin or chmatchdup should be true not both")); // # nocov + if (!isString(table) && !isNull(table)) + error(_("table is type '%s' (must be 'character' or NULL)"), type2char(TYPEOF(table))); + if (chin && chmatchdup) + error(_("Internal error: either chin or chmatchdup should be true not both")); // # nocov const int xlen = length(x); const int tablelen = length(table); // allocations up front before savetl starts in case allocs fail SEXP ans = PROTECT(allocVector(chin?LGLSXP:INTSXP, xlen)); - if (xlen==0) { UNPROTECT(1); return ans; } // no need to look at table when x is empty + if (xlen==0) { // no need to look at table when x is empty + UNPROTECT(1); + return ans; + } int *ansd = INTEGER(ans); - if (tablelen==0) { const int val=(chin?0:nomatch), n=xlen; for (int i=0; i Date: Sun, 16 Feb 2020 01:21:17 -0700 Subject: [PATCH 6/6] simplified to one new %iscall%, and avoided the coerce of sym to char --- R/between.R | 4 ++-- R/data.table.R | 63 +++++++++++++++++++++++++------------------------- R/fcast.R | 12 +++++----- R/fmelt.R | 2 +- R/nse_utils.R | 7 ------ R/setkey.R | 2 +- R/utils.R | 3 +++ src/chmatch.c | 28 +++++++++++++--------- 8 files changed, 61 insertions(+), 60 deletions(-) delete mode 100644 R/nse_utils.R diff --git a/R/between.R b/R/between.R index 054f9cce66..f5a6600da6 100644 --- a/R/between.R +++ b/R/between.R @@ -55,13 +55,13 @@ between = function(x, lower, upper, incbounds=TRUE, NAbounds=TRUE, check=FALSE) # %between% is vectorised, #534. "%between%" = function(x, y) { ysub = substitute(y) - if (sub_is_fun(ysub, ".")) { + if (ysub %iscall% ".") { ysub[[1L]]=quote(list) y = eval.parent(ysub) } if ((l <- length(y)) != 2L) { stop("RHS has length() ", l, "; expecting length 2. ", - if (sub_is_fun(ysub, 'c')) + if (ysub %iscall% 'c') sprintf("Perhaps you meant %s? ", capture.output(print(`[[<-`(ysub, 1L, quote(list))))), "The first element should be the lower bound(s); ", diff --git a/R/data.table.R b/R/data.table.R index b78dadaef8..2a21b5198b 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -102,7 +102,7 @@ replace_dot_alias = function(e) { .massagei = function(x) { # J alias for list as well in i, just if the first symbol # if x = substitute(base::order) then as.character(x[[1L]]) == c("::", "base", "order") - if (sub_in_funs(x, c("J","."))) + if (x %iscall% c("J",".")) x[[1L]] = quote(list) x } @@ -216,7 +216,7 @@ replace_dot_alias = function(e) { jsub = replace_dot_alias(substitute(j)) root = if (is.call(jsub)) as.character(jsub[[1L]])[1L] else "" if (root == ":" || - (root %chin% c("-","!") && sub_is_fun(jsub[[2L]], '(') && sub_is_fun(jsub[[2L]][[2L]], ':')) || + (root %chin% c("-","!") && jsub[[2L]] %iscall% '(' && jsub[[2L]][[2L]] %iscall% ':') || ( (!length(av<-all.vars(jsub)) || all(substring(av,1L,2L)=="..")) && root %chin% c("","c","paste","paste0","-","!") && missingby )) { # test 763. TODO: likely that !missingby iff with==TRUE (so, with can be removed) @@ -259,7 +259,7 @@ replace_dot_alias = function(e) { if (length(jsub) == 2L) { jsub = jsub[[2L]] # to allow {} wrapping of := e.g. [,{`:=`(...)},] [#376] root = if (is.call(jsub)) as.character(jsub[[1L]])[1L] else "" - } else if (length(jsub) > 2L && sub_is_fun(jsub[[2L]], ":=")) { + } else if (length(jsub) > 2L && jsub[[2L]] %iscall% ":=") { #2142 -- j can be {} and have length 1 stop("You have wrapped := with {} which is ok but then := must be the only thing inside {}. You have something else inside {} as well. Consider placing the {} on the RHS of := instead; e.g. DT[,someCol:={tmpVar1<-...;tmpVar2<-...;tmpVar1*tmpVar2}") } @@ -323,17 +323,17 @@ replace_dot_alias = function(e) { assign(".N", nrow(x), envir=parent.frame(), inherits=FALSE) remove.N = TRUE } - if (sub_is_fun(isub, "eval")) { # TO DO: or ..() + if (isub %iscall% "eval") { # TO DO: or ..() isub = eval(.massagei(isub[[2L]]), parent.frame(), parent.frame()) if (is.expression(isub)) isub=isub[[1L]] } - if (sub_is_fun(isub, "!")) { + if (isub %iscall% "!") { notjoin = TRUE if (!missingnomatch) stop("not-join '!' prefix is present on i but nomatch is provided. Please remove nomatch."); nomatch = 0L isub = isub[[2L]] # #932 related so that !(v1 == 1) becomes v1 == 1 instead of (v1 == 1) after removing "!" - if (sub_is_fun(isub, "(") && !is.name(isub[[2L]])) + if (isub %iscall% "(" && !is.name(isub[[2L]])) isub = isub[[2L]] } @@ -632,7 +632,7 @@ replace_dot_alias = function(e) { # j was substituted before dealing with i so that := can set allow.cartesian=FALSE (#800) (used above in i logic) if (is.null(jsub)) return(NULL) - if (!with && sub_is_fun(jsub, ":=")) { + if (!with && jsub %iscall% ":=") { # TODO: make these both errors (or single long error in both cases) in next release. # i.e. using with=FALSE together with := at all will become an error. Eventually with will be removed. if (is.null(names(jsub)) && is.name(jsub[[2L]])) { @@ -646,13 +646,13 @@ replace_dot_alias = function(e) { if (!with) { # missingby was already checked above before dealing with i - if (sub_in_funs(jsub, c("!", "-")) && length(jsub)==2L) { # length 2 to only match unary, #2109 + if (jsub %iscall% c("!", "-") && length(jsub)==2L) { # length 2 to only match unary, #2109 notj = TRUE jsub = jsub[[2L]] } else notj = FALSE # fix for #1216, make sure the parentheses are peeled from expr of the form (((1:4))) - while (sub_is_fun(jsub, "(")) jsub = as.list(jsub)[[-1L]] - if (sub_is_fun_length(jsub, ':', 3L)) { + while (jsub %iscall% "(") jsub = as.list(jsub)[[-1L]] + if (jsub %iscall% ":" && length(jsub)==3L) { j = eval(jsub, setattr(as.list(seq_along(x)), 'names', names_x), parent.frame()) # else j will be evaluated for the first time on next line } else { names(..syms) = ..syms @@ -718,7 +718,7 @@ replace_dot_alias = function(e) { bysub = replace_dot_alias(bysub) # fix for #1298 if (is.expression(bysub)) bysub=bysub[[1L]] bysubl = as.list.default(bysub) - } else if (sub_in_funs(bysub, c("c","key","names", "intersect", "setdiff"))) { + } else if (bysub %iscall% c("c","key","names", "intersect", "setdiff")) { # catch common cases, so we don't have to copy x[irows] for all columns # *** TO DO ***: try() this eval first (as long as not list() or .()) and see if it evaluates to column names # to avoid the explicit c,key,names which already misses paste("V",1:10) for example @@ -728,11 +728,11 @@ replace_dot_alias = function(e) { tt = eval(bysub, parent.frame(), parent.frame()) if (!is.character(tt)) stop("by=c(...), key(...) or names(...) must evaluate to 'character'") bysub=tt - } else if (sub_not_funs(bysub, c("list", "as.list", "{", ".", ":"))) { + } else if (is.call(bysub) && !(bysub[[1L]] %chin% c("list", "as.list", "{", ".", ":"))) { # potential use of function, ex: by=month(date). catch it and wrap with "(", because we need to set "bysameorder" to FALSE as we don't know if the function will return ordered results just because "date" is ordered. Fixes #2670. bysub = as.call(c(as.name('('), list(bysub))) bysubl = as.list.default(bysub) - } else if (sub_is_fun(bysub, ".")) bysub[[1L]] = quote(list) + } else if (bysub %iscall% ".") bysub[[1L]] = quote(list) if (mode(bysub) == "character") { if (length(grep(",", bysub, fixed = TRUE))) { @@ -768,7 +768,7 @@ replace_dot_alias = function(e) { } if (is.null(irows)) { - if (sub_is_fun_length(bysub, ':', 3L) && is.name(bysub[[2L]]) && is.name(bysub[[3L]])) { + if (bysub %iscall% ':' && length(bysub)==3L && is.name(bysub[[2L]]) && is.name(bysub[[3L]])) { byval = eval(bysub, setattr(as.list(seq_along(x)), 'names', names_x), parent.frame()) byval = as.list(x)[byval] } else byval = eval(bysub, x, parent.frame()) @@ -789,7 +789,7 @@ replace_dot_alias = function(e) { if (verbose) cat("i clause present but columns used in by not detected. Having to subset all columns before evaluating 'by': '",deparse(by),"'\n",sep="") xss = x[irows,nomatch=nomatch,mult=mult,roll=roll,rollends=rollends] } - if (sub_is_fun_length(bysub, ':', 3L)) { + if (bysub %iscall% ':' && length(bysub)==3L) { byval = eval(bysub, setattr(as.list(seq_along(xss)), 'names', names(xss)), parent.frame()) byval = as.list(xss)[byval] } else byval = eval(bysub, xss, parent.frame()) @@ -925,17 +925,17 @@ replace_dot_alias = function(e) { # FR #4979 - negative numeric and character indices for SDcols colsub = substitute(.SDcols) # fix for #5190. colsub[[1L]] gave error when it's a symbol. - if (sub_in_funs(colsub, c("!", "-"))) { + if (colsub %iscall% c("!", "-")) { negate_sdcols = TRUE colsub = colsub[[2L]] } else negate_sdcols = FALSE # fix for #1216, make sure the parentheses are peeled from expr of the form (((1:4))) - while(sub_is_fun(colsub, "(")) colsub = as.list(colsub)[[-1L]] - if (sub_is_fun_length(colsub, ':', 3L)) { + while(colsub %iscall% "(") colsub = as.list(colsub)[[-1L]] + if (colsub %iscall% ':' && length(colsub)==3L) { # .SDcols is of the format a:b .SDcols = eval(colsub, setattr(as.list(seq_along(x)), 'names', names_x), parent.frame()) } else { - if (sub_is_fun(colsub, 'patterns')) { + if (colsub %iscall% 'patterns') { # each pattern gives a new filter condition, intersect the end result .SDcols = Reduce(intersect, do_patterns(colsub, names_x)) } else { @@ -1003,7 +1003,7 @@ replace_dot_alias = function(e) { } # Do not include z in .SD when dt[, z := {.SD; get("x")}, .SDcols = "y"] (#2326, #2338) - if (sub_is_fun(jsub, ':=') && is.symbol(jsub[[2L]])) { + if (jsub %iscall% ':=' && is.symbol(jsub[[2L]])) { jsub_lhs_symbol = as.character(jsub[[2L]]) if (jsub_lhs_symbol %chin% non_sdvars) { sdvars = setdiff(sdvars, jsub_lhs_symbol) @@ -1110,7 +1110,7 @@ replace_dot_alias = function(e) { setalloccol(x, n, verbose=verbose) # always assigns to calling scope; i.e. this scope if (is.name(name)) { assign(as.character(name),x,parent.frame(),inherits=TRUE) - } else if (sub_in_funs(name, c('$', '[[')) && is.name(name[[2L]])) { + } else if (name %iscall% c('$', '[[') && is.name(name[[2L]])) { k = eval(name[[2L]], parent.frame(), parent.frame()) if (is.list(k)) { origj = j = if (name[[1L]] == "$") as.character(name[[3L]]) else eval(name[[3L]], parent.frame(), parent.frame()) @@ -1300,7 +1300,7 @@ replace_dot_alias = function(e) { jval = copy(jval) } else if ( length(jcpy <- which(vapply_1c(jval, address) %chin% vapply_1c(SDenv, address))) ) { for (jidx in jcpy) jval[[jidx]] = copy(jval[[jidx]]) - } else if (sub_is_fun(jsub, 'get')) { + } else if (jsub %iscall% 'get') { jval = copy(jval) # fix for #1212 } } @@ -1310,7 +1310,7 @@ replace_dot_alias = function(e) { .Call(Cassign,x,irows,cols,newnames,jval) return(suppPrint(x)) } - if ((sub_not_fun(jsub, 'get') && is.list(jval) && !is.object(jval)) || !missingby) { + if ((is.call(jsub) && jsub[[1L]] != "get" && is.list(jval) && !is.object(jval)) || !missingby) { # is.call: selecting from a list column should return list # is.object: for test 168 and 168.1 (S4 object result from ggplot2::qplot). Just plain list results should result in data.table @@ -1487,7 +1487,7 @@ replace_dot_alias = function(e) { txt = as.list(jsub)[-1L] if (length(names(txt))>1L) .Call(Csetcharvec, names(txt), 2L, "") # fixes bug #4839 fun = txt[[2L]] - if (sub_is_fun(fun, "function")) { + if (fun %iscall% "function") { # Fix for #2381: added SDenv$.SD to 'eval' to take care of cases like: lapply(.SD, function(x) weighted.mean(x, bla)) where "bla" is a column in DT # http://stackoverflow.com/questions/13441868/data-table-and-stratified-means # adding this does not compromise in speed (that is, not any lesser than without SDenv$.SD) @@ -1594,7 +1594,7 @@ replace_dot_alias = function(e) { jvnames = c(jvnames, jn__) jsubl[[i_]] = jl__ } - } else if (sub_in_funs_length(this, optfuns, 1L, `>`)) { + } else if (this %iscall% optfuns && length(this)>1L) { jvnames = c(jvnames, if (is.null(names(jsubl))) "" else names(jsubl)[i_]) } else if ( length(this) == 3L && (this[[1L]] == "[" || this[[1L]] == "head") && this[[2L]] == ".SD" && (is.numeric(this[[3L]]) || this[[3L]] == ".N") ) { @@ -1641,7 +1641,7 @@ replace_dot_alias = function(e) { if (getOption("datatable.optimize")>=2L && !is.data.table(i) && !byjoin && length(f__) && !length(lhs)) { if (!length(ansvars) && !use.I) { GForce = FALSE - if ( (is.name(jsub) && jsub == ".N") || (sub_is_fun_length(jsub, 'list', 2L) && jsub[[2L]] == ".N") ) { + if ( (is.name(jsub) && jsub==".N") || (jsub %iscall% 'list' && length(jsub)==2L && jsub[[2L]]==".N") ) { GForce = TRUE if (verbose) cat("GForce optimized j to '",deparse(jsub, width.cutoff=200L, nlines=1L),"'\n",sep="") } @@ -1687,9 +1687,8 @@ replace_dot_alias = function(e) { oldjsub = jsub if (jsub[[1L]]=="list") { # Addressing #1369, #2949 and #1974. This used to be 30s (vs 0.5s) with 30K elements items in j, #1470. Could have been dotN() and/or the for-looped if() - # jsub[[1]]=="list" so the first item will always be FALSE - # is.symbol() for when expanded function definition is used instead of function names; #1369 results in (function(x) sum(x)) as jsub[[.]] from dcast.data.table - todo = sapply(jsub, sub_is_fun, 'mean') + # jsub[[1]]=="list" so the first item of todo will always be FALSE + todo = sapply(jsub, `%iscall%`, 'mean') if (any(todo)) { w = which(todo) jsub[w] = lapply(jsub[w], .optmean) @@ -2751,7 +2750,7 @@ setDT = function(x, keep.rownames=FALSE, key=NULL, check.names=FALSE) { if (is.name(name)) { name = as.character(name) assign(name, x, parent.frame(), inherits=TRUE) - } else if (sub_in_funs(name, c('$', '[[')) && is.name(name[[2L]])) { + } else if (name %iscall% c('$', '[[') && is.name(name[[2L]])) { # common case is call from 'lapply()' k = eval(name[[2L]], parent.frame(), parent.frame()) if (is.list(k)) { @@ -3018,10 +3017,10 @@ isReallyReal = function(x) { #' 'ops': integer vector. Gives the indices of the operators that connect the columns in x and i. ops = c("==", "<=", "<", ">=", ">", "!=") pat = paste0("(", ops, ")", collapse="|") - if (sub_is_fun(onsub, 'eval')) { + if (onsub %iscall% 'eval') { onsub = eval(onsub[[2L]], parent.frame(2L), parent.frame(2L)) } - if (sub_in_funs(onsub, c('list', '.'))) { + if (onsub %iscall% c('list', '.')) { spat = paste0("[ ]+(", pat, ")[ ]+") onsub = lapply(as.list(onsub)[-1L], function(x) gsub(spat, "\\1", deparse(x, width.cutoff=500L))) onsub = as.call(c(quote(c), onsub)) diff --git a/R/fcast.R b/R/fcast.R index 8efcd8779c..91613960e8 100644 --- a/R/fcast.R +++ b/R/fcast.R @@ -39,9 +39,9 @@ check_formula = function(formula, varnames, valnames) { deparse_formula = function(expr, varnames, allvars) { lvars = lapply(expr, function(this) { - if (sub_is_fun(this, '+')) { - unlist(deparse_formula(as.list(this)[-1L], varnames, allvars)) - } else if (is.name(this) && this == quote(`...`)) { + if (this %iscall% '+') { + unlist(deparse_formula(as.list(this)[-1L], varnames, allvars)) + } else if (is.name(this) && this==quote(`...`)) { subvars = setdiff(varnames, allvars) lapply(subvars, as.name) } else this @@ -61,11 +61,11 @@ value_vars = function(value.var, varnames) { } aggregate_funs = function(funs, vals, sep="_", ...) { - if (sub_is_fun(funs, 'eval')) + if (funs %iscall% 'eval') funs = eval(funs[[2L]], parent.frame(2L), parent.frame(2L)) - if (sub_in_funs(funs, c('c', 'list'))) { + if (funs %iscall% c('c', 'list')) { funs = lapply(as.list(funs)[-1L], function(x) { - if (sub_in_funs(x, c('c', 'list'))) as.list(x)[-1L] else x + if (x %iscall% c('c', 'list')) as.list(x)[-1L] else x }) } else funs = eval(funs, parent.frame(2L), parent.frame(2L)) if(is.function(funs)) funs = list(funs) # needed for cases as shown in test#1700.1 diff --git a/R/fmelt.R b/R/fmelt.R index e07d87e28d..12dd9fa5ac 100644 --- a/R/fmelt.R +++ b/R/fmelt.R @@ -35,7 +35,7 @@ melt.data.table = function(data, id.vars, measure.vars, variable.name = "variabl if (missing(id.vars)) id.vars=NULL if (missing(measure.vars)) measure.vars = NULL measure.sub = substitute(measure.vars) - if (sub_is_fun(measure.sub, "patterns")) { + if (measure.sub %iscall% "patterns") { measure.vars = do_patterns(measure.sub, names(data)) } if (is.list(measure.vars) && length(measure.vars) > 1L) { diff --git a/R/nse_utils.R b/R/nse_utils.R deleted file mode 100644 index 87a7e3577f..0000000000 --- a/R/nse_utils.R +++ /dev/null @@ -1,7 +0,0 @@ -sub_is_fun = function(e, fun) is.call(e) && is.symbol(e[[1L]]) && e[[1L]] == fun -sub_is_fun_length = function(e, fun, n, compare = `==`) is.call(e) && compare(length(e), n) && is.symbol(e[[1L]]) && e[[1L]] == fun -sub_not_fun = function(e, fun) is.call(e) && is.symbol(e[[1L]]) && e[[1L]] != fun -# TODO: chmatch could work on expressions directly for conciseness here -sub_in_funs = function(e, funs) is.call(e) && is.symbol(e[[1L]]) && e[[1L]] %chin% funs -sub_in_funs_length = function(e, funs, n, compare = `==`) is.call(e) && compare(length(e), n) && is.symbol(e[[1L]]) && e[[1L]] %chin% funs -sub_not_funs = function(e, funs) is.call(e) && is.symbol(e[[1L]]) && !e[[1L]] %chin% funs diff --git a/R/setkey.R b/R/setkey.R index f005e3bcbd..334ca1e801 100644 --- a/R/setkey.R +++ b/R/setkey.R @@ -200,7 +200,7 @@ forder = function(..., na.last=TRUE, decreasing=FALSE) # We intercept the unevaluated expressions and massage them before evaluating in with(DT) scope or not depending on the first item. for (i in seq.int(2L, length(sub))) { v = sub[[i]] - while (sub_in_funs_length(v, c('-', '+'), 2L)) { + while (v %iscall% c('-', '+') && length(v)==2L) { if (v[[1L]] == "-") asc[i-1L] = -asc[i-1L] sub[[i]] = v = v[[2L]] # remove the leading +/- which is the 2nd item since length(v)==2; i.e. monadic +/- } diff --git a/R/utils.R b/R/utils.R index 7a355e4e8b..42e67ea8de 100644 --- a/R/utils.R +++ b/R/utils.R @@ -132,6 +132,9 @@ is_utc = function(tz) { return(tz %chin% utc_tz) } +# very nice idea from Michael to avoid expression repetition (risk) in internal code, #4226 +"%iscall%" = function(e, f) { is.call(e) && e[[1L]] %chin% f } + # nocov start #593 always return a data.table edit.data.table = function(name, ...) { setDT(NextMethod('edit', name))[] diff --git a/src/chmatch.c b/src/chmatch.c index 0ebea9a4c3..c0f3397d6a 100644 --- a/src/chmatch.c +++ b/src/chmatch.c @@ -1,26 +1,32 @@ #include "data.table.h" static SEXP chmatchMain(SEXP x, SEXP table, int nomatch, bool chin, bool chmatchdup) { - if (!isString(x) && !isNull(x)) { - // for use in sub_*_funs from nse_utils.R - if (TYPEOF(x) == SYMSXP) { - return chmatchMain(coerceVector(x, STRSXP), table, nomatch, chin, chmatchdup); - } - error(_("x is type '%s' (must be 'character' or NULL)"), type2char(TYPEOF(x))); - } if (!isString(table) && !isNull(table)) error(_("table is type '%s' (must be 'character' or NULL)"), type2char(TYPEOF(table))); if (chin && chmatchdup) error(_("Internal error: either chin or chmatchdup should be true not both")); // # nocov + SEXP sym = NULL; const int xlen = length(x); - const int tablelen = length(table); + if (TYPEOF(x) == SYMSXP) { + if (xlen!=1) + error(_("Internal error: length of SYMSXP is %d not 1"), xlen); // # nocov + sym = PRINTNAME(x); // so we can do &sym to get a length 1 (const SEXP *)STRING_PTR(x) and save an alloc for coerce to STRSXP + } else if (!isString(x) && !isSymbol(x) && !isNull(x)) { + if (chin && !isVectorAtomic(x)) { + return ScalarLogical(FALSE); + // commonly type 'language' returns FALSE here, to make %iscall% simpler; e.g. #1369 results in (function(x) sum(x)) as jsub[[.]] from dcast.data.table + } else { + error(_("x is type '%s' (must be 'character' or NULL)"), type2char(TYPEOF(x))); + } + } // allocations up front before savetl starts in case allocs fail SEXP ans = PROTECT(allocVector(chin?LGLSXP:INTSXP, xlen)); - if (xlen==0) { // no need to look at table when x is empty + if (xlen==0) { // no need to look at table when x is empty (including null) UNPROTECT(1); return ans; } int *ansd = INTEGER(ans); + const int tablelen = length(table); if (tablelen==0) { const int val=(chin?0:nomatch), n=xlen; for (int i=0; i