Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
69 changes: 67 additions & 2 deletions R/data.table.R
Original file line number Diff line number Diff line change
Expand Up @@ -99,11 +99,71 @@ replace_dot_alias = function(e) {
e
}

.massagei = function(x) {
.massagei = function(x, dt=NULL, verbose=FALSE, ienv=NULL) {
# J alias for list as well in i, just if the first symbol
# if x = substitute(base::order) then as.character(x[[1L]]) == c("::", "base", "order")
if (x %iscall% c("J","."))
x[[1L]] = quote(list)
# optimize order() to forderv(); evaluates: decreasing, method, na.last. #3023, possibly #3261 as well
if (!is.null(dt) && getOption("datatable.optimize")>=1L && x%iscall%"order") {
call.nm = names(x)
## escape unsupported method
if ("method" %chin% call.nm) {
method = x[["method"]]
if (!is.character(method)) method = eval(method, ienv)
if (!identical(method, "radix")) return(x)
}
## escape invalid decreasing
if ("decreasing" %chin% call.nm) {
decreasing = x[["decreasing"]]
if (!is.logical(decreasing)) decreasing = eval(decreasing, ienv)
if (!is.logical(decreasing) || !length(decreasing) || anyNA(decreasing)) return(x) ## outsource raising error
} else decreasing = NULL
## escape invalid na.last
if ("na.last" %chin% call.nm) {
na.last = x[["na.last"]]
if (!is.logical(na.last)) na.last = eval(na.last, ienv)
if (!is.logical(na.last)) return(x) ## outsource raising error
} else na.last = TRUE
## decompose variables in dots
order.args = c("decreasing","method","na.last") ## formalArgs(order) - "...", tested in main.Rraw
order.call = if (!is.null(call.nm)) x[!call.nm %chin% order.args] else x
dots = as.list(order.call[-1])
## escapy empty input
if (!length(dots)) return(x)
order.vars = all.vars(order.call)
## escape constant order(x, 1L)
if (length(dots)!=length(order.vars)) return(x)
## escape for any non-dt var
if (any(!order.vars %chin% names(dt))) return(x)
## escape for any unsupported type
supported = c("integer","double","logical","character","complex")
if (any(vapply(order.vars, function(v) !typeof(dt[[v]])%chin%supported, NA))) return(x) ## outsource raising error
## decreasing recycle
decreasing = if (is.null(decreasing)) rep(FALSE, length(order.vars)) else {
if (length(decreasing)!=1L && length(decreasing)!=length(order.vars)) return(x) ## outsource raising error
if (length(decreasing)==1L && length(order.vars)>1L) rep(decreasing, length(order.vars)) else decreasing
}
## forderv arguments
by = vector("character", length(order.vars))
order = rep.int(1L, length(order.vars))
order[decreasing] = -1L
## language objects for each of order dots element
for (i in seq_along(dots)) {
dot = dots[[i]]
while (dot %iscall% c("-", "+") && length(dot)==2L) {
if (dot[[1L]]=="-") order[i] = -order[i]
dot = dot[[2L]]
}
if (is.symbol(dot)) {
var = as.character(dot)
if (!var %chin% order.vars) stop("internal error: a dots element is symbol but is not any of order.vars, should have been caught already") # nocov
by[i] = var
} else return(x)
}
x = as.call(list(quote(forderv), quote(x), by=by, retGrp=FALSE, sort=TRUE, order=order, na.last=na.last))
if (verbose) cat(sprintf("order call in 'i' optimized to '%s'\n", deparse(x, width.cutoff=500L)[1L]))
}
x
}

Expand Down Expand Up @@ -355,8 +415,13 @@ replace_dot_alias = function(e) {
}
else if (!is.name(isub)) {
ienv = new.env(parent=parent.frame())
isub = .massagei(isub, dt=x, verbose=verbose, ienv=ienv)
## this functionality has been moved to .massagei+forderv (#3023) branch below, but this forder will be still used when a variable in `order` is not a DT column, or order(x, (y)), order(-(x)), etc
if (getOption("datatable.optimize")>=1L) assign("order", forder, ienv)
i = tryCatch(eval(.massagei(isub), x, ienv), error=function(e) {
i = if (is.call(isub) && isub[[1L]]==quote(forderv)) { ## order has been optimized to forderv #3023
fo = eval(isub) ## forderv(x, ...)
if (!length(fo)) seq_len(nrow(x)) else fo
} else tryCatch(eval(isub, x, ienv), error=function(e) {
if (grepl(":=.*defined for use in j.*only", e$message))
stop("Operator := detected in i, the first argument inside DT[...], but is only valid in the second argument, j. Most often, this happens when forgetting the first comma (e.g. DT[newvar := 5] instead of DT[ , new_var := 5]). Please double-check the syntax. Run traceback(), and debugger() to get a line number.")
else
Expand Down
16 changes: 9 additions & 7 deletions R/setkey.R
Original file line number Diff line number Diff line change
Expand Up @@ -186,18 +186,18 @@ forderv = function(x, by=seq_along(x), retGrp=FALSE, sort=TRUE, order=1L, na.las
.Call(Cforder, x, by, retGrp, sort, order, na.last) # returns integer() if already sorted, regardless of sort=TRUE|FALSE
}

forder = function(..., na.last=TRUE, decreasing=FALSE)
forder = function(..., na.last=TRUE, decreasing=FALSE, method=c("auto","shell","radix"))
{
if (!missing(method) && !identical(method, "radix")) return(base::order(...=..., na.last=na.last, decreasing=decreasing, method=method))
sub = substitute(list(...))
tt = sapply(sub, function(x) is.null(x) || (is.symbol(x) && !nzchar(x)))
if (any(tt)) sub[tt] = NULL # remove any NULL or empty arguments; e.g. test 1962.052: forder(DT, NULL) and forder(DT, )
if (any(tt)) stop("[f]order argument ", paste(which(tt)-1L, collapse=", "), " is NULL or empty") # raises error consistent to base::order, invalidates e.g. test 1962.052: forder(DT, NULL) and forder(DT, )
if (length(sub)<2L) return(NULL) # forder() with no arguments returns NULL consistent with base::order
asc = rep.int(1L, length(sub)-1L) # ascending (1) or descending (-1) per column
# the idea here is to intercept - (and unusual --+ deriving from built expressions) before vectors in forder(DT, -colA, colB) so that :
# 1) - on character vector works; ordinarily in R that fails with type error
# 2) each column/expression can have its own +/- more easily that having to use a separate decreasing=TRUE/FALSE
# 3) we can pass the decreasing (-) flag to C and avoid what normally happens in R; i.e. allocate a new vector and apply - to every element first
# We intercept the unevaluated expressions and massage them before evaluating in with(DT) scope or not depending on the first item.
for (i in seq.int(2L, length(sub))) {
v = sub[[i]]
while (v %iscall% c('-', '+') && length(v)==2L) {
Expand All @@ -219,10 +219,12 @@ forder = function(..., na.last=TRUE, decreasing=FALSE)
} else {
data = eval(sub, parent.frame(), parent.frame())
}
stopifnot(isTRUEorFALSE(decreasing))
o = forderv(data, seq_along(data), sort=TRUE, retGrp=FALSE, order= if (decreasing) -asc else asc, na.last)
if (!length(o) && length(data)>=1L) o = seq_along(data[[1L]]) else o
o
if (!is.logical(decreasing) || anyNA(decreasing)) stop("'decreasing' must be logical non-NA")
if (length(decreasing)!=1L && length(decreasing)!=length(data)) stop("'decreasing' must be either length 1, or length of the variables passed to [f]order")
asc[decreasing] = -(asc[decreasing])
Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[.integer automatically recycle scalar decreasing

o = forderv(data, seq_along(data), sort=TRUE, retGrp=FALSE, order=asc, na.last=na.last)
o = if (!length(o) && length(data)>=1L) seq_along(data[[1L]]) else o
if (is.na(na.last)) o[as.logical(o)] else o ## remove zeros, as base order #4346
}

fsort = function(x, decreasing=FALSE, na.last=FALSE, internal=FALSE, verbose=FALSE, ...)
Expand Down
Loading