diff --git a/.dev/CRAN_Release.cmd b/.dev/CRAN_Release.cmd index 3c8a2ee0ae..1dfec0a02a 100644 --- a/.dev/CRAN_Release.cmd +++ b/.dev/CRAN_Release.cmd @@ -154,7 +154,10 @@ grep -n "[^A-Za-z0-9]F[^A-Za-z0-9]" ./inst/tests/tests.Rraw grep -Enr "^[^#]*(?:\[|==|>|<|>=|<=|,|\(|\+)\s*[-]?[0-9]+[^0-9L:.e]" R | grep -Ev "stop|warning|tolerance" # Never use ifelse. fifelse for vectors when necessary (nothing yet) - grep -Enr "\bifelse" R +grep -Enr "\bifelse" R + +# use substr() instead of substring(), #4447 +grep -Fnr "substring" R # No system.time in main tests.Rraw. Timings should be in benchmark.Rraw grep -Fn "system.time" ./inst/tests/*.Rraw | grep -Fv "benchmark.Rraw" | grep -Fv "this system.time usage ok" diff --git a/.dev/revdep.R b/.dev/revdep.R index c172eb163f..38c5a93a66 100644 --- a/.dev/revdep.R +++ b/.dev/revdep.R @@ -157,7 +157,7 @@ status0 = function(bioc=FALSE) { if (file.exists(fn)) { v = suppressWarnings(system(paste0("grep 'Status:' ",fn), intern=TRUE)) if (!length(v)) return("RUNNING") - return(substring(v,9)) + return(substr(v, 9L, nchar(v))) } if (file.exists(paste0("./",x,".Rcheck"))) return("RUNNING") return("NOT STARTED") diff --git a/R/data.table.R b/R/data.table.R index 392599da71..79b8e6483d 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -249,7 +249,7 @@ replace_dot_alias = function(e) { root = if (is.call(jsub)) as.character(jsub[[1L]])[1L] else "" if (root == ":" || (root %chin% c("-","!") && jsub[[2L]] %iscall% '(' && jsub[[2L]][[2L]] %iscall% ':') || - ( (!length(av<-all.vars(jsub)) || all(substring(av,1L,2L)=="..")) && + ( (!length(av<-all.vars(jsub)) || all(startsWith(av, ".."))) && root %chin% c("","c","paste","paste0","-","!") && missingby )) { # test 763. TODO: likely that !missingby iff with==TRUE (so, with can be removed) # When no variable names (i.e. symbols) occur in j, scope doesn't matter because there are no symbols to find. @@ -266,8 +266,8 @@ replace_dot_alias = function(e) { with=FALSE if (length(av)) { for (..name in av) { - name = substring(..name, 3L) - if (name=="") stop("The symbol .. is invalid. The .. prefix must be followed by at least one character.") + name = substr(..name, 3L, nchar(..name)) + if (!nzchar(name)) stop("The symbol .. is invalid. The .. prefix must be followed by at least one character.") if (!exists(name, where=parent.frame())) { stop("Variable '",name,"' is not found in calling scope. Looking in calling scope because you used the .. prefix.", if (exists(..name, where=parent.frame())) @@ -283,7 +283,7 @@ replace_dot_alias = function(e) { ..syms = av } } else if (is.name(jsub)) { - if (substring(jsub, 1L, 2L) == "..") stop("Internal error: DT[, ..var] should be dealt with by the branch above now.") # nocov + if (startsWith(as.character(jsub), "..")) stop("Internal error: DT[, ..var] should be dealt with by the branch above now.") # nocov if (!with && !exists(as.character(jsub), where=parent.frame())) stop("Variable '",jsub,"' is not found in calling scope. Looking in calling scope because you set with=FALSE. Also, please use .. symbol prefix and remove with=FALSE.") } @@ -709,7 +709,7 @@ replace_dot_alias = function(e) { j = eval(jsub, setattr(as.list(seq_along(x)), 'names', names_x), parent.frame()) # else j will be evaluated for the first time on next line } else { names(..syms) = ..syms - j = eval(jsub, lapply(substring(..syms,3L), get, pos=parent.frame()), parent.frame()) + j = eval(jsub, lapply(substr(..syms, 3L, nchar(..syms)), get, pos=parent.frame()), parent.frame()) } if (is.logical(j)) j <- which(j) if (!length(j) && !notj) return( null.data.table() ) @@ -815,7 +815,7 @@ replace_dot_alias = function(e) { # TODO: could be allowed if length(irows)>1 but then the index would need to be squashed for use by uniqlist, #3062 # find if allbyvars is leading subset of any of the indices; add a trailing "__" to fix #3498 where a longer column name starts with a shorter column name tt = paste0(c(allbyvars,""), collapse="__") - w = which.first(substring(paste0(indices(x),"__"),1L,nchar(tt)) == tt) + w = which.first(startsWith(paste0(indices(x), "__"), tt)) if (!is.na(w)) { byindex = indices(x)[w] if (!length(getindex(x, byindex))) { @@ -921,8 +921,8 @@ replace_dot_alias = function(e) { jvnames = NULL drop_dot = function(x) { if (length(x)!=1L) stop("Internal error: drop_dot passed ",length(x)," items") # nocov - if (identical(substring(x<-as.character(x), 1L, 1L), ".") && x %chin% c(".N", ".I", ".GRP", ".NGRP", ".BY")) - substring(x, 2L) + if (startsWith(x<-as.character(x), ".") && x %chin% c(".N", ".I", ".GRP", ".NGRP", ".BY")) + substr(x, 2L, nchar(x)) else x } @@ -1242,8 +1242,8 @@ replace_dot_alias = function(e) { } syms = all.vars(jsub) - syms = syms[ substring(syms,1L,2L)==".." ] - syms = syms[ substring(syms,3L,3L)!="." ] # exclude ellipsis + syms = syms[ startsWith(syms, "..") ] + syms = syms[ substr(syms, 3L, 3L) != "." ] # exclude ellipsis for (sym in syms) { if (sym %chin% names_x) { # if "..x" exists as column name, use column, for backwards compatibility; e.g. package socialmixr in rev dep checks #2779 @@ -1251,7 +1251,7 @@ replace_dot_alias = function(e) { # TODO in future, as warned in NEWS item for v1.11.0 : # warning(sym," in j is looking for ",getName," in calling scope, but a column '", sym, "' exists. Column names should not start with ..") } - getName = substring(sym, 3L) + getName = substr(sym, 3L, nchar(sym)) if (!exists(getName, parent.frame())) { if (exists(sym, parent.frame())) next # user did 'manual' prefix; i.e. variable in calling scope has .. prefix stop("Variable '",getName,"' is not found in calling scope. Looking in calling scope because this symbol was prefixed with .. in the j= parameter.") @@ -1731,8 +1731,9 @@ replace_dot_alias = function(e) { # is.symbol() is for #1369, #1974 and #2949 if (!(is.call(q) && is.symbol(q[[1L]]) && is.symbol(q[[2L]]) && (q1 <- q[[1L]]) %chin% gfuns)) return(FALSE) if (!(q2 <- q[[2L]]) %chin% names(SDenv$.SDall) && q2 != ".I") return(FALSE) # 875 - if ((length(q)==2L || identical("na",substring(names(q)[3L], 1L, 2L))) && (!q1 %chin% c("head","tail"))) return(TRUE) - # ... head-tail uses default value n=6 which as of now should not go gforce ^^ + if ((length(q)==2L || (!is.null(names(q)) && startsWith(names(q)[3L], "na"))) && (!q1 %chin% c("head","tail"))) return(TRUE) + # ^^ base::startWith errors on NULL unfortunately + # head-tail uses default value n=6 which as of now should not go gforce ... ^^ # otherwise there must be three arguments, and only in two cases: # 1) head/tail(x, 1) or 2) x[n], n>0 length(q)==3L && length(q3 <- q[[3L]])==1L && is.numeric(q3) && @@ -1907,7 +1908,7 @@ replace_dot_alias = function(e) { if (length(expr)==2L) # no parameters passed to mean, so defaults of trim=0 and na.rm=FALSE return(call(".External",quote(Cfastmean),expr[[2L]], FALSE)) # return(call(".Internal",expr)) # slightly faster than .External, but R now blocks .Internal in coerce.c from apx Sep 2012 - if (length(expr)==3L && identical("na",substring(names(expr)[3L], 1L, 2L))) # one parameter passed to mean() + if (length(expr)==3L && startsWith(names(expr)[3L], "na")) # one parameter passed to mean() return(call(".External",quote(Cfastmean),expr[[2L]], expr[[3L]])) # faster than .Call assign("nomeanopt",TRUE,parent.frame()) expr # e.g. trim is not optimized, just na.rm diff --git a/R/fread.R b/R/fread.R index 236a30bb76..eb765fe639 100644 --- a/R/fread.R +++ b/R/fread.R @@ -55,13 +55,11 @@ yaml=FALSE, autostart=NA, tmpdir=tempdir(), tz="UTC") if (input=="" || length(grep('\\n|\\r', input))) { # input is data itself containing at least one \n or \r } else { - if (substring(input,1L,1L)==" ") { + if (startsWith(input, " ")) { stop("input= contains no \\n or \\r, but starts with a space. Please remove the leading space, or use text=, file= or cmd=") } - str6 = substring(input,1L,6L) # avoid grepl() for #2531 - str7 = substring(input,1L,7L) - str8 = substring(input,1L,8L) - if (str7=="ftps://" || str8=="https://") { + str7 = substr(input, 1L, 7L) # avoid grepl() for #2531 + if (str7=="ftps://" || startsWith(input, "https://")) { # nocov start if (!requireNamespace("curl", quietly = TRUE)) stop("Input URL requires https:// connection for which fread() requires 'curl' package which cannot be found. Please install 'curl' using 'install.packages('curl')'.") # nocov @@ -71,7 +69,7 @@ yaml=FALSE, autostart=NA, tmpdir=tempdir(), tz="UTC") on.exit(unlink(tmpFile), add=TRUE) # nocov end } - else if (str6=="ftp://" || str7== "http://" || str7=="file://") { + else if (startsWith(input, "ftp://") || str7== "http://" || str7=="file://") { # nocov start method = if (str7=="file://") "internal" else getOption("download.file.method", default="auto") # force "auto" when file:// to ensure we don't use an invalid option (e.g. wget), #1668 @@ -107,12 +105,10 @@ yaml=FALSE, autostart=NA, tmpdir=tempdir(), tz="UTC") if (data.table) 'data.table' else 'data.frame', ".") return(if (data.table) data.table(NULL) else data.frame(NULL)) } - ext2 = substring(file, nchar(file)-2L, nchar(file)) # last 3 characters ".gz" - ext3 = substring(file, nchar(file)-3L, nchar(file)) # last 4 characters ".bz2" - if (ext2==".gz" || ext3==".bz2") { + if ((is_gz <- endsWith(file, ".gz")) || endsWith(file, ".bz2")) { if (!requireNamespace("R.utils", quietly = TRUE)) stop("To read gz and bz2 files directly, fread() requires 'R.utils' package which cannot be found. Please install 'R.utils' using 'install.packages('R.utils')'.") # nocov - FUN = if (ext2==".gz") gzfile else bzfile + FUN = if (is_gz) gzfile else bzfile R.utils::decompressFile(file, decompFile<-tempfile(tmpdir=tmpdir), ext=NULL, FUN=FUN, remove=FALSE) # ext is not used by decompressFile when destname is supplied, but isn't optional file = decompFile # don't use 'tmpFile' symbol again, as tmpFile might be the http://domain.org/file.csv.gz download on.exit(unlink(decompFile), add=TRUE) @@ -174,9 +170,10 @@ yaml=FALSE, autostart=NA, tmpdir=tempdir(), tz="UTC") yaml_border_re = '^#?---' if (!grepl(yaml_border_re, first_line)) { close(f) - stop('Encountered <', substring(first_line, 1L, 50L), if (nchar(first_line) > 50L) '...', '> at the first ', - 'unskipped line (', 1L+skip, '), which does not constitute the start to a valid YAML header ', - '(expecting something matching regex "', yaml_border_re, '"); please check your input and try again.') + stop(gettextf( + 'Encountered <%s%s> at the first unskipped line (%d), which does not constitute the start to a valid YAML header (expecting something matching regex "%s"); please check your input and try again.', + substr(first_line, 1L, 50L), if (nchar(first_line) > 50L) '...' else '', 1L+skip, yaml_border_re + )) } yaml_comment_re = '^#' diff --git a/R/test.data.table.R b/R/test.data.table.R index da12144f66..cf778c68b6 100644 --- a/R/test.data.table.R +++ b/R/test.data.table.R @@ -215,10 +215,10 @@ test.data.table = function(script="tests.Rraw", verbose=FALSE, pkg=".", silent=F compactprint = function(DT, topn=2L) { tt = vapply_1c(DT,function(x)class(x)[1L]) tt[tt=="integer64"] = "i64" - tt = substring(tt, 1L, 3L) + tt = substr(tt, 1L, 3L) makeString = function(x) paste(x, collapse = ",") # essentially toString.default cn = paste0(" [Key=",makeString(key(DT)), - " Types=", makeString(substring(sapply(DT, typeof), 1L, 3L)), + " Types=", makeString(substr(sapply(DT, typeof), 1L, 3L)), " Classes=", makeString(tt), "]") if (nrow(DT)) { print(copy(DT)[,(cn):="",verbose=FALSE], topn=topn, class=FALSE) diff --git a/R/utils.R b/R/utils.R index 75a45b8991..7a698131c6 100644 --- a/R/utils.R +++ b/R/utils.R @@ -25,6 +25,13 @@ if (base::getRversion() < "3.2.0") { # Apr 2015 isNamespaceLoaded = function(x) x %chin% loadedNamespaces() } +if (!exists('startsWith', 'package:base', inherits=FALSE)) { # R 3.3.0; Apr 2016 + startsWith = function(x, stub) substr(x, 1L, nchar(stub))==stub +} +if (!exists('endsWith', 'package:base', inherits=FALSE)) { + endsWith = function(x, stub) {n=nchar(x); substr(x, n-nchar(stub)+1L, n)==stub} +} + # which.first which.first = function(x) { diff --git a/inst/tests/benchmark.Rraw b/inst/tests/benchmark.Rraw index 1c8bf146a6..bf0bf77e9f 100644 --- a/inst/tests/benchmark.Rraw +++ b/inst/tests/benchmark.Rraw @@ -161,10 +161,10 @@ set.seed(1) L = lapply(1:1e6, sample, x=100, size=2) x = capture.output(fwrite(L)) test(1742.1, nchar(x), c(2919861L, 2919774L)) # tests 2 very long lines, too -test(1742.2, substring(x,1,10), c("27,58,21,9","38,91,90,6")) +test(1742.2, substr(x, 1L, 10L), c("27,58,21,9", "38,91,90,6")) test(1742.3, L[[1L]], c(27L,38L)) test(1742.4, L[[1000000L]], c(76L, 40L)) -test(1742.5, substring(x,nchar(x)-10,nchar(x)), c("50,28,95,76","62,87,23,40")) +test(1742.5, substr(x, nchar(x)-10L, nchar(x)), c("50,28,95,76","62,87,23,40")) # Add scaled-up non-ASCII forder test 1896 diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 5b21448fa6..668b63ff8c 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -3470,7 +3470,7 @@ test(1100, dt1[dt2,roll=-Inf,rollends=c(FALSE,TRUE)]$ind, INT(NA,NA,1,2,2,2,2,2, set.seed(3) DT = data.table(a=5:1, b=runif(5)) ans = dcast(DT, a ~ b, value.var="b")[c(4,.N), c(2,6)] - setnames(ans, substring(names(ans),1,6)) + setnames(ans, substr(names(ans), 1L, 6L)) test(1102.06, ans, data.table("0.1680"=c(NA,DT[1,b]), "0.8075"=c(DT[2,b],NA))) # Fix for case 2 in bug report #71 - dcast didn't aggregate properly when formula RHS has "." @@ -7346,7 +7346,7 @@ test(1530.4, which.last(x), tail(which(x), 1L)) set.seed(2L) x = apply(matrix(sample(letters, 12), nrow=2), 1, paste, collapse="") y = factor(sample(c(letters[1:5], x), 20, TRUE)) -xsub = substring(x, 1L, 1L) +xsub = substr(x, 1L, 1L) test(1532.01, y %like% xsub[1L], grepl(xsub[1L], y)) test(1532.02, y %like% xsub[2L], grepl(xsub[2L], y)) test(1532.03, like(y, xsub[1L]), grepl(xsub[1L], y)) @@ -9564,7 +9564,7 @@ nqjoin_test <- function(x, y, k=1L, test_no, mult="all") { runcmb = as.data.table(runcmb[, 1:min(100L, ncol(runcmb)), drop=FALSE]) # max 100 combinations to test runops = lapply(runcmb, function(cols) { thisops = sample(ops, k, TRUE) - thisops[substring(cols,1,1)=="c"] = "==" + thisops[startsWith(cols, "c")] = "==" thisops }) is_only_na <- function(x) is.na(x) & !is.nan(x) @@ -17728,3 +17728,15 @@ if (test_bit64) { test(2193.2, X[Y, `:=`(y=i.y), on="x", by=.EACHI], data.table(x=1:3, y=as.integer64(10L,20L,NA))) } +# compatibility of endsWith backport with base::endsWith +if (exists('endsWith', 'package:base', inherits=FALSE)) { + DTendsWith = function(x, stub) {n=nchar(x); substr(x, n-nchar(stub)+1L, n)==stub} + BSendsWith = base::endsWith + test(2194.1, DTendsWith('abcd', 'd'), BSendsWith('abcd', 'd')) + test(2194.2, DTendsWith(letters, 'e'), BSendsWith(letters, 'e')) + test(2194.3, DTendsWith(NA_character_, 'a'), BSendsWith(NA_character_, 'a')) + test(2194.4, DTendsWith(character(), 'a'), BSendsWith(character(), 'a')) + # file used in encoding tests + txt = readLines(testDir("issue_563_fread.txt")) + test(2194.5, DTendsWith(txt, 'B'), BSendsWith(txt, 'B')) +}