From ab7522f55cc3dafcb7f55dbc9b1123c9df4bc0df Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Thu, 14 May 2020 15:46:24 +0800 Subject: [PATCH 1/6] replace substring globally with substr [efficiency] --- .dev/CRAN_Release.cmd | 5 ++++- .dev/revdep.R | 2 +- R/data.table.R | 26 +++++++++++++------------- R/fread.R | 23 ++++++++++------------- R/test.data.table.R | 4 ++-- R/utils.R | 8 ++++++++ inst/tests/benchmark.Rraw | 4 ++-- inst/tests/tests.Rraw | 6 +++--- 8 files changed, 43 insertions(+), 35 deletions(-) diff --git a/.dev/CRAN_Release.cmd b/.dev/CRAN_Release.cmd index e629ee980b..faa15d0d53 100644 --- a/.dev/CRAN_Release.cmd +++ b/.dev/CRAN_Release.cmd @@ -164,7 +164,10 @@ grep -n "[^A-Za-z0-9]F[^A-Za-z0-9]" ./inst/tests/tests.Rraw grep -Enr "^[^#]*(?:\[|==|>|<|>=|<=|,|\(|\+)\s*[-]?[0-9]+[^0-9L:.e]" R | grep -Ev "stop|warning|tolerance" # Never use ifelse. fifelse for vectors when necessary (nothing yet) - grep -Enr "\bifelse" R +grep -Enr "\bifelse" R + +# substring is always slower than substr. use cases of substring over substr are limited +grep -Fnr "substring" R # No system.time in main tests.Rraw. Timings should be in benchmark.Rraw grep -n "system[.]time" ./inst/tests/tests.Rraw diff --git a/.dev/revdep.R b/.dev/revdep.R index 772486558e..861f9d3f79 100644 --- a/.dev/revdep.R +++ b/.dev/revdep.R @@ -115,7 +115,7 @@ status = function(which="both") { if (file.exists(fn)) { v = suppressWarnings(system(paste0("grep 'Status:' ",fn), intern=TRUE)) if (!length(v)) return("RUNNING") - return(substring(v,9)) + return(substring(v, 9L, nchar(v))) } if (file.exists(paste0("./",x,".Rcheck"))) return("RUNNING") return("NOT STARTED") diff --git a/R/data.table.R b/R/data.table.R index 98651d6e0b..66b1bb1d97 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -217,7 +217,7 @@ replace_dot_alias = function(e) { root = if (is.call(jsub)) as.character(jsub[[1L]])[1L] else "" if (root == ":" || (root %chin% c("-","!") && jsub[[2L]] %iscall% '(' && jsub[[2L]][[2L]] %iscall% ':') || - ( (!length(av<-all.vars(jsub)) || all(substring(av,1L,2L)=="..")) && + ( (!length(av<-all.vars(jsub)) || all(startsWith(av, ".."))) && root %chin% c("","c","paste","paste0","-","!") && missingby )) { # test 763. TODO: likely that !missingby iff with==TRUE (so, with can be removed) # When no variable names (i.e. symbols) occur in j, scope doesn't matter because there are no symbols to find. @@ -234,8 +234,8 @@ replace_dot_alias = function(e) { with=FALSE if (length(av)) { for (..name in av) { - name = substring(..name, 3L) - if (name=="") stop("The symbol .. is invalid. The .. prefix must be followed by at least one character.") + name = substr(..name, 3L, nchar(..name)) + if (!nzchar(name)) stop("The symbol .. is invalid. The .. prefix must be followed by at least one character.") if (!exists(name, where=parent.frame())) { stop("Variable '",name,"' is not found in calling scope. Looking in calling scope because you used the .. prefix.", if (exists(..name, where=parent.frame())) @@ -251,7 +251,7 @@ replace_dot_alias = function(e) { ..syms = av } } else if (is.name(jsub)) { - if (substring(jsub, 1L, 2L) == "..") stop("Internal error: DT[, ..var] should be dealt with by the branch above now.") # nocov + if (startsWith(jsub, "..")) stop("Internal error: DT[, ..var] should be dealt with by the branch above now.") # nocov if (!with && !exists(as.character(jsub), where=parent.frame())) stop("Variable '",jsub,"' is not found in calling scope. Looking in calling scope because you set with=FALSE. Also, please use .. symbol prefix and remove with=FALSE.") } @@ -656,7 +656,7 @@ replace_dot_alias = function(e) { j = eval(jsub, setattr(as.list(seq_along(x)), 'names', names_x), parent.frame()) # else j will be evaluated for the first time on next line } else { names(..syms) = ..syms - j = eval(jsub, lapply(substring(..syms,3L), get, pos=parent.frame()), parent.frame()) + j = eval(jsub, lapply(substr(..syms, 3L, nchar(..syms)), get, pos=parent.frame()), parent.frame()) } if (is.logical(j)) j <- which(j) if (!length(j) && !notj) return( null.data.table() ) @@ -756,7 +756,7 @@ replace_dot_alias = function(e) { # TODO: could be allowed if length(irows)>1 but then the index would need to be squashed for use by uniqlist, #3062 # find if allbyvars is leading subset of any of the indices; add a trailing "__" to fix #3498 where a longer column name starts with a shorter column name tt = paste0(c(allbyvars,""), collapse="__") - w = which.first(substring(paste0(indices(x),"__"),1L,nchar(tt)) == tt) + w = which.first(startsWith(paste0(indices(x), "__"), tt)) if (!is.na(w)) { byindex = indices(x)[w] if (!length(getindex(x, byindex))) { @@ -861,8 +861,8 @@ replace_dot_alias = function(e) { jvnames = NULL drop_dot = function(x) { if (length(x)!=1L) stop("Internal error: drop_dot passed ",length(x)," items") # nocov - if (identical(substring(x<-as.character(x), 1L, 1L), ".") && x %chin% c(".N", ".I", ".GRP", ".NGRP", ".BY")) - substring(x, 2L) + if (startsWith(x<-as.character(x), ".") && x %chin% c(".N", ".I", ".GRP", ".NGRP", ".BY")) + substr(x, 2L, nchar(x)) else x } @@ -1179,8 +1179,8 @@ replace_dot_alias = function(e) { } syms = all.vars(jsub) - syms = syms[ substring(syms,1L,2L)==".." ] - syms = syms[ substring(syms,3L,3L)!="." ] # exclude ellipsis + syms = syms[ startsWith(syms, "..") ] + syms = syms[ !startsWith(syms, ".") ] # exclude ellipsis for (sym in syms) { if (sym %chin% names_x) { # if "..x" exists as column name, use column, for backwards compatibility; e.g. package socialmixr in rev dep checks #2779 @@ -1188,7 +1188,7 @@ replace_dot_alias = function(e) { # TODO in future, as warned in NEWS item for v1.11.0 : # warning(sym," in j is looking for ",getName," in calling scope, but a column '", sym, "' exists. Column names should not start with ..") } - getName = substring(sym, 3L) + getName = substr(sym, 3L, nchar(sym)) if (!exists(getName, parent.frame())) { if (exists(sym, parent.frame())) next # user did 'manual' prefix; i.e. variable in calling scope has .. prefix stop("Variable '",getName,"' is not found in calling scope. Looking in calling scope because this symbol was prefixed with .. in the j= parameter.") @@ -1661,7 +1661,7 @@ replace_dot_alias = function(e) { # is.symbol() is for #1369, #1974 and #2949 if (!(is.call(q) && is.symbol(q[[1L]]) && is.symbol(q[[2L]]) && (q1 <- q[[1L]]) %chin% gfuns)) return(FALSE) if (!(q2 <- q[[2L]]) %chin% names(SDenv$.SDall) && q2 != ".I") return(FALSE) # 875 - if ((length(q)==2L || identical("na",substring(names(q)[3L], 1L, 2L))) && (!q1 %chin% c("head","tail"))) return(TRUE) + if ((length(q)==2L || startsWith(names(q)[3L], "na")) && (!q1 %chin% c("head","tail"))) return(TRUE) # ... head-tail uses default value n=6 which as of now should not go gforce ^^ # otherwise there must be three arguments, and only in two cases: # 1) head/tail(x, 1) or 2) x[n], n>0 @@ -1834,7 +1834,7 @@ replace_dot_alias = function(e) { if (length(expr)==2L) # no parameters passed to mean, so defaults of trim=0 and na.rm=FALSE return(call(".External",quote(Cfastmean),expr[[2L]], FALSE)) # return(call(".Internal",expr)) # slightly faster than .External, but R now blocks .Internal in coerce.c from apx Sep 2012 - if (length(expr)==3L && identical("na",substring(names(expr)[3L], 1L, 2L))) # one parameter passed to mean() + if (length(expr)==3L && startsWith(names(expr)[3L], "na")) # one parameter passed to mean() return(call(".External",quote(Cfastmean),expr[[2L]], expr[[3L]])) # faster than .Call assign("nomeanopt",TRUE,parent.frame()) expr # e.g. trim is not optimized, just na.rm diff --git a/R/fread.R b/R/fread.R index d57d2cd6fd..6b73f38ec1 100644 --- a/R/fread.R +++ b/R/fread.R @@ -50,13 +50,11 @@ yaml=FALSE, autostart=NA, tmpdir=tempdir()) if (input=="" || length(grep('\\n|\\r', input))) { # input is data itself containing at least one \n or \r } else { - if (substring(input,1L,1L)==" ") { + if (startsWith(input, " ")) { stop("input= contains no \\n or \\r, but starts with a space. Please remove the leading space, or use text=, file= or cmd=") } - str6 = substring(input,1L,6L) # avoid grepl() for #2531 - str7 = substring(input,1L,7L) - str8 = substring(input,1L,8L) - if (str7=="ftps://" || str8=="https://") { + str7 = substr(input, 1L, 7L) # avoid grepl() for #2531 + if (str7=="ftps://" || startsWith(input, "https://")) { # nocov start if (!requireNamespace("curl", quietly = TRUE)) stop("Input URL requires https:// connection for which fread() requires 'curl' package which cannot be found. Please install 'curl' using 'install.packages('curl')'.") # nocov @@ -66,7 +64,7 @@ yaml=FALSE, autostart=NA, tmpdir=tempdir()) on.exit(unlink(tmpFile), add=TRUE) # nocov end } - else if (str6=="ftp://" || str7== "http://" || str7=="file://") { + else if (startsWith(input, "ftp://") || str7== "http://" || str7=="file://") { # nocov start method = if (str7=="file://") "internal" else getOption("download.file.method", default="auto") # force "auto" when file:// to ensure we don't use an invalid option (e.g. wget), #1668 @@ -102,12 +100,10 @@ yaml=FALSE, autostart=NA, tmpdir=tempdir()) if (data.table) 'data.table' else 'data.frame', ".") return(if (data.table) data.table(NULL) else data.frame(NULL)) } - ext2 = substring(file, nchar(file)-2L, nchar(file)) # last 3 characters ".gz" - ext3 = substring(file, nchar(file)-3L, nchar(file)) # last 4 characters ".bz2" - if (ext2==".gz" || ext3==".bz2") { + if ((is_gz <- endsWith(file, ".gz")) || endsWith(file, ".bz2")) { if (!requireNamespace("R.utils", quietly = TRUE)) stop("To read gz and bz2 files directly, fread() requires 'R.utils' package which cannot be found. Please install 'R.utils' using 'install.packages('R.utils')'.") # nocov - FUN = if (ext2==".gz") gzfile else bzfile + FUN = if (is_gz) gzfile else bzfile R.utils::decompressFile(file, decompFile<-tempfile(tmpdir=tmpdir), ext=NULL, FUN=FUN, remove=FALSE) # ext is not used by decompressFile when destname is supplied, but isn't optional file = decompFile # don't use 'tmpFile' symbol again, as tmpFile might be the http://domain.org/file.csv.gz download on.exit(unlink(decompFile), add=TRUE) @@ -169,9 +165,10 @@ yaml=FALSE, autostart=NA, tmpdir=tempdir()) yaml_border_re = '^#?---' if (!grepl(yaml_border_re, first_line)) { close(f) - stop('Encountered <', substring(first_line, 1L, 50L), if (nchar(first_line) > 50L) '...', '> at the first ', - 'unskipped line (', 1L+skip, '), which does not constitute the start to a valid YAML header ', - '(expecting something matching regex "', yaml_border_re, '"); please check your input and try again.') + stop(domain=NA, gettextf(domain="R-data.table", + 'Encountered <%s%s> at the first unskipped line (%d), which does not constitute the start to a valid YAML header (expecting something matching regex "%s"); please check your input and try again.', + substr(first_line, 1L, 50L), if (nchar(first_line) > 50L) '...' else '', 1L+skip, yaml_border_re + )) } yaml_comment_re = '^#' diff --git a/R/test.data.table.R b/R/test.data.table.R index 14d5ae83bf..a97111221e 100644 --- a/R/test.data.table.R +++ b/R/test.data.table.R @@ -199,10 +199,10 @@ test.data.table = function(script="tests.Rraw", verbose=FALSE, pkg=".", silent=F compactprint = function(DT, topn=2L) { tt = vapply_1c(DT,function(x)class(x)[1L]) tt[tt=="integer64"] = "i64" - tt = substring(tt, 1L, 3L) + tt = substr(tt, 1L, 3L) makeString = function(x) paste(x, collapse = ",") # essentially toString.default cn = paste0(" [Key=",makeString(key(DT)), - " Types=", makeString(substring(sapply(DT, typeof), 1L, 3L)), + " Types=", makeString(substr(sapply(DT, typeof), 1L, 3L)), " Classes=", makeString(tt), "]") if (nrow(DT)) { print(copy(DT)[,(cn):="",verbose=FALSE], topn=topn, class=FALSE) diff --git a/R/utils.R b/R/utils.R index 42e67ea8de..1342890914 100644 --- a/R/utils.R +++ b/R/utils.R @@ -25,6 +25,14 @@ if (base::getRversion() < "3.2.0") { # Apr 2015 isNamespaceLoaded = function(x) x %chin% loadedNamespaces() } +# R 3.3.0 [April 2016] +if (!exists('startsWith', as.environment('package:base'))) { + startsWith = function(x, stub) substr(x, 1L, nchar(stub)) == stub +} +if (!exists('endsWith', as.environment('package:base'))) { + endsWith = function(x, stub) {n = nchar(x); substr(x, n-nchar(stub)+1L, n) == stub} +} + # which.first which.first = function(x) { diff --git a/inst/tests/benchmark.Rraw b/inst/tests/benchmark.Rraw index 1c8bf146a6..bf0bf77e9f 100644 --- a/inst/tests/benchmark.Rraw +++ b/inst/tests/benchmark.Rraw @@ -161,10 +161,10 @@ set.seed(1) L = lapply(1:1e6, sample, x=100, size=2) x = capture.output(fwrite(L)) test(1742.1, nchar(x), c(2919861L, 2919774L)) # tests 2 very long lines, too -test(1742.2, substring(x,1,10), c("27,58,21,9","38,91,90,6")) +test(1742.2, substr(x, 1L, 10L), c("27,58,21,9", "38,91,90,6")) test(1742.3, L[[1L]], c(27L,38L)) test(1742.4, L[[1000000L]], c(76L, 40L)) -test(1742.5, substring(x,nchar(x)-10,nchar(x)), c("50,28,95,76","62,87,23,40")) +test(1742.5, substr(x, nchar(x)-10L, nchar(x)), c("50,28,95,76","62,87,23,40")) # Add scaled-up non-ASCII forder test 1896 diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index ed17470383..c23382d34f 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -3408,7 +3408,7 @@ test(1100, dt1[dt2,roll=-Inf,rollends=c(FALSE,TRUE)]$ind, INT(NA,NA,1,2,2,2,2,2, set.seed(3) DT = data.table(a=5:1, b=runif(5)) ans = dcast(DT, a ~ b, value.var="b")[c(4,.N), c(2,6)] - setnames(ans, substring(names(ans),1,6)) + setnames(ans, substr(names(ans), 1L, 6L)) test(1102.06, ans, data.table("0.1680"=c(NA,DT[1,b]), "0.8075"=c(DT[2,b],NA))) # Fix for case 2 in bug report #71 - dcast didn't aggregate properly when formula RHS has "." @@ -7253,7 +7253,7 @@ test(1530.4, which.last(x), tail(which(x), 1L)) set.seed(2L) x = apply(matrix(sample(letters, 12), nrow=2), 1, paste, collapse="") y = factor(sample(c(letters[1:5], x), 20, TRUE)) -xsub = substring(x, 1L, 1L) +xsub = substr(x, 1L, 1L) test(1532.1, y %like% xsub[1L], grepl(xsub[1L], y)) test(1532.2, y %like% xsub[2L], grepl(xsub[2L], y)) test(1532.3, like(y, xsub[1L]), grepl(xsub[1L], y)) @@ -9435,7 +9435,7 @@ nqjoin_test <- function(x, y, k=1L, test_no, mult="all") { runcmb = as.data.table(runcmb[, 1:min(100L, ncol(runcmb)), drop=FALSE]) # max 100 combinations to test runops = lapply(runcmb, function(cols) { thisops = sample(ops, k, TRUE) - thisops[substring(cols,1,1)=="c"] = "==" + thisops[startsWith(cols, "c")] = "==" thisops }) is_only_na <- function(x) is.na(x) & !is.nan(x) From 826d04d548116496129cabf7874056a9524c93e7 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Thu, 14 May 2020 16:13:27 +0800 Subject: [PATCH 2/6] fix mistakes --- R/data.table.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/data.table.R b/R/data.table.R index 66b1bb1d97..f43182e005 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -251,7 +251,7 @@ replace_dot_alias = function(e) { ..syms = av } } else if (is.name(jsub)) { - if (startsWith(jsub, "..")) stop("Internal error: DT[, ..var] should be dealt with by the branch above now.") # nocov + if (startsWith(as.character(jsub), "..")) stop("Internal error: DT[, ..var] should be dealt with by the branch above now.") # nocov if (!with && !exists(as.character(jsub), where=parent.frame())) stop("Variable '",jsub,"' is not found in calling scope. Looking in calling scope because you set with=FALSE. Also, please use .. symbol prefix and remove with=FALSE.") } @@ -1180,7 +1180,7 @@ replace_dot_alias = function(e) { syms = all.vars(jsub) syms = syms[ startsWith(syms, "..") ] - syms = syms[ !startsWith(syms, ".") ] # exclude ellipsis + syms = syms[ substr(syms, 3L, 3L) != "." ] # exclude ellipsis for (sym in syms) { if (sym %chin% names_x) { # if "..x" exists as column name, use column, for backwards compatibility; e.g. package socialmixr in rev dep checks #2779 @@ -1661,7 +1661,7 @@ replace_dot_alias = function(e) { # is.symbol() is for #1369, #1974 and #2949 if (!(is.call(q) && is.symbol(q[[1L]]) && is.symbol(q[[2L]]) && (q1 <- q[[1L]]) %chin% gfuns)) return(FALSE) if (!(q2 <- q[[2L]]) %chin% names(SDenv$.SDall) && q2 != ".I") return(FALSE) # 875 - if ((length(q)==2L || startsWith(names(q)[3L], "na")) && (!q1 %chin% c("head","tail"))) return(TRUE) + if ((length(q)==2L || (!is.null(names(q)) && startsWith(names(q)[3L], "na"))) && (!q1 %chin% c("head","tail"))) return(TRUE) # ... head-tail uses default value n=6 which as of now should not go gforce ^^ # otherwise there must be three arguments, and only in two cases: # 1) head/tail(x, 1) or 2) x[n], n>0 From 467a5918109439a68373a16b981a4512996d3ee5 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Tue, 19 May 2020 12:23:02 +0800 Subject: [PATCH 3/6] one more --- .dev/revdep.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.dev/revdep.R b/.dev/revdep.R index 861f9d3f79..4d6f5f2226 100644 --- a/.dev/revdep.R +++ b/.dev/revdep.R @@ -115,7 +115,7 @@ status = function(which="both") { if (file.exists(fn)) { v = suppressWarnings(system(paste0("grep 'Status:' ",fn), intern=TRUE)) if (!length(v)) return("RUNNING") - return(substring(v, 9L, nchar(v))) + return(substr(v, 9L, nchar(v))) } if (file.exists(paste0("./",x,".Rcheck"))) return("RUNNING") return("NOT STARTED") From 683349bc265af6327ed431e32863188366d64efb Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Tue, 19 May 2020 12:51:09 +0800 Subject: [PATCH 4/6] add some tests --- inst/tests/tests.Rraw | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index c23382d34f..67a69077d2 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -16853,3 +16853,16 @@ A = data.table(A=c(complex(real = 1:3, imaginary=c(0, -1, 1)), NaN)) test(2138.3, rbind(A,B), data.table(A=c(as.character(A$A), B$A))) A = data.table(A=as.complex(rep(NA, 5))) test(2138.4, rbind(A,B), data.table(A=c(as.character(A$A), B$A))) + +# test compatibility of endsWith backport & base::endsWith +if (exists('endsWith', 'package:base')) { + DTendsWith = function(x, stub) {n = nchar(x); substr(x, n-nchar(stub)+1L, n) == stub} + BSendsWith = base::endsWith + test(2139.1, DTendsWith('abcd', 'd'), BSendsWith('abcd', 'd')) + test(2139.2, DTendsWith(letters, 'e'), BSendsWith(letters, 'e')) + test(2139.3, DTendsWith(NA_character_, 'a'), BSendsWith(NA_character_, 'a')) + test(2139.4, DTendsWith(character(), 'a'), BSendsWith(character(), 'a')) + # file used in encoding tests + txt = readLines(testDir("issue_563_fread.txt")) + test(2139.5, DTendsWith(txt, 'B'), BSendsWith(txt, 'B')) +} From 001a2ba581318aeddb80a4f1e5ab2d6593b2bc74 Mon Sep 17 00:00:00 2001 From: Matt Dowle Date: Wed, 16 Jun 2021 16:21:36 -0600 Subject: [PATCH 5/6] tweaks --- .dev/CRAN_Release.cmd | 2 +- R/data.table.R | 3 ++- R/fread.R | 2 +- R/utils.R | 9 ++++----- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/.dev/CRAN_Release.cmd b/.dev/CRAN_Release.cmd index efdb759fc6..1dfec0a02a 100644 --- a/.dev/CRAN_Release.cmd +++ b/.dev/CRAN_Release.cmd @@ -156,7 +156,7 @@ grep -Enr "^[^#]*(?:\[|==|>|<|>=|<=|,|\(|\+)\s*[-]?[0-9]+[^0-9L:.e]" R | grep -E # Never use ifelse. fifelse for vectors when necessary (nothing yet) grep -Enr "\bifelse" R -# substring is always slower than substr. use cases of substring over substr are limited +# use substr() instead of substring(), #4447 grep -Fnr "substring" R # No system.time in main tests.Rraw. Timings should be in benchmark.Rraw diff --git a/R/data.table.R b/R/data.table.R index 01cb6a710a..79b8e6483d 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -1732,7 +1732,8 @@ replace_dot_alias = function(e) { if (!(is.call(q) && is.symbol(q[[1L]]) && is.symbol(q[[2L]]) && (q1 <- q[[1L]]) %chin% gfuns)) return(FALSE) if (!(q2 <- q[[2L]]) %chin% names(SDenv$.SDall) && q2 != ".I") return(FALSE) # 875 if ((length(q)==2L || (!is.null(names(q)) && startsWith(names(q)[3L], "na"))) && (!q1 %chin% c("head","tail"))) return(TRUE) - # ... head-tail uses default value n=6 which as of now should not go gforce ^^ + # ^^ base::startWith errors on NULL unfortunately + # head-tail uses default value n=6 which as of now should not go gforce ... ^^ # otherwise there must be three arguments, and only in two cases: # 1) head/tail(x, 1) or 2) x[n], n>0 length(q)==3L && length(q3 <- q[[3L]])==1L && is.numeric(q3) && diff --git a/R/fread.R b/R/fread.R index b0980b0e98..eb765fe639 100644 --- a/R/fread.R +++ b/R/fread.R @@ -170,7 +170,7 @@ yaml=FALSE, autostart=NA, tmpdir=tempdir(), tz="UTC") yaml_border_re = '^#?---' if (!grepl(yaml_border_re, first_line)) { close(f) - stop(domain=NA, gettextf(domain="R-data.table", + stop(gettextf( 'Encountered <%s%s> at the first unskipped line (%d), which does not constitute the start to a valid YAML header (expecting something matching regex "%s"); please check your input and try again.', substr(first_line, 1L, 50L), if (nchar(first_line) > 50L) '...' else '', 1L+skip, yaml_border_re )) diff --git a/R/utils.R b/R/utils.R index 9029a81501..7a698131c6 100644 --- a/R/utils.R +++ b/R/utils.R @@ -25,12 +25,11 @@ if (base::getRversion() < "3.2.0") { # Apr 2015 isNamespaceLoaded = function(x) x %chin% loadedNamespaces() } -# R 3.3.0 [April 2016] -if (!exists('startsWith', as.environment('package:base'))) { - startsWith = function(x, stub) substr(x, 1L, nchar(stub)) == stub +if (!exists('startsWith', 'package:base', inherits=FALSE)) { # R 3.3.0; Apr 2016 + startsWith = function(x, stub) substr(x, 1L, nchar(stub))==stub } -if (!exists('endsWith', as.environment('package:base'))) { - endsWith = function(x, stub) {n = nchar(x); substr(x, n-nchar(stub)+1L, n) == stub} +if (!exists('endsWith', 'package:base', inherits=FALSE)) { + endsWith = function(x, stub) {n=nchar(x); substr(x, n-nchar(stub)+1L, n)==stub} } # which.first From e8ac4eedf1c3a2dbd93ccddef4663b19e7734dc5 Mon Sep 17 00:00:00 2001 From: Matt Dowle Date: Wed, 16 Jun 2021 17:05:49 -0600 Subject: [PATCH 6/6] inherits=FALSE in the test too --- inst/tests/tests.Rraw | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index f6b71ca001..668b63ff8c 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -17729,7 +17729,7 @@ if (test_bit64) { } # compatibility of endsWith backport with base::endsWith -if (exists('endsWith', 'package:base')) { +if (exists('endsWith', 'package:base', inherits=FALSE)) { DTendsWith = function(x, stub) {n=nchar(x); substr(x, n-nchar(stub)+1L, n)==stub} BSendsWith = base::endsWith test(2194.1, DTendsWith('abcd', 'd'), BSendsWith('abcd', 'd'))