diff --git a/NEWS.md b/NEWS.md index 27faee2325..e932f3ea20 100644 --- a/NEWS.md +++ b/NEWS.md @@ -222,6 +222,8 @@ 24. `column not found` could incorrectly occur in rare non-equi-join cases, [#3635](https://github.com/Rdatatable/data.table/issues/3635). Thanks to @UweBlock for the report. +25. Grouping by a call to a custom function whose name merely starts with `eval`, such as `by=evaluate(grade)`, now auto-names the grouping column after the function (`evaluate`) instead of after its first symbolic argument, [#3758](https://github.com/Rdatatable/data.table/issues/3758). + #### NOTES 1. `rbindlist`'s `use.names="check"` now emits its message for automatic column names (`"V[0-9]+"`) too, [#3484](https://github.com/Rdatatable/data.table/pull/3484). See news item 5 of v1.12.2 below. diff --git a/R/IDateTime.R b/R/IDateTime.R index c6db541160..8e03631066 100644 --- a/R/IDateTime.R +++ b/R/IDateTime.R @@ -87,8 +87,9 @@ round.IDate = function (x, digits=c("weeks", "months", "quarters", "years"), ... `+.IDate` = function (e1, e2) { if (nargs() == 1L) return(e1) + # TODO: investigate Ops.IDate method a la Ops.difftime if (inherits(e1, "difftime") || inherits(e2, "difftime")) - stop("difftime objects may not be added to IDate. Use plain integer instead of difftime.") + stop("Internal error -- difftime objects may not be added to IDate, but Ops dispatch should have intervened to prevent this") # nocov if (isReallyReal(e1) || isReallyReal(e2)) { return(`+.Date`(e1, e2)) # IDate doesn't support fractional days; revert to base Date @@ -108,7 +109,7 @@ round.IDate = function (x, digits=c("weeks", "months", "quarters", "years"), ... if (nargs() == 1L) stop("unary - is not defined for \"IDate\" objects") if (inherits(e2, "difftime")) - stop("difftime objects may not be subtracted from IDate. 
Use plain integer instead of difftime.") + stop("Internal error -- difftime objects may not be subtracted from IDate, but Ops dispatch should have intervened to prevent this") # nocov if ( isReallyReal(e2) ) { # IDate deliberately doesn't support fractional days so revert to base Date diff --git a/R/bmerge.R b/R/bmerge.R index 321a270749..2ff0f86524 100644 --- a/R/bmerge.R +++ b/R/bmerge.R @@ -57,7 +57,7 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos } } if (xclass == iclass) { - if (verbose) cat("i.",names(i)[ic],"has same type (",xclass,") as x.",names(x)[xc],". No coercion needed.\n", sep="") + if (verbose) cat("i.",names(i)[ic]," has same type (",xclass,") as x.",names(x)[xc],". No coercion needed.\n", sep="") next } if (xclass=="character" || iclass=="character" || diff --git a/R/data.table.R b/R/data.table.R index a7e085a5fd..0d61bb8bc2 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -841,8 +841,10 @@ replace_order = function(isub, verbose, env) { byvars = all.vars(bysubl[[jj+1L]], functions = TRUE) if (length(byvars) == 1L) tt = byvars else { - tt = grep("^eval|^[^[:alpha:]. ]",byvars,invert=TRUE,value=TRUE) - if (length(tt)) tt = tt[1L] else all.vars(bysubl[[jj+1L]])[1L] + # take the first variable that (1) is not eval (#3758) and (2) does not start with a character that can't start a variable name + tt = grep("^eval$|^[^[:alpha:]. 
]", byvars, invert=TRUE, value=TRUE) + # otherwise fall back to byvars excluding functions, or by=`0`+`1` would be named `+` + tt = if (length(tt)) tt[1L] else all.vars(bysubl[[jj+1L]])[1L] } # fix for #497 if (length(byvars) > 1L && tt %chin% all.vars(jsub, FALSE)) { @@ -1087,9 +1089,9 @@ replace_order = function(isub, verbose, env) { if (is.list(k)) { origj = j = if (name[[1L]] == "$") as.character(name[[3L]]) else eval(name[[3L]], parent.frame(), parent.frame()) if (is.character(j)) { - if (length(j)!=1L) stop("L[[i]][,:=] syntax only valid when i is length 1, but it's length %d",length(j)) + if (length(j)!=1L) stop("Cannot assign to an under-allocated recursively indexed list -- L[[i]][,:=] syntax is only valid when i is length 1, but it's length ", length(j)) j = match(j, names(k)) - if (is.na(j)) stop("Item '",origj,"' not found in names of list") + if (is.na(j)) stop("Internal error -- item '", origj, "' not found in names of list") # nocov } .Call(Csetlistelt,k,as.integer(j), x) } else if (is.environment(k) && exists(as.character(name[[3L]]), k)) { @@ -1118,7 +1120,7 @@ replace_order = function(isub, verbose, env) { xcolsAns = seq_along(ansvars) icols = icolsAns = integer() } else { - if (!length(leftcols)) stop("column(s) not found: ", paste(ansvars[wna],collapse=", ")) + if (!length(leftcols)) stop("Internal error -- column(s) not found: ", paste(ansvars[wna],collapse=", ")) # nocov xcols = w[!wna] xcolsAns = which(!wna) map = c(seq_along(i), leftcols) # this map is to handle dups in leftcols, #3635 @@ -1131,7 +1133,7 @@ replace_order = function(isub, verbose, env) { if (any(w2na <- is.na(w2))) { ivars[leftcols] = paste0("i.",ivars[leftcols]) w2[w2na] = chmatch(ansvars[wna][w2na], ivars) - if (any(w2na <- is.na(w2))) stop("column(s) not found: ", paste(ansvars[wna][w2na],sep=", ")) + if (any(w2na <- is.na(w2))) stop("Internal error -- column(s) not found: ", paste(ansvars[wna][w2na],collapse=", ")) # nocov } } icols = w2 @@ -1294,7 +1296,7 @@ replace_order = 
function(isub, verbose, env) { 
identical(irows, integer(0L)) && !bynull, length(irows) && !anyNA(irows) && all(irows==0L) ## anyNA() because all() returns NA (not FALSE) when irows is all-NA. TODO: any way to not check all 'irows' values? )) - if (is.atomic(jval)) jval = jval[0L] else jval = lapply(jval, `[`, 0L) + jval = lapply(jval, `[`, 0L) if (is.atomic(jval)) { setattr(jval,"names",NULL) jval = data.table(jval) # TO DO: should this be setDT(list(jval)) instead? @@ -1884,7 +1886,7 @@ as.matrix.data.table = function(x, rownames=NULL, rownames.value=NULL, ...) { non.numeric = non.atomic = FALSE all.logical = TRUE for (j in seq_len(p)) { - if (is.ff(X[[j]])) X[[j]] = X[[j]][] # to bring the ff into memory, since we need to create a matrix in memory + if (is.ff(X[[j]])) X[[j]] = X[[j]][] # nocov to bring the ff into memory, since we need to create a matrix in memory xj = X[[j]] if (length(dj <- dim(xj)) == 2L && dj[2L] > 1L) { if (inherits(xj, "data.table")) @@ -1930,13 +1932,13 @@ as.matrix.data.table = function(x, rownames=NULL, rownames.value=NULL, ...) { # bug #2375. fixed. same as head.data.frame and tail.data.frame to deal with negative indices head.data.table = function(x, n=6L, ...) { - if (!cedta()) return(NextMethod()) + if (!cedta()) return(NextMethod()) # nocov stopifnot(length(n) == 1L) i = seq_len(if (n<0L) max(nrow(x)+n, 0L) else min(n,nrow(x))) x[i, , ] } tail.data.table = function(x, n=6L, ...) { - if (!cedta()) return(NextMethod()) + if (!cedta()) return(NextMethod()) # nocov stopifnot(length(n) == 1L) n = if (n<0L) max(nrow(x) + n, 0L) else min(n, nrow(x)) i = seq.int(to=nrow(x), length.out=n) @@ -2077,7 +2079,7 @@ within.data.table = function (data, expr, ...) # basically within.list but retains key (if any) # will be slower than using := or a regular query (see ?within for further info). 
{ - if (!cedta()) return(NextMethod()) + if (!cedta()) return(NextMethod()) # nocov parent = parent.frame() e = evalq(environment(), data, parent) eval(substitute(expr), e) # might (and it's known that some user code does) contain rm() @@ -2101,7 +2103,7 @@ within.data.table = function (data, expr, ...) transform.data.table = function (`_data`, ...) # basically transform.data.frame with data.table instead of data.frame, and retains key { - if (!cedta()) return(NextMethod()) + if (!cedta()) return(NextMethod()) # nocov e = eval(substitute(list(...)), `_data`, parent.frame()) tags = names(e) inx = chmatch(tags, names(`_data`)) @@ -2176,7 +2178,7 @@ any_na = function(x, by=seq_along(x)) .Call(CanyNA, x, by) na.omit.data.table = function (object, cols = seq_along(object), invert = FALSE, ...) { # compare to stats:::na.omit.data.frame - if (!cedta()) return(NextMethod()) + if (!cedta()) return(NextMethod()) # nocov if ( !missing(invert) && is.na(as.logical(invert)) ) stop("Argument 'invert' must be logical TRUE/FALSE") if (is.character(cols)) { @@ -2761,7 +2763,7 @@ rowid = function(..., prefix=NULL) { rowidv = function(x, cols=seq_along(x), prefix=NULL) { if (!is.null(prefix) && (!is.character(prefix) || length(prefix) != 1L)) - stop("prefix must be NULL or a character vector of length=1.") + stop("'prefix' must be NULL or a character vector of length 1.") if (is.atomic(x)) { if (!missing(cols) && !is.null(cols)) stop("x is a single vector, non-NULL 'cols' doesn't make sense.") @@ -2769,7 +2771,7 @@ rowidv = function(x, cols=seq_along(x), prefix=NULL) { x = as_list(x) } else { if (!length(cols)) - stop("x is a list, 'cols' can not be on 0-length.") + stop("x is a list, 'cols' cannot be 0-length.") if (is.character(cols)) cols = chmatch(cols, names(x)) cols = as.integer(cols) @@ -2790,7 +2792,7 @@ rleid = function(..., prefix=NULL) { rleidv = function(x, cols=seq_along(x), prefix=NULL) { if (!is.null(prefix) && (!is.character(prefix) || length(prefix) != 1L)) - 
stop("prefix must be NULL or a character vector of length=1.") + stop("'prefix' must be NULL or a character vector of length 1.") if (is.atomic(x)) { if (!missing(cols) && !is.null(cols)) stop("x is a single vector, non-NULL 'cols' doesn't make sense.") @@ -2798,7 +2800,7 @@ rleidv = function(x, cols=seq_along(x), prefix=NULL) { x = as_list(x) } else { if (!length(cols)) - stop("x is a list, 'cols' can not be 0-length.") + stop("x is a list, 'cols' cannot be 0-length.") if (is.character(cols)) cols = chmatch(cols, names(x)) cols = as.integer(cols) @@ -2880,7 +2882,7 @@ isReallyReal = function(x) { ## redirect to normal DT[x == TRUE] stub = call("==", as.symbol(col), TRUE) } - if (length(stub[[1L]]) != 1) return(NULL) ## Whatever it is, definitely not one of the valid operators + if (length(stub[[1L]]) != 1) return(NULL) # nocov Whatever it is, definitely not one of the valid operators operator = as.character(stub[[1L]]) if (!operator %chin% validOps$op) return(NULL) ## operator not supported if (!is.name(stub[[2L]])) return(NULL) @@ -2902,7 +2904,6 @@ isReallyReal = function(x) { # the mode() checks also deals with NULL since mode(NULL)=="NULL" and causes this return, as one CRAN package (eplusr 0.9.1) relies on return(NULL) } - if(is.character(x[[col]]) && !operator %chin% c("==", "%in%", "%chin%")) return(NULL) ## base R allows for non-equi operators on character columns, but these can't be optimized. if (!operator %chin% c("%in%", "%chin%")) { # additional requirements for notjoin and NA values. 
Behaviour is different for %in%, %chin% compared to other operators # RHS is of length=1 or n @@ -2998,7 +2999,6 @@ isReallyReal = function(x) { pat = paste0("(", ops, ")", collapse="|") if (is.call(onsub) && onsub[[1L]] == "eval") { onsub = eval(onsub[[2L]], parent.frame(2L), parent.frame(2L)) - if (is.call(onsub) && onsub[[1L]] == "eval") { onsub = onsub[[2L]] } } if (is.call(onsub) && as.character(onsub[[1L]]) %chin% c("list", ".")) { spat = paste0("[ ]+(", pat, ")[ ]+") diff --git a/R/groupingsets.R b/R/groupingsets.R index e1dda42654..a13d71cd50 100644 --- a/R/groupingsets.R +++ b/R/groupingsets.R @@ -82,7 +82,7 @@ groupingsets.data.table = function(x, j, by, sets, .SDcols, id = FALSE, jj, ...) } } if (id && "grouping" %chin% names(empty)) # `j` could have been evaluated to `grouping` field - stop("When using `id=TRUE` the 'j' expression must not evaluate to column named 'grouping'.") + stop("When using `id=TRUE` the 'j' expression must not evaluate to a column named 'grouping'.") if (anyDuplicated(names(empty)) > 0L) stop("There exists duplicated column names in the results, ensure the column passed/evaluated in `j` and those in `by` are not overlapping.") # adding grouping column to template - aggregation level identifier diff --git a/R/print.data.table.R b/R/print.data.table.R index aedbcf6720..ac82dcfafa 100644 --- a/R/print.data.table.R +++ b/R/print.data.table.R @@ -83,8 +83,7 @@ print.data.table = function(x, topn=getOption("datatable.print.topn"), expression = "", ordered = "") classes = vapply(x, function(col) class(col)[1L], "", USE.NAMES=FALSE) abbs = unname(class_abb[classes]) - if ( length(idx <- which(is.na(abbs))) ) - abbs[idx] = paste0("<", classes[idx], ">") + if ( length(idx <- which(is.na(abbs))) ) abbs[idx] = paste0("<", classes[idx], ">") toprint = rbind(abbs, toprint) rownames(toprint)[1L] = "" } diff --git a/R/setops.R b/R/setops.R index 230576c7d8..a4a3add4b6 100644 --- a/R/setops.R +++ b/R/setops.R @@ -154,7 +154,7 @@ 
all.equal.data.table = function(target, current, trim.levels=TRUE, check.attribu paste0(names(targetTypes)[w],"(",paste(targetTypes[w],currentTypes[w],sep="!="),")") ,collapse=" "))) } - + # check key k1 = key(target) k2 = key(current) diff --git a/R/transpose.R b/R/transpose.R index e4d15bba6a..25085c5c21 100644 --- a/R/transpose.R +++ b/R/transpose.R @@ -24,6 +24,8 @@ transpose = function(l, fill=NA, ignore.empty=FALSE, keep.names=NULL, make.names } tstrsplit = function(x, ..., fill=NA, type.convert=FALSE, keep, names=FALSE) { + if (!isTRUEorFALSE(names) && !is.character(names)) + stop("'names' must be TRUE/FALSE or a character vector.") ans = transpose(strsplit(as.character(x), ...), fill=fill, ignore.empty=FALSE) if (!missing(keep)) { keep = suppressWarnings(as.integer(keep)) @@ -37,8 +39,6 @@ tstrsplit = function(x, ..., fill=NA, type.convert=FALSE, keep, names=FALSE) { if(type.convert) ans = lapply(ans, type.convert, as.is = TRUE) if (isFALSE(names)) return(ans) else if (isTRUE(names)) names = paste0("V", seq_along(ans)) - if (!is.character(names)) - stop("'names' must be TRUE/FALSE or a character vector.") if (length(names) != length(ans)) { str = if (missing(keep)) "ans" else "keep" stop("length(names) (= ", length(names), diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index c934e36a21..edb1476b86 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -22,6 +22,7 @@ if (exists("test.data.table", .GlobalEnv, inherits=FALSE)) { as.IDate.default = data.table:::as.IDate.default as.ITime.default = data.table:::as.ITime.default binary = data.table:::binary + bmerge = data.table:::bmerge brackify = data.table:::brackify chmatchdup = data.table:::chmatchdup compactprint = data.table:::compactprint @@ -256,6 +257,7 @@ test(69.5, nrow(tables(env=xenv)), 1L, output="NAME NROW NCOL MB COLS KEY\n1: xenv$DT = data.table(A=1:2, B=3:4, C=5:6, D=7:8, E=9:10, F=11:12, G=13:14, H=15:16, key="A,D,F,G") test(69.6, nrow(tables(env=xenv)), 1L, output="NAME 
NROW NCOL MB COLS KEY\n1: DT 2 8 0 A,B,C,D,E,F,... A,D,F,G.*Total: 0MB") rm(xenv) +test(69.7, tables(order.col='asdf'), error="not a column name of info") a = "d" # Variable Twister. a in this scope has same name as a inside DT scope. @@ -1653,6 +1655,7 @@ test(567, DT[,.N,list(a,b)][,N,by=a]$N, c(1L,1L,2L,1L)) test(568, DT[,.N,list(a,b)][,unique(N),by=a]$V1, c(1L,2L,1L)) test(569, DT[,list(.N=.N),list(a,b)][,.N,a], error="The column '.N' can't be grouped because") test(570, DT[,list(.N=.N),list(a,b)][,unique(.N),a], error="The column '.N' can't be grouped because") +test(570.1, DT[,list(.I=.I),list(a,b)][,.I,a], error="The column '.I' can't be grouped because") # Test spaces in by="..." format, datatable-help on 31 March DT = data.table("a "=1:2, "b"=3:4," b"=5:6, v=1:6) @@ -8205,13 +8208,18 @@ test(1610.1, capture.output(print(DT1, class=TRUE)), "1: 1 1 a 1+0i 1 1", "2: 2 2 b 2+0i 2 2", "3: 3 3 c 3+0i 3 3")) -# fails on travis and appveyor; no idea why.. Passes on my mac and windows machine. 
-# test(1610.2, capture.output(print(DT2, class=TRUE)) -# c(" Dcol Pcol gcol Icol ucol", -# " ", -# "1: 2016-01-01 2016-01-01 01:00:00 TRUE 2016-01-01 1", -# "2: 2016-01-02 2016-01-02 01:00:00 TRUE 2016-01-02 2", -# "3: 2016-01-03 2016-01-03 01:00:00 TRUE 2016-01-03 3")) +DT2 = data.table( + Dcol = as.Date('2016-01-01') + 0:2, + Pcol = as.POSIXct('2016-01-01 01:00:00', tz = 'UTC') + 86400L*(0:2), + gcol = TRUE, Icol = as.IDate(16801) + 0:2, + ucol = `class<-`(1:3, 'asdf') +) +test(1610.2, capture.output(print(DT2, class=TRUE)), + c(" Dcol Pcol gcol Icol ucol", + " ", + "1: 2016-01-01 2016-01-01 01:00:00 TRUE 2016-01-01 1", + "2: 2016-01-02 2016-01-02 01:00:00 TRUE 2016-01-02 2", + "3: 2016-01-03 2016-01-03 01:00:00 TRUE 2016-01-03 3")) # fix for #833 l1 = list(a=seq_len(5), matrix(seq_len(25),ncol = 5, nrow = 5)) @@ -8395,7 +8403,11 @@ x = data.table(-0.000189921844659375) # tolerance in action y = data.table(-0.000189921844655161) test(1613.561, all(all.equal(x, y, ignore.row.order = FALSE), all.equal(x, y, ignore.row.order = TRUE))) test(1613.562, all(is.character(all.equal(x, y, ignore.row.order = FALSE, tolerance = 0)), is.character(all.equal(x, y, ignore.row.order = TRUE, tolerance = 0)))) -test(1613.563, all(all.equal(rbind(x,y), rbind(y,y), ignore.row.order = FALSE), all.equal(rbind(x,y), rbind(y,y), ignore.row.order = TRUE))) +test(1613.563, all( + all.equal(rbind(x,y), rbind(y,y), ignore.row.order=FALSE), + all.equal(rbind(x,y), rbind(y,y), ignore.row.order=TRUE), + all.equal(rbind(y,y), rbind(x,y), ignore.row.order=TRUE) +)) test(1613.564, all(is.character(all.equal(rbind(x,y), rbind(y,y), ignore.row.order = FALSE, tolerance = 0)), is.character(all.equal(rbind(x,y), rbind(y,y), ignore.row.order = TRUE, tolerance = 0)))) test(1613.565, all(all.equal(rbind(x,x,y), rbind(y,y,x), ignore.row.order = FALSE), is.character(r<-all.equal(rbind(x,x,y), rbind(y,y,x), ignore.row.order = TRUE)) && any(grepl("force 'tolerance' argument to 0", r)))) # no-match due factor 
force tolerance=0 test(1613.566, all(all.equal(rbind(x,y,y), rbind(x,y,y), ignore.row.order = FALSE, tolerance = 0), all.equal(rbind(x,y,y), rbind(x,y,y), ignore.row.order = TRUE, tolerance = 0))) @@ -15003,7 +15015,7 @@ test(2050.6, rbind(DT[1], data.table(f=factor(letters[10:11]))[0])[,levels(f)], test(2051.1, `-.IDate`(structure(0, class="Date"), 1L), structure(-1, class="Date")) test(2051.2, `-.IDate`(1L, 1L), error = 'only subtract from "IDate"') test(2051.3, format.data.table(1L), error = 'Possibly corrupt data.table') -test(2051.4, rleidv(prefix = 1L), error = 'prefix must be NULL or') +test(2051.4, rleidv(prefix = 1L), error = "'prefix' must be NULL or") ## passing Date to second argument of as.POSIXct.ITime t = as.ITime(0L) test(2051.5, as.POSIXct(t, structure(0L, class="Date")), .POSIXct(0, 'UTC')) @@ -15525,6 +15537,115 @@ test(2073.08, transpose(L, make.names=NA), error="make.names=NA is out of range test(2073.09, transpose(L, make.names=2), list(A=INT(1,4), B=INT(2,5), C=INT(3,6))) test(2073.10, transpose(L, make.names=2, keep.names='foo'), list(foo=c("a","b"), A=INT(1,4), B=INT(2,5), C=INT(3,6))) +# 2074.* miscellaneous coverage to bring *.R to 100%; see comments in PR #3761 +## i is NULL +x = NULL +test(2074.01, data.table(1:10)[x], data.table(NULL)) +## auto-guessing of byvars when none of the columns have "normal" names +test(2074.02, data.table(`0`=0, `1`=1)[ , TRUE, by = .(`0` + `1`)], data.table(`0`=1, V1=TRUE)) +## also eval.+ columns are OK, just not eval( patterns, #3758 +evaluate = function(x) c('F', 'D', 'C', 'B', 'A')[findInterval(x, c(0, 60, 70, 80, 90, 100))] +test(2074.03, data.table(grade=c(50L, 91L, 95L, 51L, 89L))[ , .N, by=evaluate(grade)], + data.table(evaluate=c('F', 'A', 'B'), N=c(2L, 2L, 1L))) +## error: use recursive character list indexing to assign when also doing alloc.col() +opt = options(datatable.alloccol=1L) +l = list(foo = list(bar = data.table(a = 1:3, b = 4:6))) +test(2074.04, l[[c('foo', 'bar')]][ , (letters) := 
16:18], error = 'under-allocated recursively indexed list') +options(opt) +## alloc.col when using 0-truelength j assigning to a subset +DT = data.table(a=1) +### construct incorrectly to have 0 truelength +zDT = structure(list(b=2), class = c('data.table', 'data.frame')) +test(2074.05, DT[1L, b := zDT], data.table(a=1, b=2)) +## nested .SD in j +DT = data.table(a=1, b=2) +test(2074.06, DT[ , c(.SD[1], .SD[1, .SD[1]]), by=a], data.table(a=1, b=2, b=2)) +## as.matrix.data.table when a column has columns (only possible when constructed incorrectly) +DT = structure(list(a=1:5, d=data.table(b=6:10, c=11:15), m=matrix(16:25, ncol=2L)), class = c('data.table', 'data.frame')) +test(2074.07, as.matrix(DT), matrix(1:25, ncol=5L, dimnames=list(NULL, c('a', 'd.b', 'd.c', 'm.1', 'm.2')))) +## can induce !cedta() from base::rownames to get this error +test(2074.08, rownames(structure(list(1:5), class='data.table')), error="Has it been created manually") +## default dimnames.data.table +test(2074.09, dimnames(data.table(a = 1)), list(NULL, 'a')) +## unlock argument of .shallow +DT = data.table(a = 1) +setattr(DT, '.data.table.locked', TRUE) +test(2074.10, attr(.shallow(DT, unlock=TRUE), '.data.table.locked'), NULL) +## coverage of rowidv & rleidv +test(2074.11, rowidv(1:10, cols=1), error="x is a single vector, non-NULL 'cols'") +test(2074.12, rowidv(1:10), rep(1L, 10L)) +test(2074.13, rowidv(list(1:10), cols=integer()), error="x is a list, 'cols' cannot be 0-length") +test(2074.14, rleidv(1:10, cols=1), error="x is a single vector, non-NULL 'cols'") +test(2074.15, rleidv(list(1:10), cols=integer()), error="x is a list, 'cols' cannot be 0-length") +## coverage of .prepareFastSubset +DT = data.table(V1=c('a', 'b', 'a'), V2 = c('hello', 'ello', 'llo'), x=TRUE) +test(2074.16, nrow(DT[!(V1=='a' & V2 %like% 'll')]), 1L) +y = c(TRUE, FALSE, FALSE) +test(2074.17, nrow(DT[x & y]), 1L) +setkey(DT, V1) +test(2074.18, DT[V1=='a', verbose=TRUE], output='Optimized subsetting with key') + +# 
print.data.table +DT2 = data.table(a=1:101) +test(2074.19, length(capture.output(print(DT2, nrows=1i))), 12L) +test(2074.20, length(capture.output(print(DT2[-1L], nrows=1i))), 102L) +test(2074.21, length(capture.output(print(DT2, nrows=-1L))), 0L) +test(2074.22, length(capture.output(print(DT2, topn=1i))), 12L) +test(2074.23, capture.output(print(DT2, topn=1L, col.names='none')), + c(" 1: 1", " --- ", "101: 101")) + +# foverlaps +x = data.table(start=NA_integer_, end=1L, key='start,end') +y = copy(x) +test(2074.24, foverlaps(x, y), error="NA values in data.table 'x' start column") +x[ , start := 0L] +setkey(x, start, end) +test(2074.25, foverlaps(x, y), error="NA values in data.table 'y' start column") +setkey(y, end, start) +test(2074.26, foverlaps(x, y), error="NA values in data.table 'y' end column") + +# cube +test(2074.27, cube(DT, by=1L), error="Argument 'by' must be a character") +test(2074.28, cube(DT, by='a', id=1L), error="Argument 'id' must be a logical") + +# groupingsets +test(2074.29, groupingsets(DT, .(grouping=max(1)), by='V1', sets=list('V1'), id=TRUE), + error="When using `id=TRUE` the 'j' expression must not evaluate to a column named 'grouping'") + +# tstrsplit +test(2074.30, tstrsplit('a', names=1L), error="'names' must be TRUE/FALSE or a character vector") + +# fcast with eval in fun.aggregate +DT[ , z := 0L] +test(2074.31, dcast(DT, V1 ~ z, fun.aggregate=eval(quote(length)), value.var='z'), + data.table(V1=c('a', 'b'), `0`=2:1,key='V1')) + +# fwrite both logical args +test(2074.32, fwrite(DT, logical01=TRUE, logicalAsInt=TRUE), error="logicalAsInt has been renamed") + +# merge.data.table +test(2074.33, merge(DT, DT, by.x = 1i, by.y=1i), error="A non-empty vector of column names are required") + +# shift naming +test(2074.34, shift(list(a=1:5, b=6:10), give.names=TRUE), list(a_lag_1=c(NA, 1:4), b_lag_1=c(NA, 6:9))) +test(2074.35, shift(1:5, 1:2, give.names=TRUE), list(V1_lag_1=c(NA, 1:4), V1_lag_2=c(NA, NA, 1:3))) + +# bmerge.c +x = 
data.table(a='a') +test(2074.36, bmerge(x, x, 1L, 1L, 0, FALSE, 0L, "all", '==', FALSE), error="rollends must be a length 2") +test(2074.37, bmerge(x, x, 1L, 1L, 'nearest', c(TRUE, FALSE), 0L, "all", 1L, FALSE), error="roll='nearest' can't be applied to a character") +# trigger reallocation on big non-equi-join +set.seed(384) +d = data.table(a=sample(150, 150, TRUE), b=1:150) +test(2074.38, nrow(d[d, on = .(a>a, b>b), allow.cartesian=TRUE]), 5722L) + +# fread.c +## ok / quoted branch of parse_double_extended +test(2074.39, fread('a,b\n"Inf,2\n'), data.table(a='"Inf', b=2L), warning="Found and resolved") +## verbose output +test(2074.40, fread('a\n1', na.strings=character(), verbose=TRUE), output='No NAstrings provided') +test(2074.41, fread('a\n1', na.strings='9', verbose=TRUE), output='One or more of the NAstrings looks like a number') + ################################### # Add new tests above this line # diff --git a/src/bmerge.c b/src/bmerge.c index a7246786a8..47bf74ed38 100644 --- a/src/bmerge.c +++ b/src/bmerge.c @@ -68,6 +68,7 @@ SEXP bmerge(SEXP iArg, SEXP xArg, SEXP icolsArg, SEXP xcolsArg, SEXP isorted, SE roll = 0.0; rollToNearest = FALSE; if (isString(rollarg)) { if (strcmp(CHAR(STRING_ELT(rollarg,0)),"nearest") != 0) error("roll is character but not 'nearest'"); + if (TYPEOF(VECTOR_ELT(i, icols[ncol-1]-1))==STRSXP) error("roll='nearest' can't be applied to a character column, yet."); roll=1.0; rollToNearest=TRUE; // the 1.0 here is just any non-0.0, so roll!=0.0 can be used later } else { if (!isReal(rollarg)) error("Internal error: roll is not character or double"); // # nocov @@ -77,8 +78,6 @@ SEXP bmerge(SEXP iArg, SEXP xArg, SEXP icolsArg, SEXP xcolsArg, SEXP isorted, SE if (!isLogical(rollendsArg) || LENGTH(rollendsArg) != 2) error("rollends must be a length 2 logical vector"); rollends = LOGICAL(rollendsArg); - if (rollToNearest && TYPEOF(VECTOR_ELT(i, icols[ncol-1]-1))==STRSXP) - error("roll='nearest' can't be applied to a character column, 
yet."); // nomatch arg nomatch = INTEGER(nomatchArg)[0]; diff --git a/src/fread.c b/src/fread.c index f4df818136..bb885eb088 100644 --- a/src/fread.c +++ b/src/fread.c @@ -558,7 +558,7 @@ static void Field(FieldParseContext *ctx) } break; default: - return; // Internal error: undefined quote rule + return; // # nocov Internal error: undefined quote rule } target->len = (int32_t)(ch - fieldStart); target->off = (int32_t)(fieldStart - ctx->anchor); @@ -1092,7 +1092,7 @@ int freadMain(freadMainArgs _args) { bool warningsAreErrors = args.warningsAreErrors; if (freadCleanup()) { - DTWARN("Previous fread() session was not cleaned up properly. Cleaned up ok at the beginning of this fread() call.\n"); + DTWARN("Previous fread() session was not cleaned up properly. Cleaned up ok at the beginning of this fread() call.\n"); // # nocov } if (verbose) DTPRINT("[01] Check arguments\n"); @@ -1202,8 +1202,8 @@ int freadMain(freadMainArgs _args) { if (fd==-1) STOP("file not found: %s",fnam); struct stat stat_buf; if (fstat(fd, &stat_buf) == -1) { - close(fd); - STOP("Opened file ok but couldn't obtain its size: %s", fnam); + close(fd); // # nocov + STOP("Opened file ok but couldn't obtain its size: %s", fnam); // # nocov } fileSize = (size_t) stat_buf.st_size; if (fileSize == 0) {close(fd); STOP("File is empty: %s", fnam);} @@ -1243,14 +1243,14 @@ int freadMain(freadMainArgs _args) { CloseHandle(hFile); // see https://msdn.microsoft.com/en-us/library/windows/desktop/aa366537(v=vs.85).aspx if (mmp == NULL) { #endif - int nbit = 8*sizeof(char *); + int nbit = 8*sizeof(char *); // #nocov STOP("Opened %s file ok but could not memory map it. This is a %dbit process. %s.", filesize_to_str(fileSize), nbit, - nbit<=32 ? "Please upgrade to 64bit" : "There is probably not enough contiguous virtual memory available"); + nbit<=32 ? 
"Please upgrade to 64bit" : "There is probably not enough contiguous virtual memory available"); // # nocov } sof = (const char*) mmp; if (verbose) DTPRINT(" Memory mapped ok\n"); } else { - STOP("Neither `input` nor `filename` are given, nothing to read."); + STOP("Internal error: Neither `input` nor `filename` are given, nothing to read."); // # nocov } eof = sof + fileSize; tMap = wallclock(); @@ -1893,9 +1893,9 @@ int freadMain(freadMainArgs _args) { ch = pos; memcpy(tmpType, type, (size_t)ncol) ; if (!userOverride(type, colNames, colNamesAnchor, ncol)) { // colNames must not be changed but type[] can be - if (verbose) DTPRINT(" Cancelled by user: userOverride() returned false."); - freadCleanup(); - return 1; + if (verbose) DTPRINT(" Cancelled by user: userOverride() returned false."); // # nocov + freadCleanup(); // # nocov + return 1; // # nocov } ndrop = 0; int nUserBumped=0;