From 0a6fdf3f82057140c9028a794c8c9f6c2db63e95 Mon Sep 17 00:00:00 2001 From: mattdowle Date: Tue, 10 Sep 2019 14:54:16 -0700 Subject: [PATCH 1/6] invalid on= natural downgraded from error to warning --- R/data.table.R | 12 ++++++++---- inst/tests/tests.Rraw | 8 ++++++-- 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/R/data.table.R b/R/data.table.R index 6664dfb8bf..790a6eb475 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -160,10 +160,14 @@ replace_dot_alias = function(e) { naturaljoin = FALSE names_x = names(x) if (missing(i) && !missing(on)) { - i = eval.parent(.massagei(substitute(on))) - if (!is.list(i) || !length(names(i))) - stop("When on= is provided but not i=, on= must be a named list or data.table|frame, and a natural join (i.e. join on common names) is invoked") - naturaljoin = TRUE + tt = eval.parent(.massagei(substitute(on))) + if (!is.list(tt) || !length(names(tt))) { + warning("When on= is provided but not i=, on= must be a named list or data.table|frame, and a natural join (i.e. join on common names) is invoked. Ignoring on= which is '",class(tt)[1L],"'.") + on = NULL + } else { + i = tt + naturaljoin = TRUE + } } if (missing(i) && missing(j)) { tt_isub = substitute(i) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index d1fb0126a0..5d20118935 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -15714,8 +15714,12 @@ X = data.table(a=1:2, b=1:2) test(2076.01, X[on=.(a=2:3, d=2:1)], data.table(a=2:3, b=c(2L,NA_integer_), d=2:1)) Y = data.table(a=2:3, d=2:1) test(2076.02, X[on=Y], data.table(a=2:3, b=c(2L,NA_integer_), d=2:1)) -test(2076.03, X[on=3], error="When on= is provided but not i=, on= must be a named list or data.table|frame, and a natural join") -test(2076.04, X[on=list(3)], error="When on= is provided but not i=, on= must be a named list or data.table|frame, and a natural join") +test(2076.03, X[on=3], X, + warning=c("When on= is provided but not i=, on= must be a named list or data.table|frame, and a natural join.*Ignoring on= which is 'numeric'", + "i and j are both missing so ignoring the other arguments. This warning will be upgraded to error in future.")) +test(2076.04, X[on=list(3)], X, + warning=c("When on= is provided but not i=, on= must be a named list or data.table|frame, and a natural join.*Ignoring on= which is 'list'", + "i and j are both missing so ignoring the other arguments. This warning will be upgraded to error in future.")) # gsum int64 support #1647, #3464 if (test_bit64) { From 17aba9dadf3c4b42552fb5c41153eca1414cdf80 Mon Sep 17 00:00:00 2001 From: mattdowle Date: Tue, 10 Sep 2019 22:08:01 -0700 Subject: [PATCH 2/6] interim --- R/as.data.table.R | 8 +++++--- R/data.table.R | 21 +++++++++++---------- inst/tests/tests.Rraw | 5 +++++ 3 files changed, 21 insertions(+), 13 deletions(-) diff --git a/R/as.data.table.R b/R/as.data.table.R index aeef3ab1a9..ef4e1e5a6e 100644 --- a/R/as.data.table.R +++ b/R/as.data.table.R @@ -166,6 +166,8 @@ as.data.table.list = function(x, } vnames = character(ncol) k = 1L + x_names = names(x) + all_blank = !is.null(x_names) && all(x_names=="") # all empty columns is special case to retain all-blank names if list(), as v1.12.2 and before did, for batchtools #3581 for(i in seq_len(n)) { xi = x[[i]] if (is.null(xi)) next @@ -174,21 +176,21 @@ as.data.table.list = function(x, if (eachnrow[i]==0L && nrow>0L && is.atomic(xi)) # is.atomic to ignore list() since list() is a common way to initialize; let's not insist on list(NULL) warning("Item ", i, " has 0 rows but longest item has ", nrow, "; filled with NA") # the rep() in recycle() above creates the NA vector if (is.data.table(xi)) { # matrix and data.frame were coerced to data.table above - prefix = if (!isFALSE(.named[i]) && isTRUE(nchar(names(x)[i])>0L)) paste0(names(x)[i],".") else "" # test 2058.12 + prefix = if (!isFALSE(.named[i]) && isTRUE(nchar(x_names[i])>0L)) paste0(x_names[i],".") else "" # test 2058.12 for (j in seq_along(xi)) { ans[[k]] = recycle(xi[[j]], nrow) vnames[k] = paste0(prefix, names(xi)[j]) k = k+1L } } else { - nm = names(x)[i] - vnames[k] = if (length(nm) && !is.na(nm) && nm!="") nm else paste0("V",i) # i (not k) tested by 2058.14 to be the same as the past for now + vnames[k] = if (length(nm<-x_names[i]) && !is.na(nm) && nm!="") nm else paste0("V",i) # i (not k) tested by 2058.14 to be the same as the past for now ans[[k]] = recycle(xi, nrow) k = k+1L } } if (any(vnames==".SD")) stop("A column may not be called .SD. That has special meaning.") if (check.names) vnames = make.names(vnames, unique=TRUE) + if (all_blank) vnames = character(ncol) setattr(ans, "names", vnames) setDT(ans, key=key) # copy ensured above; also, setDT handles naming ans diff --git a/R/data.table.R b/R/data.table.R index 790a6eb475..1a9060c41f 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -2679,16 +2679,17 @@ setDT = function(x, keep.rownames=FALSE, key=NULL, check.names=FALSE) { brackify(sprintf('%s:%d', names(tbl), tbl)), "\nThe first entry with fewer than ", n_range[2L], " entries is ", which.max(n Date: Tue, 10 Sep 2019 22:12:49 -0700 Subject: [PATCH 3/6] interim --- R/as.data.table.R | 8 +++----- R/data.table.R | 21 ++++++++++----------- 2 files changed, 13 insertions(+), 16 deletions(-) diff --git a/R/as.data.table.R b/R/as.data.table.R index ef4e1e5a6e..aeef3ab1a9 100644 --- a/R/as.data.table.R +++ b/R/as.data.table.R @@ -166,8 +166,6 @@ as.data.table.list = function(x, } vnames = character(ncol) k = 1L - x_names = names(x) - all_blank = !is.null(x_names) && all(x_names=="") # all empty columns is special case to retain all-blank names if list(), as v1.12.2 and before did, for batchtools #3581 for(i in seq_len(n)) { xi = x[[i]] if (is.null(xi)) next @@ -176,21 +174,21 @@ as.data.table.list = function(x, if (eachnrow[i]==0L && nrow>0L && is.atomic(xi)) # is.atomic to ignore list() since list() is a common way to initialize; let's not insist on list(NULL) warning("Item ", i, " has 0 rows but longest item has ", nrow, "; filled with NA") # the rep() in recycle() above creates the NA vector if (is.data.table(xi)) { # matrix and data.frame were coerced to data.table above - prefix = if (!isFALSE(.named[i]) && isTRUE(nchar(x_names[i])>0L)) paste0(x_names[i],".") else "" # test 2058.12 + prefix = if (!isFALSE(.named[i]) && isTRUE(nchar(names(x)[i])>0L)) paste0(names(x)[i],".") else "" # test 2058.12 for (j in seq_along(xi)) { ans[[k]] = recycle(xi[[j]], nrow) vnames[k] = paste0(prefix, names(xi)[j]) k = k+1L } } else { - vnames[k] = if (length(nm<-x_names[i]) && !is.na(nm) && nm!="") nm else paste0("V",i) # i (not k) tested by 2058.14 to be the same as the past for now + nm = names(x)[i] + vnames[k] = if (length(nm) && !is.na(nm) && nm!="") nm else paste0("V",i) # i (not k) tested by 2058.14 to be the same as the past for now ans[[k]] = recycle(xi, nrow) k = k+1L } } if (any(vnames==".SD")) stop("A column may not be called .SD. That has special meaning.") if (check.names) vnames = make.names(vnames, unique=TRUE) - if (all_blank) vnames = character(ncol) setattr(ans, "names", vnames) setDT(ans, key=key) # copy ensured above; also, setDT handles naming ans diff --git a/R/data.table.R b/R/data.table.R index 1a9060c41f..790a6eb475 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -2679,17 +2679,16 @@ setDT = function(x, keep.rownames=FALSE, key=NULL, check.names=FALSE) { brackify(sprintf('%s:%d', names(tbl), tbl)), "\nThe first entry with fewer than ", n_range[2L], " entries is ", which.max(n Date: Wed, 11 Sep 2019 09:17:10 -0700 Subject: [PATCH 4/6] interim --- R/as.data.table.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/as.data.table.R b/R/as.data.table.R index aeef3ab1a9..ed2b103493 100644 --- a/R/as.data.table.R +++ b/R/as.data.table.R @@ -191,6 +191,7 @@ as.data.table.list = function(x, if (check.names) vnames = make.names(vnames, unique=TRUE) setattr(ans, "names", vnames) setDT(ans, key=key) # copy ensured above; also, setDT handles naming + if (!is.null(names(x)) && length(ans)==length(x) && all(names(x)=="")) setattr(ans, "names", names(x)) ans } From 0761f4475144b698f0991c5a927f2979fb12d451 Mon Sep 17 00:00:00 2001 From: mattdowle Date: Wed, 11 Sep 2019 11:44:42 -0700 Subject: [PATCH 5/6] passes batchtools now by retaining all-blank plain-list names --- R/as.data.table.R | 5 ++++- inst/tests/tests.Rraw | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/R/as.data.table.R b/R/as.data.table.R index ed2b103493..ddece83ce3 100644 --- a/R/as.data.table.R +++ b/R/as.data.table.R @@ -128,6 +128,9 @@ as.data.table.list = function(x, eachnrow = integer(n) # vector of lengths of each column. may not be equal if silent repetition is required. eachncol = integer(n) missing.check.names = missing(check.names) + allBlankListNames = missing(.named) && # as.data.table called directly, not from inside data.table() + !is.data.frame(x) && # just a plain list passed in, not data.frame or data.table + !is.null(names(x)) && all(names(x)=="") # all names already set to ""; caller has deliberate intent to achieve this for (i in seq_len(n)) { xi = x[[i]] if (is.null(xi)) next # eachncol already initialized to 0 by integer() above @@ -191,7 +194,7 @@ as.data.table.list = function(x, if (check.names) vnames = make.names(vnames, unique=TRUE) setattr(ans, "names", vnames) setDT(ans, key=key) # copy ensured above; also, setDT handles naming - if (!is.null(names(x)) && length(ans)==length(x) && all(names(x)=="")) setattr(ans, "names", names(x)) + if (allBlankListNames) setattr(ans, "names", character(ncol(ans))) ans } diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 2def6a8220..e0aa634cbd 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -15321,7 +15321,7 @@ test(2060.605, fcoalesce(NULL, "foo"), NULL) # as seen in mlr using BBmisc::coal # #3650 -- ensure max nrow check on CJ is applied after unique l = replicate(ceiling(log10(.Machine$integer.max)), rep(1L, 10L), simplify = FALSE) l$unique = TRUE -test(2061, do.call(CJ, l), setkey(unique(as.data.table(head(l, -1L))))) +test(2061, do.call(CJ, l), data.table(V1=1L, V2=1L, V3=1L, V4=1L, V5=1L, V6=1L, V7=1L, V8=1L, V9=1L, V10=1L, key=paste0("V",1:10))) # #3635, not specific to non-equi joins, but only occurs in non-equi cases. d1 = data.table(a = c(1L, 6L), b = c(11L, 16L)) From 26d554def326ecc577ae7f0a168f2f5d6ec43b5c Mon Sep 17 00:00:00 2001 From: mattdowle Date: Wed, 11 Sep 2019 12:18:48 -0700 Subject: [PATCH 6/6] generalized to as.data.table.list(L) keeping names(L) as-is even blanks and dups, as 1.12.2 did and is relied on --- R/as.data.table.R | 6 ++---- inst/tests/tests.Rraw | 5 +++++ 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/R/as.data.table.R b/R/as.data.table.R index ddece83ce3..c077361be2 100644 --- a/R/as.data.table.R +++ b/R/as.data.table.R @@ -128,9 +128,7 @@ as.data.table.list = function(x, eachnrow = integer(n) # vector of lengths of each column. may not be equal if silent repetition is required. eachncol = integer(n) missing.check.names = missing(check.names) - allBlankListNames = missing(.named) && # as.data.table called directly, not from inside data.table() - !is.data.frame(x) && # just a plain list passed in, not data.frame or data.table - !is.null(names(x)) && all(names(x)=="") # all names already set to ""; caller has deliberate intent to achieve this + origListNames = if (missing(.named)) names(x) else NULL # as.data.table called directly, not from inside data.table() which provides .named, #3854 for (i in seq_len(n)) { xi = x[[i]] if (is.null(xi)) next # eachncol already initialized to 0 by integer() above @@ -194,7 +192,7 @@ as.data.table.list = function(x, if (check.names) vnames = make.names(vnames, unique=TRUE) setattr(ans, "names", vnames) setDT(ans, key=key) # copy ensured above; also, setDT handles naming - if (allBlankListNames) setattr(ans, "names", character(ncol(ans))) + if (length(origListNames)==length(ans)) setattr(ans, "names", origListNames) # PR 3854 and tests 2058.15-17 ans } diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index e0aa634cbd..2308b6dfa7 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -15159,6 +15159,11 @@ test(2058.15, as.data.table(L), data.table(V1=1:3, V2=4:6)) # retain all-blank list names as batchtools relies on in reg$defs[1,job.pars], #3581 names(L) = c("","") test(2058.16, as.data.table(L), setnames(data.table(1:3, 4:6),c("",""))) # no +# retain existing duplicate and blank names of a plain-list, just as 1.12.2 did +L = list(1:3, 4:6, 7:9, 10:12) +names(L) = c("","foo","","foo") +test(2058.17, as.data.table(L), + setnames(data.table(1:3, 4:6, 7:9, 10:12),c("","foo","","foo"))) # no # rbindlist improved error message, #3638 DT = data.table(a=1)