From 96b3b6f0b59bc73f68df3b848cca3ce7fd1a42d2 Mon Sep 17 00:00:00 2001 From: mattdowle Date: Fri, 6 Sep 2019 11:33:27 -0700 Subject: [PATCH] named arguments for non-vectors get used as column name prefix again --- R/as.data.table.R | 15 +++++++++++---- R/data.table.R | 5 +++-- R/setkey.R | 3 ++- R/utils.R | 13 ++++++++----- inst/tests/tests.Rraw | 9 +++++++-- 5 files changed, 31 insertions(+), 14 deletions(-) diff --git a/R/as.data.table.R b/R/as.data.table.R index cd52cdab50..d9481dbebf 100644 --- a/R/as.data.table.R +++ b/R/as.data.table.R @@ -115,7 +115,14 @@ as.data.table.array = function(x, keep.rownames=FALSE, key=NULL, sorted=TRUE, va ans[] } -as.data.table.list = function(x, keep.rownames=FALSE, key=NULL, check.names=FALSE, ...) { +as.data.table.list = function(x, + keep.rownames=FALSE, + key=NULL, + check.names=FALSE, + .named=NULL, # (internal) whether the argument was named in the data.table() or cbind() call calling this as.data.table.list() + # e.g. cbind(foo=DF1, bar=DF2) have .named=c(TRUE,TRUE) due to the foo= and bar= and trigger "prefix." for non-vector items + ...) +{ n = length(x) eachnrow = integer(n) # vector of lengths of each column. may not be equal if silent repetition is required. eachncol = integer(n) @@ -166,15 +173,15 @@ as.data.table.list = function(x, keep.rownames=FALSE, key=NULL, check.names=FALS if (eachnrow[i]==0L && nrow>0L && is.atomic(xi)) # is.atomic to ignore list() since list() is a common way to initialize; let's not insist on list(NULL) warning("Item ", i, " has 0 rows but longest item has ", nrow, "; filled with NA") # the rep() in recycle() above creates the NA vector if (is.data.table(xi)) { # matrix and data.frame were coerced to data.table above - # vnames[[i]] = names(xi) #if (nm!="" && n>1L) paste(nm, names(xi), sep=".") else names(xi) + prefix = if (!isFALSE(.named[i]) && isTRUE(nchar(names(x)[i])>0L)) paste0(names(x)[i],".") else "" # test 2058.12 for (j in seq_along(xi)) { ans[[k]] = recycle(xi[[j]], nrow) - vnames[k] = names(xi)[j] + vnames[k] = paste0(prefix, names(xi)[j]) k = k+1L } } else { nm = names(x)[i] - vnames[k] = if (length(nm) && !is.na(nm) && nm!="") nm else paste0("V",i) + vnames[k] = if (length(nm) && !is.na(nm) && nm!="") nm else paste0("V",i) # i (not k) tested by 2058.14 to be the same as the past for now ans[[k]] = recycle(xi, nrow) k = k+1L } diff --git a/R/data.table.R b/R/data.table.R index 0d4418ec70..91ede5d212 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -50,10 +50,11 @@ data.table = function(..., keep.rownames=FALSE, check.names=FALSE, key=NULL, str # NOTE: It may be faster in some circumstances for users to create a data.table by creating a list l # first, and then setattr(l,"class",c("data.table","data.frame")) and forgo checking. x = list(...) # list() doesn't copy named inputs as from R >= 3.1.0 (a very welcome change) - names(x) = name_dots(...) + nd = name_dots(...) + names(x) = nd$vnames if (length(x)==0L) return( null.data.table() ) if (length(x)==1L && (is.null(x[[1L]]) || (is.list(x[[1L]]) && length(x[[1L]])==0L))) return( null.data.table() ) #5377 - ans = as.data.table.list(x, keep.rownames=keep.rownames, check.names=check.names) # see comments inside as.data.table.list re copies + ans = as.data.table.list(x, keep.rownames=keep.rownames, check.names=check.names, .named=nd$.named) # see comments inside as.data.table.list re copies if (!is.null(key)) { if (!is.character(key)) stop("key argument of data.table() must be character") if (length(key)==1L) { diff --git a/R/setkey.R b/R/setkey.R index 60472603af..42a4355d5c 100644 --- a/R/setkey.R +++ b/R/setkey.R @@ -339,7 +339,8 @@ CJ = function(..., sorted = TRUE, unique = FALSE) if (is.null(vnames <- names(l))) vnames = paste0("V", seq_len(length(l))) else if (any(tt <- vnames=="")) vnames[tt] = paste0("V", which(tt)) } else { - vnames = name_dots(...) + vnames = name_dots(...)$vnames + if (any(tt <- vnames=="")) vnames[tt] = paste0("V", which(tt)) } dups = FALSE # fix for #1513 for (i in seq_along(l)) { diff --git a/R/utils.R b/R/utils.R index 4f5d7c7238..0426a06654 100644 --- a/R/utils.R +++ b/R/utils.R @@ -74,12 +74,15 @@ name_dots = function(...) { } else { vnames[is.na(vnames)] = "" } - for (i in which(vnames=="")) { - if ((tmp <- deparse(dot_sub[[i]])[1L]) == make.names(tmp)) - vnames[i] = tmp + notnamed = vnames=="" + if (any(notnamed)) { + syms = sapply(dot_sub, is.symbol) # save the deparse() in most cases of plain symbol + for (i in which(notnamed)) { + tmp = if (syms[i]) as.character(dot_sub[[i]]) else deparse(dot_sub[[i]])[1L] + if (tmp == make.names(tmp)) vnames[i]=tmp + } } - if (length(w<-which(vnames==""))) vnames[w] = paste0("V", w) - vnames + list(vnames=vnames, .named=!notnamed) } # convert a vector like c(1, 4, 3, 2) into a string like [1, 4, 3, 2] diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index b2d014bef6..93aa828bf2 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -15102,11 +15102,11 @@ test(2056, as.data.table(a), ans) DF = structure(list(a=1:3, b=data.frame(foo=4:6, bar=7:9)), row.names=1:3, class="data.frame") DT = as.data.table(DF) test(2057.1, ncol(DT), 3L) -test(2057.2, DT[2], data.table(a=2L, foo=5L, bar=8L)) +test(2057.2, DT[2], data.table(a=2L, b.foo=5L, b.bar=8L)) DF = structure(list(a=list(c("a","b"), c("a","b"), c("a","b")), b=data.frame(foo=1:3, bar=4:6)), row.names=1:3, class="data.frame") # A list being first is needed to mimic the jsonlite case. With this passing, test.R works from https://github.com/Rdatatable/data.table/issues/3369#issuecomment-462662752 DT = as.data.table(DF) -test(2057.3, DT, data.table(a=list(c("a","b"), c("a","b"), c("a","b")), foo=1:3, bar=4:6)) +test(2057.3, DT, data.table(a=list(c("a","b"), c("a","b"), c("a","b")), b.foo=1:3, b.bar=4:6)) # one and two+ row cases of data.table, as.data.table and cbind involving list columns, given # the change to tests 1613.571-3 in PR#3471 in v1.12.4 @@ -15140,6 +15140,11 @@ test(2058.08, sapply(ans, class), c(V1="integer", V2="list")) test(2058.09, data.table( data.table(1:2), list(c("a","b"),"a") ), ans) # yes test(2058.10, as.data.table(list(data.table(1:2), list(c("a","b"),"a"))), ans) # yes test(2058.11, cbind(data.table(1:2), list(c("a","b"),"a")), ans) # yes +test(2058.12, cbind(first=data.table(A=1:3), second=data.table(A=4, B=5:7)), + data.table(first.A=1:3, second.A=4, second.B=5:7)) # no +test(2058.13, cbind(data.table(A=1:3), second=data.table(A=4, B=5:7)), + data.table(A=1:3, second.A=4, second.B=5:7)) # no +test(2058.14, cbind(data.table(A=1,B=2),3), data.table(A=1,B=2,V2=3)) # no # rbindlist improved error message, #3638 DT = data.table(a=1)