diff --git a/NEWS.md b/NEWS.md index b639b2aedf..bfa80520ed 100644 --- a/NEWS.md +++ b/NEWS.md @@ -331,6 +331,8 @@ 45. `rbindlist` (and printing a `data.table` with over 100 rows because that uses `rbindlist(head, tail)`) could error with `malformed factor` for unordered factor columns containing a used `NA_character_` level, [#3915](https://github.com/Rdatatable/data.table/issues/3915). This is an unusual input for unordered factors because NA_integer_ is recommended by default in R. Thanks to @sindribaldur for reporting. +46. Adding a `list` column containing an item of type `list` to a one row `data.table` could fail, [#3626](https://github.com/Rdatatable/data.table/issues/3626). Thanks to Jakob Richter for reporting. + ## NOTES 1. `rbindlist`'s `use.names="check"` now emits its message for automatic column names (`"V[0-9]+"`) too, [#3484](https://github.com/Rdatatable/data.table/pull/3484). See news item 5 of v1.12.2 below. diff --git a/R/data.table.R b/R/data.table.R index 8773f1391e..5ad3d01f89 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -2498,12 +2498,6 @@ setcolorder = function(x, neworder=key(x)) set = function(x,i=NULL,j,value) # low overhead, loopable { - if (is.atomic(value)) { - # protect NAMED of atomic value from .Call's NAMED=2 by wrapping with list() - l = vector("list", 1L) - .Call(Csetlistelt,l,1L,value) # to avoid the copy by list() in R < 3.1.0 - value = l - } .Call(Cassign,x,i,j,NULL,value) invisible(x) } diff --git a/R/tables.R b/R/tables.R index 3ebdfa22ad..bcfab0c674 100644 --- a/R/tables.R +++ b/R/tables.R @@ -14,16 +14,14 @@ tables = function(mb=TRUE, order.col="NAME", width=80, DT_names = all_obj[is_DT] info = rbindlist(lapply(DT_names, function(dt_n){ DT = get(dt_n, envir=env) # doesn't copy - info_i = - data.table(NAME = dt_n, - NROW = nrow(DT), - NCOL = ncol(DT)) - if (mb) set(info_i, , "MB", round(as.numeric(object.size(DT))/1024^2)) - # mb is an option because object.size() appears to be slow. TO DO: revisit - set(info_i, , "COLS", list(list(names(DT)))) - set(info_i, , "KEY", list(list(key(DT)))) - if (index) set(info_i, , "INDICES", list(list(indices(DT)))) - info_i + data.table( # data.table excludes any NULL items (MB and INDICES optional) unlike list() + NAME = dt_n, + NROW = nrow(DT), + NCOL = ncol(DT), + MB = if (mb) round(as.numeric(object.size(DT))/1024^2), # object.size() is slow hence optional; TODO revisit + COLS = list(names(DT)), + KEY = list(key(DT)), + INDICES = if (index) list(indices(DT))) })) if (!order.col %chin% names(info)) stop("order.col='",order.col,"' not a column name of info") info = info[base::order(info[[order.col]])] # base::order to maintain locale ordering of table names diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index d61cd583cb..aaa547f45b 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -1006,8 +1006,10 @@ test(350.6, DT[c(0,0,0), .N], 0L) # Test recycling list() on RHS of := DT = data.table(a=1:3,b=4:6,c=7:9,d=10:12) -test(351.1, DT[,c("a","b"):=list(13:15)], ans<-data.table(a=13:15,b=13:15,c=7:9,d=10:12)) -test(351.2, DT[,c("a","b"):=13:15], ans) +test(351.1, DT[, c("a","b"):=list(13:15), verbose=TRUE], ans<-data.table(a=13:15,b=13:15,c=7:9,d=10:12), + output="RHS_list_of_columns == true.*Recycling single RHS list item across 2 columns") +test(351.2, DT[, c("a","b"):=13:15, verbose=TRUE], ans, + notOutput="revised") test(352.1, DT[,letters[1:4]:=list(1L,NULL)], error="Supplied 4 columns to be assigned 2 items. Please see NEWS for v1.12.2") test(352.2, DT[,letters[1:4]:=list(1L,NULL,2L,NULL)], data.table(a=c(1L,1L,1L),c=c(2L,2L,2L))) @@ -4860,29 +4862,34 @@ test(1293, ans1, ans2) dt <- data.table(a=1:3, b=c(7,8,9), c=c(TRUE, NA, FALSE), d=as.list(4:6), e=c("a", "b", "c")) test(1294.01, dt[, a := 1]$a, rep(1L, 3L)) -test(1294.02, dt[, a := 1.5]$a, rep(1L, 3L), warning="1.5.*double.*position 1 truncated.*integer.*column 1 named 'a'") +test(1294.02, dt[, a := 1.5]$a, rep(1L, 3L), + warning="1.5.*double.*position 1 truncated.*integer.*column 1 named 'a'") test(1294.03, dt[, a := NA]$a, rep(NA_integer_, 3L)) test(1294.04, dt[, a := "a"]$a, rep(NA_integer_, 3L), - warning=c("NAs introduced by coercion", - "Coerced 'character' RHS to 'integer'.*column 1 named 'a'")) -test(1294.05, dt[, a := list(list(1))]$a, error="Cannot coerce 'list' RHS to 'integer' to match.*column 1 named 'a'") + warning=c("Coercing 'character' RHS to 'integer'.*column 1 named 'a'", + "NAs introduced by coercion")) +test(1294.05, dt[, a := list(list(1))]$a, rep(1L, 3L), + warning="Coercing 'list' RHS to 'integer' to match.*column 1 named 'a'") test(1294.06, dt[, a := list(1L)]$a, rep(1L, 3L)) test(1294.07, dt[, a := list(1)]$a, rep(1L, 3L)) test(1294.08, dt[, a := TRUE]$a, rep(1L, 3L)) test(1294.09, dt[, b := 1L]$b, rep(1,3)) test(1294.10, dt[, b := NA]$b, rep(NA_real_,3)) test(1294.11, dt[, b := "bla"]$b, rep(NA_real_, 3), - warning=c("NAs introduced by coercion", - "Coerced 'character' RHS to 'double' to match.*column 2 named 'b'")) -test(1294.12, dt[, b := list(list(1))]$b, error="Cannot coerce 'list' RHS to 'double' to match.*column 2 named 'b'") + warning=c("Coercing 'character' RHS to 'double' to match.*column 2 named 'b'", + "NAs introduced by coercion")) +test(1294.12, dt[, b := list(list(1))]$b, rep(1,3), + warning="Coercing 'list' RHS to 'double' to match.*column 2 named 'b'") test(1294.13, dt[, b := TRUE]$b, rep(1,3)) test(1294.14, dt[, b := list(1)]$b, rep(1,3)) test(1294.15, dt[, c := 1]$c, rep(TRUE, 3)) test(1294.16, dt[, c := 1L]$c, rep(TRUE, 3)) test(1294.17, dt[, c := NA]$c, rep(NA, 3)) test(1294.18, dt[, c := list(1)]$c, rep(TRUE, 3)) -test(1294.19, dt[, c := list(list(1))]$c, error="Cannot coerce 'list' RHS to 'logical' to match.*column 3 named 'c'") -test(1294.20, dt[, c := "bla"]$c, rep(NA, 3), warning="Coerced 'character' RHS to 'logical'") +test(1294.19, dt[, c := list(list(1))]$c, rep(TRUE, 3), + warning="Coercing 'list' RHS to 'logical' to match.*column 3 named 'c'") +test(1294.20, dt[, c := "bla"]$c, rep(NA, 3), + warning="Coercing 'character' RHS to 'logical'") test(1294.21, dt[, d := 1]$d, rep(list(1), 3)) test(1294.22, dt[, d := 1L]$d, rep(list(1L), 3)) test(1294.23, dt[, d := TRUE]$d, rep(list(TRUE), 3)) @@ -4891,14 +4898,17 @@ test(1294.25, dt[, d := list(list(1))]$d, rep(list(1), 3)) test(1294.26, dt[, e := 1]$e, rep("1", 3)) test(1294.27, dt[, e := 1L]$e, rep("1", 3)) test(1294.28, dt[, e := TRUE]$e, rep("TRUE", 3)) -### -### TEMPORARILY OFF IN DEV. TO BE ADDRESSED BEFORE v1.12.4 RELEASE -### REVISIT IN #3909 or #3626 -### -### test(1294.29, dt[, e := list(list(1))]$e, rep("1", 3)) -### +test(1294.29, dt[, e := list(list(1))]$e, rep("1", 3), # e.g. revdep NNS does this; PR #3925 + warning="Coercing 'list' RHS to 'character' to match.*column 5 named 'e'") test(1294.30, dt[, e := "bla"]$e, rep("bla", 3)) test(1294.31, dt[, e := list("bla2")]$e, rep("bla2", 3)) +if (test_bit64) { + dt[, f:=as.integer64(10:12)] + test(1294.50, dt[, f:=1]$f, as.integer64(rep(1,3))) + test(1294.51, dt[, f:=NA]$f, as.integer64(rep(NA,3))) + test(1294.52, dt[, f:=list(list(1))]$f, + error="Cannot coerce 'list' RHS to 'integer64' to match.*column 6 named 'f'") +} # FR #5357, when LHS evaluates to integer(0), provide warning and return dt, not an error. dt = data.table(a = 1:5, b1 = 1:5, b2 = 1:5) @@ -15096,7 +15106,7 @@ inner = data.table(a=1:3, b=1:3) outer = data.table(aa=1, ab=list(inner)) old = options(datatable.verbose=TRUE) test(2049.1, outer$ab[[1]]$b <- 4L, 4L, # also tests the incorrect warning has gone - output="RHS_list_of_columns revised to false .df=0.*RHS_list_of_columns revised to false .df=1") + notOutput="revised") options(old) test(2049.2, outer$ab, list(data.table(a=1:3, b=4L))) test(2049.3, outer$ab[[1]][, b := 5L], data.table(a=1:3, b=5L)) @@ -15221,6 +15231,8 @@ test(2058.17, as.data.table(L), L = list(1:3, NULL, 4:6) test(2058.18, length(L), 3L) test(2058.19, as.data.table(L), data.table(V1=1:3, V2=4:6)) # V2 not V3 # no +DT = data.table(a=1:3, b=c(4,5,6)) +test(2058.20, DT[,b:=list(NULL)], data.table(a=1:3)) # no # rbindlist improved error message, #3638 DT = data.table(a=1) @@ -16283,6 +16295,25 @@ DT = data.table(A=rep(1:2,each=3), B=3:4, v=letters[1:6]) test(2118.1, DT[B==3L,v:=NA,by=A]$v, c(NA,"b",NA,"d",NA,"f")) test(2118.2, DT[,v:=NA,by=A]$v, rep(NA_character_,6L)) +# adding list column containing lists to a one-row data.table, #3626 +# tests 01-04 used to fail, now work +DT = data.table(a = 1) +list_column = list(list(a = 1, b = 2)) +test(2119.01, DT$b <- list_column, list_column) +test(2119.02, DT, ans<-data.table(a=1, b=list(list(a=1, b=2)))) +DT = data.table(a = 1) +test(2119.03, DT[, b:=list_column], ans) +test(2119.04, data.table(a=1L)[, newcol := list(list(2L, 3L))], data.table(a=1L, newcol=list(list(2L,3L)))) +# extra tests 10-17 from Jan in the issue, all no change from 1.12.2 +test(2119.10, data.table(a=1L)[, newcol := list(2L)], ans<-data.table(a=1L, newcol=2L)) +test(2119.11, data.table(a=1L)[, newcol := 2L], ans) +test(2119.12, data.table(a=1:2)[, newcol := list(2L)], ans<-data.table(a=1:2, newcol=2L)) +test(2119.13, data.table(a=1:2)[, newcol := 2L], ans) +test(2119.14, data.table(a=1L)[, newcol := list(list(2L))], data.table(a=1L, newcol=list(2L))) +test(2119.15, data.table(a=1L)[, newcol := list(2L, 3L)], error="Supplied 2 items to be assigned to 1 item") +test(2119.16, data.table(a=1:2)[, newcol := list(list(2L, 3L))], ans<-data.table(a=1:2, newcol=list(2L,3L))) +test(2119.17, data.table(a=1:2)[, newcol := list(2L, 3L)], ans) + ################################### # Add new tests above this line # diff --git a/src/assign.c b/src/assign.c index 090327c696..1d33f9c3c2 100644 --- a/src/assign.c +++ b/src/assign.c @@ -376,21 +376,27 @@ SEXP assign(SEXP dt, SEXP rows, SEXP cols, SEXP newcolnames, SEXP values) } if (any_duplicated(cols,FALSE)) error("Can't assign to the same column twice in the same query (duplicates detected)."); if (!isNull(newcolnames) && !isString(newcolnames)) error("newcolnames is supplied but isn't a character vector"); - bool RHS_list_of_columns = TYPEOF(values)==VECSXP && (length(cols)>1 || LENGTH(values)==1); // initial value; may be revised below + bool RHS_list_of_columns = TYPEOF(values)==VECSXP && length(cols)>1; // initial value; may be revised below if (verbose) Rprintf("RHS_list_of_columns == %s\n", RHS_list_of_columns ? "true" : "false"); + if (TYPEOF(values)==VECSXP && length(cols)==1 && length(values)==1) { + SEXP item = VECTOR_ELT(values,0); + if (isNull(item) || length(item)==1 || length(item)==targetlen) { + RHS_list_of_columns=true; + if (verbose) Rprintf("RHS_list_of_columns revised to true because RHS list has 1 item which is NULL, or whose length %d is either 1 or targetlen (%d). Please unwrap RHS.\n", length(item), targetlen); + } + } if (RHS_list_of_columns) { if (length(values)==0) error("Supplied %d columns to be assigned an empty list (which may be an empty data.table or data.frame since they are lists too). " "To delete multiple columns use NULL instead. To add multiple empty list columns, use list(list()).", length(cols)); - if (length(values)>1 && length(values)!=length(cols)) - error("Supplied %d columns to be assigned %d items. Please see NEWS for v1.12.2. Try adding/removing a list() or .() wrapper around the RHS.", length(cols), length(values)); - if (length(values)==1) { - // c("colA","colB"):=list(13:15) should use 13:15 for both columns (recycle 1 item ok). So just change RHS so we don't have to deal with recycling-length-1 later - SEXP item = VECTOR_ELT(values,0); - bool df; - if (!(df=INHERITS(item, char_dataframe))) values = item; // if() for #3474 - RHS_list_of_columns = false; - if (verbose) Rprintf("RHS_list_of_columns revised to false (df=%d)\n", df); + if (length(values)!=length(cols)) { + if (length(values)==1) { // test 351.1; c("colA","colB"):=list(13:15) uses 13:15 for both columns + values = VECTOR_ELT(values,0); + RHS_list_of_columns = false; + if (verbose) Rprintf("Recycling single RHS list item across %d columns. Please unwrap RHS.\n", length(cols)); + } else { + error("Supplied %d columns to be assigned %d items. Please see NEWS for v1.12.2.", length(cols), length(values)); + } } } // Check all inputs : @@ -824,15 +830,21 @@ const char *memrecycle(SEXP target, SEXP where, int start, int len, SEXP source, } } } else if (isString(source) && !isString(target) && !isNewList(target)) { + warning("Coercing 'character' RHS to '%s' to match the type of the target column (column %d named '%s').", + type2char(TYPEOF(target)), colnum, colname); + // this "Coercing ..." warning first to give context in case coerceVector warns 'NAs introduced by coercion' source = PROTECT(coerceVector(source, TYPEOF(target))); protecti++; - warning("Coerced 'character' RHS to '%s' to match the type of the target column (column %d named '%s').", + } else if (isNewList(source) && !isNewList(target)) { + if (targetIsI64) { + error("Cannot coerce 'list' RHS to 'integer64' to match the type of the target column (column %d named '%s').", colnum, colname); + // because R's coerceVector doesn't know about integer64 + } + // as in base R; e.g. let as.double(list(1,2,3)) work but not as.double(list(1,c(2,4),3)) + // relied on by NNS, simstudy and table.express; tests 1294.* + warning("Coercing 'list' RHS to '%s' to match the type of the target column (column %d named '%s').", type2char(TYPEOF(target)), colnum, colname); + source = PROTECT(coerceVector(source, TYPEOF(target))); protecti++; } else if ((TYPEOF(target)!=TYPEOF(source) || targetIsI64!=sourceIsI64) && !isNewList(target)) { - if (isNewList(source)) { - error("Cannot coerce 'list' RHS to '%s' to match the type of the target column (column %d named '%s').", - type2char(TYPEOF(target)), colnum, colname); - } - if (GetVerbose()) { // only take the (small) cost of GetVerbose() (search of options() list) when types don't match Rprintf("Zero-copy coerce when assigning '%s' to '%s' column %d named '%s'.\n",