From ddbb74b0de1e77ba5548dcc571b2e98282bed18f Mon Sep 17 00:00:00 2001 From: David Simons Date: Wed, 29 Jan 2020 07:20:49 -0500 Subject: [PATCH] reallocate recursively in copy, fixes #4205 --- NEWS.md | 2 ++ R/data.table.R | 23 ++++++++++------------- inst/tests/tests.Rraw | 2 ++ 3 files changed, 14 insertions(+), 13 deletions(-) diff --git a/NEWS.md b/NEWS.md index 2d607e9bad..cb3990e440 100644 --- a/NEWS.md +++ b/NEWS.md @@ -87,6 +87,8 @@ unit = "s") 6. `all.equal(DT, y)` no longer errors when `y` is not a data.table, [#4042](https://github.com/Rdatatable/data.table/issues/4042). Thanks to @d-sci for reporting and the PR. +7. `copy()` overallocates properly when input is a *nested* list of data.tables, [#4205](https://github.com/Rdatatable/data.table/issues/4205). Thanks to @d-sci for reporting and the PR. + ## NOTES 1. `as.IDate`, `as.ITime`, `second`, `minute`, and `hour` now recognize UTC equivalents for speed: GMT, GMT-0, GMT+0, GMT0, Etc/GMT, and Etc/UTC, [#4116](https://github.com/Rdatatable/data.table/issues/4116). diff --git a/R/data.table.R b/R/data.table.R index af4f14de0b..ace3523a7b 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -2335,21 +2335,18 @@ copy = function(x) { newx = .Call(Ccopy,x) # copies at length but R's duplicate() also copies truelength over. # TO DO: inside Ccopy it could reset tl to 0 or length, but no matter as selfrefok detects it # TO DO: revisit duplicate.c in R 3.0.3 and see where it's at - if (!is.data.table(x)) { - # fix for #1476. TODO: find if a cleaner fix is possible.. - if (is.list(x)) { - anydt = vapply_1b(x, is.data.table, use.names=FALSE) - if (sum(anydt)) { - newx[anydt] = lapply(newx[anydt], function(x) { - .Call(C_unlock, x) - setalloccol(x) - }) - } + + reallocate = function(y) { + if (is.data.table(y)) { + .Call(C_unlock, y) + setalloccol(y) + } else if (is.list(y)) { + y[] = lapply(y, reallocate) } - return(newx) # e.g. in as.data.table.list() the list is copied before changing to data.table + y } - .Call(C_unlock, newx) - setalloccol(newx) + + reallocate(newx) } .shallow = function(x, cols = NULL, retain.key = FALSE, unlock = FALSE) { diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index f6d17a0762..ab0911ad77 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -8274,6 +8274,8 @@ test(1606, fread("2,\n1,a,b", fill=TRUE), data.table(V1=2:1, V2=c("","a"), V3=c( dt = data.table(resp=c(1:5)) wide = copy(list(metrics = dt))$metrics # copy here copies the list of data.table and therefore didn't overallocate before.. test(1607, wide[, id := .I], data.table(resp = 1:5, id = 1:5)) +wide = copy(list(a = list(b = dt)))$a$b # check again on doubly nested list +test(1607.1, wide[, id := .I], data.table(resp = 1:5, id = 1:5)) # better fix for #1462, + improved error message (if this better fix fails) # no need for quote="" and sep="\t"..