From e32864b334b726e2666c519069b51d45f2f4186a Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Tue, 5 Oct 2021 13:33:27 -0700 Subject: [PATCH 1/6] exclude `...` from tables() search --- R/tables.R | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/R/tables.R b/R/tables.R index 99c59f0c4d..c75a5da843 100644 --- a/R/tables.R +++ b/R/tables.R @@ -2,10 +2,11 @@ MB = NCOL = NROW = NULL tables = function(mb=TRUE, order.col="NAME", width=80, - env=parent.frame(), silent=FALSE, index=FALSE) + env=parent.frame(), silent=FALSE, index=FALSE) { # Prints name, size and colnames of all data.tables in the calling environment by default - all_obj = objects(envir=env, all.names=TRUE) + # include "hidden" objects (starting with .) via all.names=TRUE, but exclude ... specifically, #5197 + all_obj = grep("...", objects(envir=env, all.names=TRUE), invert=TRUE, fixed=TRUE, value=TRUE) is_DT = which(vapply_1b(all_obj, function(x) is.data.table(get(x, envir=env)))) if (!length(is_DT)) { if (!silent) catf("No objects of class data.table exist in %s\n", if (identical(env, .GlobalEnv)) ".GlobalEnv" else format(env)) From 73c2b042c2e7882d5256332beb5a4dc276d3ff73 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Tue, 5 Oct 2021 20:39:25 +0000 Subject: [PATCH 2/6] add a test --- inst/tests/tests.Rraw | 2 ++ 1 file changed, 2 insertions(+) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 619ab67304..7f91d73168 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -18222,3 +18222,5 @@ DT1 = data.table(id=1:3, grp=c('a', 'a', 'b'), value=4:6) DT2 = data.table(grp = c('a', 'b'), agg = list(c('1' = 4, '2' = 5), c('3' = 6))) test(2217, DT1[, by = grp, .(agg = list(setNames(as.numeric(value), id)))], DT2) +# tables() doesn't find `...`, #5197 +test(2218, (function(...) tables())(), output = "No objects of class data.table exist") From 75cc7380c14cd2ab21051158f38179664195b024 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Tue, 5 Oct 2021 20:50:03 +0000 Subject: [PATCH 3/6] small tweaks to avoid double-sorting --- R/tables.R | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/R/tables.R b/R/tables.R index c75a5da843..a5d4aa5f98 100644 --- a/R/tables.R +++ b/R/tables.R @@ -6,9 +6,9 @@ tables = function(mb=TRUE, order.col="NAME", width=80, { # Prints name, size and colnames of all data.tables in the calling environment by default # include "hidden" objects (starting with .) via all.names=TRUE, but exclude ... specifically, #5197 - all_obj = grep("...", objects(envir=env, all.names=TRUE), invert=TRUE, fixed=TRUE, value=TRUE) - is_DT = which(vapply_1b(all_obj, function(x) is.data.table(get(x, envir=env)))) - if (!length(is_DT)) { + all_obj = grep("...", objects(envir=env, all.names=TRUE, sorted=order.col == "NAME"), invert=TRUE, fixed=TRUE, value=TRUE) + is_DT = vapply_1b(all_obj, function(x) is.data.table(get(x, envir=env))) + if (!any(is_DT)) { if (!silent) catf("No objects of class data.table exist in %s\n", if (identical(env, .GlobalEnv)) ".GlobalEnv" else format(env)) return(invisible(data.table(NULL))) } @@ -24,8 +24,11 @@ tables = function(mb=TRUE, order.col="NAME", width=80, KEY = list(key(DT)), INDICES = if (index) list(indices(DT))) })) - if (!order.col %chin% names(info)) stopf("order.col='%s' not a column name of info", order.col) - info = info[base::order(info[[order.col]])] # base::order to maintain locale ordering of table names + # objects() above handled the sorting for order.col=="NAME" + if (order.col != "NAME") { + if (!order.col %chin% names(info)) stopf("order.col='%s' not a column name of info", order.col) + info = info[base::order(info[[order.col]])] # base::order to maintain locale ordering of table names + } if (!silent) { # prettier printing on console pretty_format = function(x, width) { From 3412a2c058ddebaf2209561f63fde6e13e5bb775 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Tue, 5 Oct 2021 21:53:06 +0000 Subject: [PATCH 4/6] coverage --- inst/tests/tests.Rraw | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 7f91d73168..640bbd87ac 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -7490,7 +7490,8 @@ test(1536, duplicated(dt, incomparables=TRUE), error = "argument 'incomparables test(1537 , names(melt(dt, id.vars=1L, variable.name = "x", value.name="x")), c("x", "x.1", "x.2"), output = "Duplicate column names") # test for tables() -test(1538, tables(), output = "Total:") +test(1538.1, tables(), output = "Total:") +test(1538.2, !is.unsorted(tables(order.col="NROW")$NROW)) # uniqueN not support list-of-list: reverted #1224 d1 <- data.table(a = 1:4, l = list(list(letters[1:2]),list(Sys.time()),list(1:10),list(letters[1:2]))) From af73d5fdd55786b8c73bf95b4ff235abf6a104ab Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Tue, 5 Oct 2021 21:55:00 +0000 Subject: [PATCH 5/6] NEWS --- NEWS.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/NEWS.md b/NEWS.md index 18b5c5c49b..f593d97bba 100644 --- a/NEWS.md +++ b/NEWS.md @@ -376,6 +376,8 @@ 44. In v1.13.2 a version of an old bug was reintroduced where during a grouping operation list columns could retain a pointer to the last group. This affected only attributes of list elements and only if those were updated during the grouping operation, [#4963](https://github.com/Rdatatable/data.table/issues/4963). Thanks to @fujiaxiang for reporting and @avimallu and Václav Tlapák for investigating and the PR. +45. `tables()` skips the name `...` when searching an environment for `data.table`s, [#5197](https://github.com/Rdatatable/data.table/issues/5197). Thanks @greg-minshall for the report and @michaelchirico for the fix. + ## NOTES 1. New feature 29 in v1.12.4 (Oct 2019) introduced zero-copy coercion. Our thinking is that requiring you to get the type right in the case of `0` (type double) vs `0L` (type integer) is too inconvenient for you the user. So such coercions happen in `data.table` automatically without warning. Thanks to zero-copy coercion there is no speed penalty, even when calling `set()` many times in a loop, so there's no speed penalty to warn you about either. However, we believe that assigning a character value such as `"2"` into an integer column is more likely to be a user mistake that you would like to be warned about. The type difference (character vs integer) may be the only clue that you have selected the wrong column, or typed the wrong variable to be assigned to that column. For this reason we view character to numeric-like coercion differently and will warn about it. If it is correct, then the warning is intended to nudge you to wrap the RHS with `as.()` so that it is clear to readers of your code that a coercion from character to that type is intended. For example : From b7571ba07ff96ae742811108c87b3aac58efb3bb Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Tue, 5 Oct 2021 22:06:32 +0000 Subject: [PATCH 6/6] use mget() over repeated get() --- R/tables.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/tables.R b/R/tables.R index a5d4aa5f98..adb82066b5 100644 --- a/R/tables.R +++ b/R/tables.R @@ -7,7 +7,7 @@ tables = function(mb=TRUE, order.col="NAME", width=80, # Prints name, size and colnames of all data.tables in the calling environment by default # include "hidden" objects (starting with .) via all.names=TRUE, but exclude ... specifically, #5197 all_obj = grep("...", objects(envir=env, all.names=TRUE, sorted=order.col == "NAME"), invert=TRUE, fixed=TRUE, value=TRUE) - is_DT = vapply_1b(all_obj, function(x) is.data.table(get(x, envir=env))) + is_DT = vapply_1b(mget(all_obj, envir=env), is.data.table) if (!any(is_DT)) { if (!silent) catf("No objects of class data.table exist in %s\n", if (identical(env, .GlobalEnv)) ".GlobalEnv" else format(env)) return(invisible(data.table(NULL)))