From 75b560522da9aaa7375d1bf144fda7f6a14285e6 Mon Sep 17 00:00:00 2001 From: shrektan Date: Thu, 27 Feb 2020 10:08:51 +0000 Subject: [PATCH 01/20] should return zero row table if all the list's length is 1 or 0 (at least one of them is 0) --- R/as.data.table.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/as.data.table.R b/R/as.data.table.R index 308a7b2ffe..d4af548e78 100644 --- a/R/as.data.table.R +++ b/R/as.data.table.R @@ -152,6 +152,7 @@ as.data.table.list = function(x, ncol = sum(eachncol) # hence removes NULL items silently (no error or warning), #842. if (ncol==0L) return(null.data.table()) nrow = max(eachnrow) + if (any(eachnrow==0L) && all(eachnrow<=1L)) nrow = 0L ans = vector("list",ncol) # always return a new VECSXP recycle = function(x, nrow) { if (length(x)==nrow) { From f9b2f21c4238d8595aea3cd16a6133c80ec208e3 Mon Sep 17 00:00:00 2001 From: shrektan Date: Thu, 27 Feb 2020 10:09:04 +0000 Subject: [PATCH 02/20] no warning is going to be throwed --- inst/tests/tests.Rraw | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 7cc6819e8f..d3aec8858a 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -5798,7 +5798,7 @@ test(1380, DT[a==TRUE], DT[3:4]) # Fix #847, as.data.table.list and character(0) issue x <- data.table(a=character(0), b=character(0), c=numeric(0)) setkey(x, a, b) -test(1381, x[J("foo", character(0)), nomatch=0L], x, warning="Item 2 has 0 rows but longest item has 1; filled with NA") +test(1381, x[J("foo", character(0)), nomatch=0L], x) # Fix for #813 and #758 DT = data.table(x = 1:2) From a7df4baf970b9ee7a7bed7d65594c5ffb46615fb Mon Sep 17 00:00:00 2001 From: shrektan Date: Thu, 27 Feb 2020 10:18:40 +0000 Subject: [PATCH 03/20] zero length is only meaningful for atomic and non-NULL elements 1. non atomic like list() is going to be recycled and not the "zero-length" we are referring to (we can regard list() is length 1 here) 2. NULL element is going to be discarded in later loop --- R/as.data.table.R | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/R/as.data.table.R b/R/as.data.table.R index d4af548e78..612fde8d8d 100644 --- a/R/as.data.table.R +++ b/R/as.data.table.R @@ -152,7 +152,9 @@ as.data.table.list = function(x, ncol = sum(eachncol) # hence removes NULL items silently (no error or warning), #842. if (ncol==0L) return(null.data.table()) nrow = max(eachnrow) - if (any(eachnrow==0L) && all(eachnrow<=1L)) nrow = 0L + # only check the atomic and nonNULL type because NULL will be ignored while non-atomic (e.g., list) is always recycled + atomic_nonnull = vapply(x, function(xi) is.atomic(xi) && !is.null(xi), TRUE) + if (any(eachnrow[atomic_nonnull]==0L) && all(eachnrow<=1L)) nrow = 0L ans = vector("list",ncol) # always return a new VECSXP recycle = function(x, nrow) { if (length(x)==nrow) { From bb24a939e5ffa708c2faf79f4be89cb94d36c4a3 Mon Sep 17 00:00:00 2001 From: shrektan Date: Thu, 27 Feb 2020 10:51:57 +0000 Subject: [PATCH 04/20] add tests --- inst/tests/tests.Rraw | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index d3aec8858a..2d70463010 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -16846,3 +16846,10 @@ A = data.table(A=c(complex(real = 1:3, imaginary=c(0, -1, 1)), NaN)) test(2138.3, rbind(A,B), data.table(A=c(as.character(A$A), B$A))) A = data.table(A=as.complex(rep(NA, 5))) test(2138.4, rbind(A,B), data.table(A=c(as.character(A$A), B$A))) + +# Should return zero-length data.table if all elements are length 1,0 and at least one is length zero. In short, should recyle the scalar instead of the zero-length vector when both exists, #3727 +test(2139.1, data.table(A=double(),B=1.0), data.table(A=double(),B=double())) +DT <- data.table(CODE = c('a', 'b'), DATE = 1:2, VALUE = c(1.3, 1.5), key = c('CODE', 'DATE')) +test(2139.2, DT[J(character(), 1), VALUE], double()) # because "J" is a wrapper of list() +test(2139.3, data.table(A=NULL,B=1.0), data.table(B=1.0)) # NULL is omited +test(2139.4, NROW(data.table(A=list(),B=1.0)), 1L) # list() is length 0 but not atomic so should return a length 1 data.table as it should be regarded as `list(list())`, which is length 1. From e411bed0d8dd068f9a6f69aa443e64bbd4083a37 Mon Sep 17 00:00:00 2001 From: shrektan Date: Thu, 27 Feb 2020 10:57:17 +0000 Subject: [PATCH 05/20] add the NEWs entry --- NEWS.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/NEWS.md b/NEWS.md index 71fd76aa65..d20302e01e 100644 --- a/NEWS.md +++ b/NEWS.md @@ -81,6 +81,8 @@ unit = "s") 14. Added support for `round()` and `trunc()` to extend functionality of `ITime`. `round()` and `trunc()` can be used with argument units: "hours" or "minutes". Thanks to @JensPederM for the suggestion and PR. +15. `as.data.table.list` now always recyles the scalar when only scalar and zero-length atomic vector exists. It means that `data.table(A = 1, B = double())` now returns a zero-row data.table object and `DT[J(character(), 1), VALUE]` returns a zero-length vector, which are expected in most cases, [#3727](https://github.com/Rdatatable/data.table/issues/3727). Thanks to @shrektan for the suggestion and PR. + ## BUG FIXES 1. A NULL timezone on POSIXct was interpreted by `as.IDate` and `as.ITime` as UTC rather than the session's default timezone (`tz=""`) , [#4085](https://github.com/Rdatatable/data.table/issues/4085). From fb3b72bd865695d9a9bf1851dd1ff915eb683836 Mon Sep 17 00:00:00 2001 From: shrektan Date: Thu, 27 Feb 2020 11:05:10 +0000 Subject: [PATCH 06/20] add more comment --- R/as.data.table.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/as.data.table.R b/R/as.data.table.R index 612fde8d8d..5198a0ac23 100644 --- a/R/as.data.table.R +++ b/R/as.data.table.R @@ -154,6 +154,7 @@ as.data.table.list = function(x, nrow = max(eachnrow) # only check the atomic and nonNULL type because NULL will be ignored while non-atomic (e.g., list) is always recycled atomic_nonnull = vapply(x, function(xi) is.atomic(xi) && !is.null(xi), TRUE) + # #3727 if all the inputs are scalar or length-zero, the output should be zero since we should always recycle the scalar if (any(eachnrow[atomic_nonnull]==0L) && all(eachnrow<=1L)) nrow = 0L ans = vector("list",ncol) # always return a new VECSXP recycle = function(x, nrow) { From 6e2cf676afedc0187c963e5c4f7931c442ee1dcd Mon Sep 17 00:00:00 2001 From: shrektan Date: Sat, 9 May 2020 14:56:31 +0800 Subject: [PATCH 07/20] should return zero-nrow data.table object when any nonnull and atomic element is length zero --- NEWS.md | 2 +- R/as.data.table.R | 4 ++-- inst/tests/tests.Rraw | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/NEWS.md b/NEWS.md index d20302e01e..706ceb27c5 100644 --- a/NEWS.md +++ b/NEWS.md @@ -81,7 +81,7 @@ unit = "s") 14. Added support for `round()` and `trunc()` to extend functionality of `ITime`. `round()` and `trunc()` can be used with argument units: "hours" or "minutes". Thanks to @JensPederM for the suggestion and PR. -15. `as.data.table.list` now always recyles the scalar when only scalar and zero-length atomic vector exists. It means that `data.table(A = 1, B = double())` now returns a zero-row data.table object and `DT[J(character(), 1), VALUE]` returns a zero-length vector, which are expected in most cases, [#3727](https://github.com/Rdatatable/data.table/issues/3727). Thanks to @shrektan for the suggestion and PR. +15. `as.data.table.list` will only recycle the scalar elements and no longer recycle the length-0 ones. It means that `data.table(A = 1:3, B = double())` now returns a zero-row data.table object so `DT[J(1:3, double()), VALUE]` returns a zero-length vector, which is expected in most cases, [#3727](https://github.com/Rdatatable/data.table/issues/3727). Thanks to @shrektan for the suggestion and PR. ## BUG FIXES diff --git a/R/as.data.table.R b/R/as.data.table.R index 5198a0ac23..fc29d0ccfc 100644 --- a/R/as.data.table.R +++ b/R/as.data.table.R @@ -154,8 +154,8 @@ as.data.table.list = function(x, nrow = max(eachnrow) # only check the atomic and nonNULL type because NULL will be ignored while non-atomic (e.g., list) is always recycled atomic_nonnull = vapply(x, function(xi) is.atomic(xi) && !is.null(xi), TRUE) - # #3727 if all the inputs are scalar or length-zero, the output should be zero since we should always recycle the scalar - if (any(eachnrow[atomic_nonnull]==0L) && all(eachnrow<=1L)) nrow = 0L + # #3727 if any atomic and nonnull element is length-zero, the output should be zero + if (any(eachnrow[atomic_nonnull]==0L)) nrow = 0L ans = vector("list",ncol) # always return a new VECSXP recycle = function(x, nrow) { if (length(x)==nrow) { diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 2d70463010..9f13a682e7 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -16847,8 +16847,8 @@ test(2138.3, rbind(A,B), data.table(A=c(as.character(A$A), B$A))) A = data.table(A=as.complex(rep(NA, 5))) test(2138.4, rbind(A,B), data.table(A=c(as.character(A$A), B$A))) -# Should return zero-length data.table if all elements are length 1,0 and at least one is length zero. In short, should recyle the scalar instead of the zero-length vector when both exists, #3727 -test(2139.1, data.table(A=double(),B=1.0), data.table(A=double(),B=double())) +# Should return zero-nrow data.table if any non-null&atomic element is length 0 #3727 +test(2139.1, data.table(A=double(),B=1:2), data.table(A=double(),B=double())) DT <- data.table(CODE = c('a', 'b'), DATE = 1:2, VALUE = c(1.3, 1.5), key = c('CODE', 'DATE')) test(2139.2, DT[J(character(), 1), VALUE], double()) # because "J" is a wrapper of list() test(2139.3, data.table(A=NULL,B=1.0), data.table(B=1.0)) # NULL is omited From 5b83a316ea61d0e85854eeca7c6afcab8415b3a2 Mon Sep 17 00:00:00 2001 From: shrektan Date: Sat, 23 May 2020 05:08:28 +0000 Subject: [PATCH 08/20] fix test 2139.1 - should use integer() not double() --- inst/tests/tests.Rraw | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 9f13a682e7..30081ab64f 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -16848,7 +16848,7 @@ A = data.table(A=as.complex(rep(NA, 5))) test(2138.4, rbind(A,B), data.table(A=c(as.character(A$A), B$A))) # Should return zero-nrow data.table if any non-null&atomic element is length 0 #3727 -test(2139.1, data.table(A=double(),B=1:2), data.table(A=double(),B=double())) +test(2139.1, data.table(A=double(),B=1:2), data.table(A=double(),B=integer())) DT <- data.table(CODE = c('a', 'b'), DATE = 1:2, VALUE = c(1.3, 1.5), key = c('CODE', 'DATE')) test(2139.2, DT[J(character(), 1), VALUE], double()) # because "J" is a wrapper of list() test(2139.3, data.table(A=NULL,B=1.0), data.table(B=1.0)) # NULL is omited From 09051245440fc12156fbbf274abb847ebedd9ddb Mon Sep 17 00:00:00 2001 From: shrektan Date: Sat, 23 May 2020 05:08:49 +0000 Subject: [PATCH 09/20] fix test 1967.35 --- inst/tests/tests.Rraw | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 30081ab64f..05129ea35c 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -13673,7 +13673,8 @@ test(1967.34, data.table(1:5, NULL), data.table(V1=1:5)) ### if (novname[i]) vnames[[i]] = namesi ### but, on pause for now pending #3193 ### test(1967.35, data.table(1:5, matrix(6:15, nrow = 5L)) -test(1967.35, data.table(1:5, integer(0L)), data.table(1:5, NA_integer_), warning="Item 2 has 0 rows but longest item has 5; filled with NA") +# We no longer recycle zero-length vector after PR#4262 +test(1967.35, data.table(1:5, integer(0L)), data.table(integer(0L), integer(0L))) test(1967.36, data.table(1:5, key = 5L), error = 'must be character') x = data.table(a = 1:5) From 07db3fd3a1bc30f071e80d6e094d64398abe4805 Mon Sep 17 00:00:00 2001 From: shrektan Date: Sat, 23 May 2020 05:11:33 +0000 Subject: [PATCH 10/20] use = in test not <- --- inst/tests/tests.Rraw | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 05129ea35c..93232c50b8 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -16850,7 +16850,7 @@ test(2138.4, rbind(A,B), data.table(A=c(as.character(A$A), B$A))) # Should return zero-nrow data.table if any non-null&atomic element is length 0 #3727 test(2139.1, data.table(A=double(),B=1:2), data.table(A=double(),B=integer())) -DT <- data.table(CODE = c('a', 'b'), DATE = 1:2, VALUE = c(1.3, 1.5), key = c('CODE', 'DATE')) +DT = data.table(CODE = c('a', 'b'), DATE = 1:2, VALUE = c(1.3, 1.5), key = c('CODE', 'DATE')) test(2139.2, DT[J(character(), 1), VALUE], double()) # because "J" is a wrapper of list() test(2139.3, data.table(A=NULL,B=1.0), data.table(B=1.0)) # NULL is omited test(2139.4, NROW(data.table(A=list(),B=1.0)), 1L) # list() is length 0 but not atomic so should return a length 1 data.table as it should be regarded as `list(list())`, which is length 1. From ee65ea0903b1b74e177df902b5f71e8466e60093 Mon Sep 17 00:00:00 2001 From: shrektan Date: Sat, 23 May 2020 05:12:21 +0000 Subject: [PATCH 11/20] add a new test --- inst/tests/tests.Rraw | 2 ++ 1 file changed, 2 insertions(+) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 93232c50b8..73e50c62d3 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -16854,3 +16854,5 @@ DT = data.table(CODE = c('a', 'b'), DATE = 1:2, VALUE = c(1.3, 1.5), key = c('CO test(2139.2, DT[J(character(), 1), VALUE], double()) # because "J" is a wrapper of list() test(2139.3, data.table(A=NULL,B=1.0), data.table(B=1.0)) # NULL is omited test(2139.4, NROW(data.table(A=list(),B=1.0)), 1L) # list() is length 0 but not atomic so should return a length 1 data.table as it should be regarded as `list(list())`, which is length 1. +DT = data.table(A = 1:3, B = letters[1:3]) +test(2139.5, DT[A > 3L, .(ITEM = 'A>3', A, B)], DT[A > 3][, .(ITEM = 'A>3', A, B)]) # the two operations should be identical now From 45e97702795d9672eb08ddcfa7dbaf1246f2b749 Mon Sep 17 00:00:00 2001 From: shrektan Date: Sat, 23 May 2020 06:14:45 +0000 Subject: [PATCH 12/20] remove the previous recycle warning --- R/as.data.table.R | 2 -- 1 file changed, 2 deletions(-) diff --git a/R/as.data.table.R b/R/as.data.table.R index fc29d0ccfc..b64b4bcd1c 100644 --- a/R/as.data.table.R +++ b/R/as.data.table.R @@ -177,8 +177,6 @@ as.data.table.list = function(x, if (is.null(xi)) { n_null = n_null+1L; next } if (eachnrow[i]>1L && nrow%%eachnrow[i]!=0L) # in future: eachnrow[i]!=nrow warning("Item ", i, " has ", eachnrow[i], " rows but longest item has ", nrow, "; recycled with remainder.") - if (eachnrow[i]==0L && nrow>0L && is.atomic(xi)) # is.atomic to ignore list() since list() is a common way to initialize; let's not insist on list(NULL) - warning("Item ", i, " has 0 rows but longest item has ", nrow, "; filled with NA") # the rep() in recycle() above creates the NA vector if (is.data.table(xi)) { # matrix and data.frame were coerced to data.table above prefix = if (!isFALSE(.named[i]) && isTRUE(nchar(names(x)[i])>0L)) paste0(names(x)[i],".") else "" # test 2058.12 for (j in seq_along(xi)) { From 505e80adbd0123ff7db397aec653ee0fa7b45012 Mon Sep 17 00:00:00 2001 From: shrektan Date: Sat, 23 May 2020 06:19:51 +0000 Subject: [PATCH 13/20] use vapply_1b --- R/as.data.table.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/as.data.table.R b/R/as.data.table.R index b64b4bcd1c..810b247461 100644 --- a/R/as.data.table.R +++ b/R/as.data.table.R @@ -153,7 +153,7 @@ as.data.table.list = function(x, if (ncol==0L) return(null.data.table()) nrow = max(eachnrow) # only check the atomic and nonNULL type because NULL will be ignored while non-atomic (e.g., list) is always recycled - atomic_nonnull = vapply(x, function(xi) is.atomic(xi) && !is.null(xi), TRUE) + atomic_nonnull = vapply_1b(x, function(xi) is.atomic(xi) && !is.null(xi)) # #3727 if any atomic and nonnull element is length-zero, the output should be zero if (any(eachnrow[atomic_nonnull]==0L)) nrow = 0L ans = vector("list",ncol) # always return a new VECSXP From e53390893c9c19abf547ac6dc26dccb7352f3ea8 Mon Sep 17 00:00:00 2001 From: shrektan Date: Sat, 23 May 2020 06:36:00 +0000 Subject: [PATCH 14/20] tweak the NEWS item --- NEWS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index 706ceb27c5..18e49af225 100644 --- a/NEWS.md +++ b/NEWS.md @@ -81,7 +81,7 @@ unit = "s") 14. Added support for `round()` and `trunc()` to extend functionality of `ITime`. `round()` and `trunc()` can be used with argument units: "hours" or "minutes". Thanks to @JensPederM for the suggestion and PR. -15. `as.data.table.list` will only recycle the scalar elements and no longer recycle the length-0 ones. It means that `data.table(A = 1:3, B = double())` now returns a zero-row data.table object so `DT[J(1:3, double()), VALUE]` returns a zero-length vector, which is expected in most cases, [#3727](https://github.com/Rdatatable/data.table/issues/3727). Thanks to @shrektan for the suggestion and PR. +15. `as.data.table.list` now no longer recycles the length-0 element. Previously, this was recycled by filling with `NA` with a warning. It means that `data.table(A = 1:3, B = double())` now returns a zero-row data.table object so `DT[J(1:3, double()), VALUE]` returns a zero-length vector, which is expected in most cases, [#3727](https://github.com/Rdatatable/data.table/issues/3727). Thanks to @shrektan for the suggestion and PR. ## BUG FIXES From 0a78f78e79d8037ecdc2739d03781c99de1e5185 Mon Sep 17 00:00:00 2001 From: Matt Dowle Date: Mon, 26 Apr 2021 15:36:52 -0600 Subject: [PATCH 15/20] slightly simpler; nrow=if() --- R/as.data.table.R | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/R/as.data.table.R b/R/as.data.table.R index 810b247461..47219206a2 100644 --- a/R/as.data.table.R +++ b/R/as.data.table.R @@ -129,6 +129,7 @@ as.data.table.list = function(x, eachncol = integer(n) missing.check.names = missing(check.names) origListNames = if (missing(.named)) names(x) else NULL # as.data.table called directly, not from inside data.table() which provides .named, #3854 + empty_atomic = FALSE for (i in seq_len(n)) { xi = x[[i]] if (is.null(xi)) next # eachncol already initialized to 0 by integer() above @@ -148,14 +149,13 @@ as.data.table.list = function(x, } eachnrow[i] = NROW(xi) # for a vector (including list() columns) returns the length eachncol[i] = NCOL(xi) # for a vector returns 1 + if (is.atomic(xi) && length(xi)==0L && !is.null(xi)) { + empty_atomic = TRUE # any empty atomic (not empty list()) should result in nrows=0L, #3727 + } } ncol = sum(eachncol) # hence removes NULL items silently (no error or warning), #842. if (ncol==0L) return(null.data.table()) - nrow = max(eachnrow) - # only check the atomic and nonNULL type because NULL will be ignored while non-atomic (e.g., list) is always recycled - atomic_nonnull = vapply_1b(x, function(xi) is.atomic(xi) && !is.null(xi)) - # #3727 if any atomic and nonnull element is length-zero, the output should be zero - if (any(eachnrow[atomic_nonnull]==0L)) nrow = 0L + nrow = if (empty_atomic) 0L else max(eachnrow) ans = vector("list",ncol) # always return a new VECSXP recycle = function(x, nrow) { if (length(x)==nrow) { From 85933703c1d387fcd7156cacbe4a062bb417ac84 Mon Sep 17 00:00:00 2001 From: Matt Dowle Date: Mon, 26 Apr 2021 15:48:25 -0600 Subject: [PATCH 16/20] minor tidy of new tests; e.g. vertical align of 2171.5 to more easily see the difference between x and y being tested --- inst/tests/tests.Rraw | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index d84d52f90b..80bacac2e8 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -17348,11 +17348,12 @@ test(2170.3, DT[A > -1 | is.na(A), which = NA], integer()) test(2170.4, DT[A > 10, which = NA], seq_len(nrow(DT))) test(2170.5, DT[!(A > 1), which = NA], c(1:3,6L)) # matches DT[A <= 1, which = NA] -# Should return zero-nrow data.table if any non-null&atomic element is length 0 #3727 -test(2171.1, data.table(A=double(),B=1:2), data.table(A=double(),B=integer())) -DT = data.table(CODE = c('a', 'b'), DATE = 1:2, VALUE = c(1.3, 1.5), key = c('CODE', 'DATE')) +# data.table() zero-nrow result if any non-null & atomic element is length 0, #3727 +test(2171.1, data.table(A=double(), B=1:2), data.table(A=double(), B=integer())) +DT = data.table(CODE=c('a','b'), DATE=1:2, VALUE=c(1.3, 1.5), key=c('CODE','DATE')) test(2171.2, DT[J(character(), 1), VALUE], double()) # because "J" is a wrapper of list() -test(2171.3, data.table(A=NULL,B=1.0), data.table(B=1.0)) # NULL is omited -test(2171.4, NROW(data.table(A=list(),B=1.0)), 1L) # list() is length 0 but not atomic so should return a length 1 data.table as it should be regarded as `list(list())`, which is length 1. -DT = data.table(A = 1:3, B = letters[1:3]) -test(2171.5, DT[A > 3L, .(ITEM = 'A>3', A, B)], DT[A > 3][, .(ITEM = 'A>3', A, B)]) # the two operations should be identical now +test(2171.3, data.table(A=NULL, B=1.0), data.table(B=1.0)) # NULL is omited +test(2171.4, NROW(data.table(A=list(), B=1.0)), 1L) # empty list() regarded as `list(list())` which is length 1, and recycled +DT = data.table(A=1:3, B=letters[1:3]) +test(2171.5, DT[A>3, .(ITEM='A>3', A, B)], # now identical as expected + DT[A>3][, .(ITEM='A>3', A, B)]) From 0c514210f5f7e220e402cd80acaeb0233e13aeac Mon Sep 17 00:00:00 2001 From: Matt Dowle Date: Mon, 26 Apr 2021 15:53:16 -0600 Subject: [PATCH 17/20] test not just identical but correct too --- inst/tests/tests.Rraw | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 80bacac2e8..750114f71c 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -17355,5 +17355,6 @@ test(2171.2, DT[J(character(), 1), VALUE], double()) # because "J" is a wrapper test(2171.3, data.table(A=NULL, B=1.0), data.table(B=1.0)) # NULL is omited test(2171.4, NROW(data.table(A=list(), B=1.0)), 1L) # empty list() regarded as `list(list())` which is length 1, and recycled DT = data.table(A=1:3, B=letters[1:3]) -test(2171.5, DT[A>3, .(ITEM='A>3', A, B)], # now identical as expected - DT[A>3][, .(ITEM='A>3', A, B)]) +test(2171.5, ans <- DT[A>3, .(ITEM='A>3', A, B)], # now identical as expected + DT[A>3][, .(ITEM='A>3', A, B)]) +test(2171.6, ans, data.table(ITEM=character(), A=integer(), B=character())) # not just identical to each other, but correct too From 72b4f2f586cf135711714db12fbcd7ea15b35a1e Mon Sep 17 00:00:00 2001 From: Matt Dowle Date: Mon, 26 Apr 2021 16:25:31 -0600 Subject: [PATCH 18/20] news item expanded with examples --- NEWS.md | 36 +++++++++++++++++++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index 487a7a17ed..223869fa0a 100644 --- a/NEWS.md +++ b/NEWS.md @@ -12,7 +12,41 @@ 3. `fwrite()` now writes UTF-8 or native csv files by specifying the `encoding=` argument, [#1770](https://github.com/Rdatatable/data.table/pull/1770). Thanks to @shrektan for the request and the PR. -4. `as.data.table.list` now no longer recycles the length-0 element. Previously, this was recycled by filling with `NA` with a warning. It means that `data.table(A = 1:3, B = double())` now returns a zero-row data.table object so `DT[J(1:3, double()), VALUE]` returns a zero-length vector, which is expected in most cases, [#3727](https://github.com/Rdatatable/data.table/issues/3727). Thanks to @shrektan for the suggestion and PR. +4. `data.table()` no longer fills empty vectors with `NA` with warning. Instead a 0-row `data.table` is returned, [#3727](https://github.com/Rdatatable/data.table/issues/3727). Since `data.table()` is used internally by `J()` and `.()`, this brings the following examples in line with expectations in most cases. Thanks to @shrektan for the suggestion and PR. + + ```R + DT = data.table(A=1:3, B=letters[1:3]) + + DT[A>3, .(ITEM='A>3', A, B)] # (1) + DT[A>3][, .(ITEM='A>3', A, B)] # (2) + + # the above are now equivalent as expected and return: + Empty data.table (0 rows and 3 cols): ITEM,A,B + + # Previously, (2) returned : + ITEM A B + + 1: A>3 NA + Warning messages: + 1: In as.data.table.list(jval, .named = NULL) : + Item 2 has 0 rows but longest item has 1; filled with NA + 2: In as.data.table.list(jval, .named = NULL) : + Item 3 has 0 rows but longest item has 1; filled with NA + ``` + + ```R + DT = data.table(A=1:3, B=letters[1:3], key="A") + DT[J(1:3, double()), B] + + # new result : + character(0) + + # old result: + [1] "a" "b" "c" + Warning message: + In as.data.table.list(i) : + Item 2 has 0 rows but longest item has 3; filled with NA + ``` ## BUG FIXES From 0094a08f91f479e5c340a99b7d28826833aab464 Mon Sep 17 00:00:00 2001 From: Matt Dowle Date: Mon, 26 Apr 2021 16:35:31 -0600 Subject: [PATCH 19/20] use .() rather than J() in news item --- NEWS.md | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/NEWS.md b/NEWS.md index 223869fa0a..c9a8280c3f 100644 --- a/NEWS.md +++ b/NEWS.md @@ -12,17 +12,14 @@ 3. `fwrite()` now writes UTF-8 or native csv files by specifying the `encoding=` argument, [#1770](https://github.com/Rdatatable/data.table/pull/1770). Thanks to @shrektan for the request and the PR. -4. `data.table()` no longer fills empty vectors with `NA` with warning. Instead a 0-row `data.table` is returned, [#3727](https://github.com/Rdatatable/data.table/issues/3727). Since `data.table()` is used internally by `J()` and `.()`, this brings the following examples in line with expectations in most cases. Thanks to @shrektan for the suggestion and PR. +4. `data.table()` no longer fills empty vectors with `NA` with warning. Instead a 0-row `data.table` is returned, [#3727](https://github.com/Rdatatable/data.table/issues/3727). Since `data.table()` is used internally by `.()`, this brings the following examples in line with expectations in most cases. Thanks to @shrektan for the suggestion and PR. ```R DT = data.table(A=1:3, B=letters[1:3]) - DT[A>3, .(ITEM='A>3', A, B)] # (1) DT[A>3][, .(ITEM='A>3', A, B)] # (2) - # the above are now equivalent as expected and return: Empty data.table (0 rows and 3 cols): ITEM,A,B - # Previously, (2) returned : ITEM A B @@ -36,12 +33,10 @@ ```R DT = data.table(A=1:3, B=letters[1:3], key="A") - DT[J(1:3, double()), B] - + DT[.(1:3, double()), B] # new result : character(0) - - # old result: + # old result : [1] "a" "b" "c" Warning message: In as.data.table.list(i) : From 15514b0cbcd217d81459360037be6762cb5b56d0 Mon Sep 17 00:00:00 2001 From: Matt Dowle Date: Mon, 26 Apr 2021 16:43:30 -0600 Subject: [PATCH 20/20] comment about change to test 1967.35 moved to be on same line as the test --- inst/tests/tests.Rraw | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 750114f71c..ba7cb0579e 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -13754,8 +13754,7 @@ test(1967.34, data.table(1:5, NULL), data.table(V1=1:5)) ### if (novname[i]) vnames[[i]] = namesi ### but, on pause for now pending #3193 ### test(1967.35, data.table(1:5, matrix(6:15, nrow = 5L)) -# We no longer recycle zero-length vector after PR#4262 -test(1967.35, data.table(1:5, integer(0L)), data.table(integer(0L), integer(0L))) +test(1967.35, data.table(1:5, integer(0L)), data.table(integer(0L), integer(0L))) # no longer NA-fill zero-length, PR#4262 test(1967.36, data.table(1:5, key = 5L), error = 'must be character') x = data.table(a = 1:5)