From fdec7a1d21e0ffcb2a3efc6e1d153cb38d303abe Mon Sep 17 00:00:00 2001 From: Michael Young Date: Sun, 31 Jan 2021 01:37:52 -0800 Subject: [PATCH 01/31] added failing tests --- inst/tests/tests.Rraw | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 2b44b3038f..f465d72474 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -17261,3 +17261,23 @@ if (identical(x, enc2native(x))) { # fintersect now preserves order of first argument like intersect, #4716 test(2163, fintersect(data.table(x=c("b", "c", "a")), data.table(x=c("a","c")))$x, c("c", "a")) + +# named arguments of c() in j get prepended to lapply(.SD, FUN) #2311 +M <- as.data.table(mtcars) + +dt1 = M[, c(m=lapply(.SD, mean)),by="cyl"] +test(2164.1, names(dt1), c("cyl", "m.mpg", "m.disp", "m.hp", "m.drat", "m.wt", "m.qsec", + "m.vs", "m.am", "m.gear", "m.carb")) + +dt2 = M[, c(m=lapply(.SD, mean), s=lapply(.SD, sum)),by="cyl", .SDcols=c("vs", "am")] +test(2164.2, names(dt2), c("cyl", "m.vs", "m.am", "s.vs", "s.am")) + +dt3 = M[, c(Mpg=list(mpg), m=lapply(.SD, mean), s=lapply(.SD, sum)),by="cyl", .SDcols=c("vs", "am")] +test(2164.3, names(dt3), c("cyl", "Mpg", "m.vs", "m.am", "s.vs", "s.am")) + +dt4 = M[, c(Mpg=list(mpg), list(mpg), maxwt=max(wt), max(wt), min_qsec=min(qsec), + mean=lapply(.SD,mean), q.=lapply(.SD, function(x){sum(x)/carb[1]})), + by="cyl", .SDcols=c("disp","hp","drat")] +test(2164.4, names(dt4), c("cyl", "Mpg", "V2", "maxwt", "V4", "min_qsec", "mean.disp", "mean.hp", + "mean.drat", "q..disp", "q..hp", "q..drat")) + From a221c6e4e67f32b7ccc40963655243a75fdd0ddb Mon Sep 17 00:00:00 2001 From: Michael Young Date: Sun, 31 Jan 2021 01:58:03 -0800 Subject: [PATCH 02/31] successfully fixed 2164.1 but broke others. 
also, haven't fixed named list yet --- R/data.table.R | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/R/data.table.R b/R/data.table.R index 2b010db77a..943772bcb4 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -1595,7 +1595,14 @@ replace_dot_alias = function(e) { deparse_ans = .massageSD(this) funi = funi + 1L # Fix for #985 jsubl[[i_]] = as.list(deparse_ans[[1L]][-1L]) # just keep the '.' from list(.) - jvnames = c(jvnames, deparse_ans[[2L]]) + jn__ = deparse_ans[[2L]] + if (!is.null(names(jsubl)[i_])) { + # Fix for #2311, prepend named arguments of c() to column names of .SD + # e.g. c(mean=lapply(.SD, mean)) + jn__ = paste(names(jsubl)[i_], jn__, sep=".") + # sep="." for consistency with c(A=list(a=1,b=1)) + } + jvnames = c(jvnames, jn__) } else if (this[[1L]] == "list") { # also handle c(lapply(.SD, sum), list()) - silly, yes, but can happen if (length(this) > 1L) { From 9fbecf5bd1d5ebf0febcf7a0a46c3b8dd65002f3 Mon Sep 17 00:00:00 2001 From: Michael Young Date: Sun, 31 Jan 2021 02:10:07 -0800 Subject: [PATCH 03/31] fixed random broken tests. only the c(a=list(),lapply(.SD,FUN)) situation left --- R/data.table.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/data.table.R b/R/data.table.R index 943772bcb4..d46efff559 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -1596,7 +1596,7 @@ replace_dot_alias = function(e) { funi = funi + 1L # Fix for #985 jsubl[[i_]] = as.list(deparse_ans[[1L]][-1L]) # just keep the '.' from list(.) jn__ = deparse_ans[[2L]] - if (!is.null(names(jsubl)[i_])) { + if (!is.null(names(jsubl)[i_]) && names(jsubl)[i_] != "") { # Fix for #2311, prepend named arguments of c() to column names of .SD # e.g. c(mean=lapply(.SD, mean)) jn__ = paste(names(jsubl)[i_], jn__, sep=".") From 7fd276e2af95be003ea2d99e8589d5e1c685a438 Mon Sep 17 00:00:00 2001 From: Michael Young Date: Sun, 31 Jan 2021 03:28:42 -0800 Subject: [PATCH 04/31] More tests. 
Some still failing --- NEWS.md | 2 ++ R/data.table.R | 16 +++++++++++++++- inst/tests/tests.Rraw | 28 ++++++++++++++++++++++++++-- 3 files changed, 43 insertions(+), 3 deletions(-) diff --git a/NEWS.md b/NEWS.md index e79ab4a795..7902723564 100644 --- a/NEWS.md +++ b/NEWS.md @@ -10,6 +10,8 @@ 2. `fintersect()` now retains the order of the first argument as reasonably expected, rather than retaining the order of the second argument, [#4716](https://github.com/Rdatatable/data.table/issues/4716). Thanks to Michel Lang for reporting, and Ben Schwen for the PR. +3. Tagged/named arguments in j=c() calls would not always result in correctly named columns. #2311 Tagging/naming an lapply(.SD, FUN) call as an argument of c() in j will now always cause those tags to get prepended (with a single dot separator) to the resulting column names, e.g. x[, c(mean=lapply(.SD, mean)), by="z"]. Previously, prepending these tags was applied inconsistently. They were omitted in the presence of a by statement. This change also applies to tagging/naming a list() call as an argument of c() in j. Previously tags/names were omitted when there was both a by statement and the presence of a lapply(.SD, FUN) call as another argument of c(), e.g. x[, c(tag1=list(var1, b=var2), lapply(.SD, FUN)), by="z"]. Naming when j=c() should now more closely follow base R conventions for concatenating named lists. Thanks to @franknarf1 for reporting and Michael T Young for the PR. + ## NOTES 1. Compiling from source no longer requires `zlib` header files to be available, [#4844](https://github.com/Rdatatable/data.table/pull/4844). The output suggests installing `zlib` headers, and how (e.g. `zlib1g-dev` on Ubuntu) as before, but now proceeds with `gzip` compression disabled in `fwrite`. Upon calling `fwrite(DT, "file.csv.gz")` at runtime, an error message suggests to reinstall `data.table` with `zlib` headers available. 
This does not apply to users on Windows or Mac who install the pre-compiled binary package from CRAN. diff --git a/R/data.table.R b/R/data.table.R index d46efff559..fa09f75e62 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -1607,7 +1607,21 @@ replace_dot_alias = function(e) { # also handle c(lapply(.SD, sum), list()) - silly, yes, but can happen if (length(this) > 1L) { jl__ = as.list(jsubl[[i_]])[-1L] # just keep the '.' from list(.) - jn__ = if (is.null(names(jl__))) rep("", length(jl__)) else names(jl__) + if (!is.null(names(jsubl)[i_]) && names(jsubl)[i_] != "") { + # Fix for #2311, prepend named list arguments of c() to that list's names: + #e.g. x[, c(A=list(), lapply(.SD, mean)),by="z"] + #now consider: + #x[, c(A=list(x,y), lapply(.SD, mean)),by="z"] #--> names A1 A2 + #x[, c(A=list(x,b=y), lapply(.SD, mean)),by="z"] #--> names A1 A.b + #x[, c(A=list(a=x,b=y), lapply(.SD, mean)),by="z"] #--> names A.a A.b + #these all follow base R. e.g. c(A=list(0,b=0)) + njl__ = if (is.null(names(jl__))) rep("", length(jl__)) else names(jl__) + njl__nonblank = names(jl__) != "" + jn__ = paste0(names(jsubl)[i_], seq_along(jl__)) + jn__[njl__nonblank] = paste(names(jsubl)[i_], njl__[njl__nonblank], sep=".") + } else { + jn__ = if (is.null(names(jl__))) rep("", length(jl__)) else names(jl__) + } idx = unlist(lapply(jl__, function(x) is.name(x) && x == ".I")) if (any(idx)) jn__[idx & (jn__ == "")] = "I" jvnames = c(jvnames, jn__) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index f465d72474..c30df495a4 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -17266,12 +17266,12 @@ test(2163, fintersect(data.table(x=c("b", "c", "a")), data.table(x=c("a","c")))$ M <- as.data.table(mtcars) dt1 = M[, c(m=lapply(.SD, mean)),by="cyl"] -test(2164.1, names(dt1), c("cyl", "m.mpg", "m.disp", "m.hp", "m.drat", "m.wt", "m.qsec", - "m.vs", "m.am", "m.gear", "m.carb")) +test(2164.1, names(dt1), c("cyl", "m.mpg", "m.disp", "m.hp", "m.drat", "m.wt", "m.qsec", 
"m.vs", "m.am", "m.gear", "m.carb")) dt2 = M[, c(m=lapply(.SD, mean), s=lapply(.SD, sum)),by="cyl", .SDcols=c("vs", "am")] test(2164.2, names(dt2), c("cyl", "m.vs", "m.am", "s.vs", "s.am")) +#naming a list() argument as well as an lapply() argument dt3 = M[, c(Mpg=list(mpg), m=lapply(.SD, mean), s=lapply(.SD, sum)),by="cyl", .SDcols=c("vs", "am")] test(2164.3, names(dt3), c("cyl", "Mpg", "m.vs", "m.am", "s.vs", "s.am")) @@ -17281,3 +17281,27 @@ dt4 = M[, c(Mpg=list(mpg), list(mpg), maxwt=max(wt), max(wt), min_qsec=min(qsec) test(2164.4, names(dt4), c("cyl", "Mpg", "V2", "maxwt", "V4", "min_qsec", "mean.disp", "mean.hp", "mean.drat", "q..disp", "q..hp", "q..drat")) +#now for the c(name=list(name1=value,name2=value2)) construct: +#for reference, here's what happens in base R, and has been working previously: +dt5 = M[, c(A=list(a=mpg,b=hp),lapply(.SD, mean)), .SDcols=c("vs", "am")] +test(2164.5, names(dt5), c("A.a","A.b", "vs","am")) + +#previously broken: +dt6 = M[, c(A=list(a=mpg,b=hp),lapply(.SD, mean)), by="cyl", .SDcols=c("vs", "am")] +test(2164.6, names(dt6), c("cyl", "A.a","A.b", "vs", "am")) + +#base R, previously working: +dt7 = M[, c(A=list(mpg, hp),lapply(.SD, mean)), .SDcols=c("vs", "am")] +test(2164.7, names(dt7), c("A1","A2", "vs","am")) #A1 and A2 is consistent with base R c(A=list(0,0)) + +#previously broken +dt8 = M[, c(A=list(mpg, hp),lapply(.SD, mean)), by="cyl", .SDcols=c("vs", "am")] +test(2164.8, names(dt8), c("cyl","A1","A2", "vs","am")) + +#previously working: +dt9 = M[, c(A=list(mpg, b=hp, wt),lapply(.SD, mean)), .SDcols=c("vs", "am")] +test(2164.9, names(dt9), c("A1","A.b","A3", "vs","am")) #A1, A.b, A3 is consistent with base R c(A=list(0,b=0,0)) + +#previously broken: +dt10 = M[, c(A=list(mpg, b=hp, wt),lapply(.SD, mean)), by="cyl", .SDcols=c("vs", "am")] +test(2164.10, names(dt10), c("cyl", "A1","A.b","A3", "vs","am")) #A1, A.b, A3 is consistent with base R c(A=list(0,b=0,0)) \ No newline at end of file From 
8d0052b5e0fd08c5a9fc5765da09712fd436c090 Mon Sep 17 00:00:00 2001 From: Michael Young Date: Sun, 31 Jan 2021 10:51:57 -0800 Subject: [PATCH 05/31] c(A=list()) shouldn't get a number prefixed --- R/data.table.R | 7 ++++++- inst/tests/tests.Rraw | 21 +++++++++++---------- 2 files changed, 17 insertions(+), 11 deletions(-) diff --git a/R/data.table.R b/R/data.table.R index fa09f75e62..235e586f8b 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -1614,10 +1614,15 @@ replace_dot_alias = function(e) { #x[, c(A=list(x,y), lapply(.SD, mean)),by="z"] #--> names A1 A2 #x[, c(A=list(x,b=y), lapply(.SD, mean)),by="z"] #--> names A1 A.b #x[, c(A=list(a=x,b=y), lapply(.SD, mean)),by="z"] #--> names A.a A.b + #x[, c(A=list(x), lapply(.SD, mean)),by="z"] #--> names A (note length-1 doesn't get integer suffix) #these all follow base R. e.g. c(A=list(0,b=0)) njl__ = if (is.null(names(jl__))) rep("", length(jl__)) else names(jl__) njl__nonblank = names(jl__) != "" - jn__ = paste0(names(jsubl)[i_], seq_along(jl__)) + if(length(jl__)>1L){ + jn__ = paste0(names(jsubl)[i_], seq_along(jl__)) + } else { + jn__ = names(jsubl)[i_] + } jn__[njl__nonblank] = paste(names(jsubl)[i_], njl__[njl__nonblank], sep=".") } else { jn__ = if (is.null(names(jl__))) rep("", length(jl__)) else names(jl__) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index c30df495a4..c82a50417f 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -17265,14 +17265,15 @@ test(2163, fintersect(data.table(x=c("b", "c", "a")), data.table(x=c("a","c")))$ # named arguments of c() in j get prepended to lapply(.SD, FUN) #2311 M <- as.data.table(mtcars) -dt1 = M[, c(m=lapply(.SD, mean)),by="cyl"] -test(2164.1, names(dt1), c("cyl", "m.mpg", "m.disp", "m.hp", "m.drat", "m.wt", "m.qsec", "m.vs", "m.am", "m.gear", "m.carb")) +dt1 = M[, c(m=lapply(.SD, mean)), by="cyl"] +test(2164.1, names(dt1), c("cyl", "m.mpg", "m.disp", "m.hp", "m.drat", "m.wt", "m.qsec", +"m.vs", "m.am", "m.gear", "m.carb")) -dt2 = M[, 
c(m=lapply(.SD, mean), s=lapply(.SD, sum)),by="cyl", .SDcols=c("vs", "am")] +dt2 = M[, c(m=lapply(.SD, mean), s=lapply(.SD, sum)), by="cyl", .SDcols=c("vs", "am")] test(2164.2, names(dt2), c("cyl", "m.vs", "m.am", "s.vs", "s.am")) #naming a list() argument as well as an lapply() argument -dt3 = M[, c(Mpg=list(mpg), m=lapply(.SD, mean), s=lapply(.SD, sum)),by="cyl", .SDcols=c("vs", "am")] +dt3 = M[, c(Mpg=list(mpg), m=lapply(.SD, mean), s=lapply(.SD, sum)), by="cyl", .SDcols=c("vs", "am")] test(2164.3, names(dt3), c("cyl", "Mpg", "m.vs", "m.am", "s.vs", "s.am")) dt4 = M[, c(Mpg=list(mpg), list(mpg), maxwt=max(wt), max(wt), min_qsec=min(qsec), @@ -17286,7 +17287,7 @@ test(2164.4, names(dt4), c("cyl", "Mpg", "V2", "maxwt", "V4", "min_qsec", "mean. dt5 = M[, c(A=list(a=mpg,b=hp),lapply(.SD, mean)), .SDcols=c("vs", "am")] test(2164.5, names(dt5), c("A.a","A.b", "vs","am")) -#previously broken: +#previously broken, due to by statement dt6 = M[, c(A=list(a=mpg,b=hp),lapply(.SD, mean)), by="cyl", .SDcols=c("vs", "am")] test(2164.6, names(dt6), c("cyl", "A.a","A.b", "vs", "am")) @@ -17294,14 +17295,14 @@ test(2164.6, names(dt6), c("cyl", "A.a","A.b", "vs", "am")) dt7 = M[, c(A=list(mpg, hp),lapply(.SD, mean)), .SDcols=c("vs", "am")] test(2164.7, names(dt7), c("A1","A2", "vs","am")) #A1 and A2 is consistent with base R c(A=list(0,0)) -#previously broken +#previously broken, due to by statement dt8 = M[, c(A=list(mpg, hp),lapply(.SD, mean)), by="cyl", .SDcols=c("vs", "am")] test(2164.8, names(dt8), c("cyl","A1","A2", "vs","am")) #previously working: -dt9 = M[, c(A=list(mpg, b=hp, wt),lapply(.SD, mean)), .SDcols=c("vs", "am")] +dt9 = M[, c(A=list(mpg, b=hp, wt), lapply(.SD, mean)), .SDcols=c("vs", "am")] test(2164.9, names(dt9), c("A1","A.b","A3", "vs","am")) #A1, A.b, A3 is consistent with base R c(A=list(0,b=0,0)) -#previously broken: -dt10 = M[, c(A=list(mpg, b=hp, wt),lapply(.SD, mean)), by="cyl", .SDcols=c("vs", "am")] -test(2164.10, names(dt10), c("cyl", "A1","A.b","A3", 
"vs","am")) #A1, A.b, A3 is consistent with base R c(A=list(0,b=0,0)) \ No newline at end of file +#previously broken, due to by statement +dt10 = M[, c(A=list(mpg, b=hp, wt), lapply(.SD, mean)), by="cyl", .SDcols=c("vs", "am")] +test(2164.10, names(dt10), c("cyl", "A1","A.b","A3", "vs","am")) #A1, A.b, A3 is consistent with base R c(A=list(0,b=0,0)) From e355f80f786f5dbc14f6b6e33f455e4f66df0374 Mon Sep 17 00:00:00 2001 From: Michael Young Date: Sun, 31 Jan 2021 11:48:19 -0800 Subject: [PATCH 06/31] fixed previously failing test. the issue was that I had 2164.1 and 2164.10. oops. also added more tests. all passing now --- inst/tests/tests.Rraw | 77 ++++++++++++++++++++----------------------- 1 file changed, 35 insertions(+), 42 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index c82a50417f..62cf2d54e5 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -17263,46 +17263,39 @@ if (identical(x, enc2native(x))) { test(2163, fintersect(data.table(x=c("b", "c", "a")), data.table(x=c("a","c")))$x, c("c", "a")) # named arguments of c() in j get prepended to lapply(.SD, FUN) #2311 -M <- as.data.table(mtcars) -dt1 = M[, c(m=lapply(.SD, mean)), by="cyl"] -test(2164.1, names(dt1), c("cyl", "m.mpg", "m.disp", "m.hp", "m.drat", "m.wt", "m.qsec", -"m.vs", "m.am", "m.gear", "m.carb")) - -dt2 = M[, c(m=lapply(.SD, mean), s=lapply(.SD, sum)), by="cyl", .SDcols=c("vs", "am")] -test(2164.2, names(dt2), c("cyl", "m.vs", "m.am", "s.vs", "s.am")) - -#naming a list() argument as well as an lapply() argument -dt3 = M[, c(Mpg=list(mpg), m=lapply(.SD, mean), s=lapply(.SD, sum)), by="cyl", .SDcols=c("vs", "am")] -test(2164.3, names(dt3), c("cyl", "Mpg", "m.vs", "m.am", "s.vs", "s.am")) - -dt4 = M[, c(Mpg=list(mpg), list(mpg), maxwt=max(wt), max(wt), min_qsec=min(qsec), - mean=lapply(.SD,mean), q.=lapply(.SD, function(x){sum(x)/carb[1]})), - by="cyl", .SDcols=c("disp","hp","drat")] -test(2164.4, names(dt4), c("cyl", "Mpg", "V2", "maxwt", "V4", 
"min_qsec", "mean.disp", "mean.hp", - "mean.drat", "q..disp", "q..hp", "q..drat")) - -#now for the c(name=list(name1=value,name2=value2)) construct: -#for reference, here's what happens in base R, and has been working previously: -dt5 = M[, c(A=list(a=mpg,b=hp),lapply(.SD, mean)), .SDcols=c("vs", "am")] -test(2164.5, names(dt5), c("A.a","A.b", "vs","am")) - -#previously broken, due to by statement -dt6 = M[, c(A=list(a=mpg,b=hp),lapply(.SD, mean)), by="cyl", .SDcols=c("vs", "am")] -test(2164.6, names(dt6), c("cyl", "A.a","A.b", "vs", "am")) - -#base R, previously working: -dt7 = M[, c(A=list(mpg, hp),lapply(.SD, mean)), .SDcols=c("vs", "am")] -test(2164.7, names(dt7), c("A1","A2", "vs","am")) #A1 and A2 is consistent with base R c(A=list(0,0)) - -#previously broken, due to by statement -dt8 = M[, c(A=list(mpg, hp),lapply(.SD, mean)), by="cyl", .SDcols=c("vs", "am")] -test(2164.8, names(dt8), c("cyl","A1","A2", "vs","am")) - -#previously working: -dt9 = M[, c(A=list(mpg, b=hp, wt), lapply(.SD, mean)), .SDcols=c("vs", "am")] -test(2164.9, names(dt9), c("A1","A.b","A3", "vs","am")) #A1, A.b, A3 is consistent with base R c(A=list(0,b=0,0)) - -#previously broken, due to by statement -dt10 = M[, c(A=list(mpg, b=hp, wt), lapply(.SD, mean)), by="cyl", .SDcols=c("vs", "am")] -test(2164.10, names(dt10), c("cyl", "A1","A.b","A3", "vs","am")) #A1, A.b, A3 is consistent with base R c(A=list(0,b=0,0)) +#duplicated .SDcols? 
+#by variable in .SD + +M <- as.data.table(mtcars) +dt1 = M[, c(m=lapply(.SD, mean)), by="cyl"] +dt2 = M[, c(m=lapply(.SD, mean), s=lapply(.SD, sum)), by="cyl", .SDcols=c("vs", "am")] +dt3 = M[, c(m=lapply(.SD, mean), lapply(.SD, sum)), by="cyl", .SDcols=c("vs", "am")] +dt4 = M[, c(m=lapply(.SD, mean), lapply(.SD, sum)), by="cyl", .SDcols=c("vs", "am", "am")] +dt5 = M[, c(m=lapply(.SD, mean), lapply(.SD, sum)), by="cyl", .SDcols=c("vs", "am", "cyl")] +dt6 = M[, c(m=lapply(.SD, mean), s=lapply(.SD, sum)), by="cyl"] +dt7 = M[, c(m=lapply(.SD, mean), m=lapply(.SD, sum)), by="cyl"] +dt8 = M[, c(Mpg=list(mpg), m=lapply(.SD, mean), s=lapply(.SD, sum)), by="cyl", .SDcols=c("vs", "am")] +dt9 = M[, c(Mpg=list(mpg), list(mpg), maxwt=max(wt), max(wt), min_qsec=min(qsec), mean=lapply(.SD,mean), q.=lapply(.SD, function(x){sum(x)/carb[1]})),by="cyl", .SDcols=c("disp","hp","drat")] +dt10 = M[, c(A=list(a=mpg,b=hp),lapply(.SD, mean)), .SDcols=c("vs", "am")] +dt11 = M[, c(A=list(a=mpg,b=hp),lapply(.SD, mean)), by="cyl", .SDcols=c("vs", "am")] +dt12 = M[, c(A=list(mpg, hp),lapply(.SD, mean)), .SDcols=c("vs", "am")] +dt13 = M[, c(A=list(mpg, hp),lapply(.SD, mean)), by="cyl", .SDcols=c("vs", "am")] +dt14 = M[, c(A=list(mpg, b=hp, wt), lapply(.SD, mean)), .SDcols=c("vs", "am")] +dt15 = M[, c(A=list(mpg, b=hp, wt), lapply(.SD, mean)), by="cyl", .SDcols=c("vs", "am")] + +test(2164.01, names(dt1), c("cyl", "m.mpg", "m.disp", "m.hp", "m.drat", "m.wt", "m.qsec", "m.vs", "m.am", "m.gear", "m.carb")) +test(2164.02, names(dt2), c("cyl", "m.vs", "m.am", "s.vs", "s.am")) +test(2164.03, names(dt3), c("cyl", "m.vs", "m.am", "vs", "am")) +test(2164.04, names(dt4), c("cyl", "m.vs", "m.am", "m.am", "vs", "am", "am")) +test(2164.05, names(dt5), c("cyl", "m.vs", "m.am", "m.cyl", "vs", "am", "cyl")) +test(2164.06, names(dt6), c("cyl", paste0("m.", setdiff(names(M), "cyl")), paste0("s.", setdiff(names(M), "cyl")))) +test(2164.07, names(dt7), c("cyl", paste0("m.", setdiff(names(M), "cyl")), paste0("m.", 
setdiff(names(M), "cyl")))) +test(2164.08, names(dt8), c("cyl", "Mpg", "m.vs", "m.am", "s.vs", "s.am")) +test(2164.09, names(dt9), c("cyl", "Mpg", "V2", "maxwt", "V4", "min_qsec", "mean.disp", "mean.hp", "mean.drat", "q..disp", "q..hp", "q..drat")) +test(2164.10, names(dt10), c("A.a","A.b", "vs","am")) +test(2164.11, names(dt11), c("cyl", "A.a","A.b", "vs", "am")) +test(2164.12, names(dt12), c("A1","A2", "vs","am")) #A1 and A2 is consistent with base R c(A=list(0,0)) +test(2164.13, names(dt13), c("cyl","A1","A2", "vs","am")) +test(2164.14, names(dt14), c("A1","A.b","A3", "vs","am")) #A1, A.b, A3 is consistent with base R c(A=list(0,b=0,0)) +test(2164.15, names(dt15), c("cyl", "A1","A.b","A3", "vs","am")) #A1, A.b, A3 is consistent with base R c(A=list(0,b=0,0)) From 26fd86301e57fc1aa6e820cd42b7da24e8b039fb Mon Sep 17 00:00:00 2001 From: Michael Young Date: Sun, 31 Jan 2021 12:03:05 -0800 Subject: [PATCH 07/31] add to author list --- DESCRIPTION | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index dc8f324342..15865577fb 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -61,7 +61,8 @@ Authors@R: c( person("Vaclav","Tlapak", role="ctb"), person("Kevin","Ushey", role="ctb"), person("Dirk","Eddelbuettel", role="ctb"), - person("Ben","Schwen", role="ctb")) + person("Ben","Schwen", role="ctb"), + person("Michael","Young", role="ctb")) Depends: R (>= 3.1.0) Imports: methods Suggests: bit64 (>= 4.0.0), bit (>= 4.0.4), curl, R.utils, xts, nanotime, zoo (>= 1.8-1), yaml, knitr, rmarkdown From 99eec501ac0655fe220c84b04229289e6b1fc821 Mon Sep 17 00:00:00 2001 From: Michael Young Date: Sun, 31 Jan 2021 16:04:27 -0800 Subject: [PATCH 08/31] added github link to issue --- NEWS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index 7902723564..e811f52070 100644 --- a/NEWS.md +++ b/NEWS.md @@ -10,7 +10,7 @@ 2. 
`fintersect()` now retains the order of the first argument as reasonably expected, rather than retaining the order of the second argument, [#4716](https://github.com/Rdatatable/data.table/issues/4716). Thanks to Michel Lang for reporting, and Ben Schwen for the PR. -3. Tagged/named arguments in j=c() calls would not always result in correctly named columns. #2311 Tagging/naming an lapply(.SD, FUN) call as an argument of c() in j will now always cause those tags to get prepended (with a single dot separator) to the resulting column names, e.g. x[, c(mean=lapply(.SD, mean)), by="z"]. Previously, prepending these tags was applied inconsistently. They were omitted in the presence of a by statement. This change also applies to tagging/naming a list() call as an argument of c() in j. Previously tags/names were omitted when there was both a by statement and the presence of a lapply(.SD, FUN) call as another argument of c(), e.g. x[, c(tag1=list(var1, b=var2), lapply(.SD, FUN)), by="z"]. Naming when j=c() should now more closely follow base R conventions for concatenating named lists. Thanks to @franknarf1 for reporting and Michael T Young for the PR. +3. Tagged/named arguments in j=c() calls now result in correctly named columns, [#2311](https://github.com/Rdatatable/data.table/issues/2311) Tagging/naming an lapply(.SD, FUN) call as an argument of c() in j will now always cause those tags to get prepended (with a single dot separator) to the resulting column names, e.g. x[, c(mean=lapply(.SD, mean)), by="z"]. Previously, prepending these tags was applied inconsistently. They were omitted in the presence of a by statement. This change also applies to tagging/naming a list() call as an argument of c() in j. Previously tags/names were omitted when there was both a by statement and the presence of a lapply(.SD, FUN) call as another argument of c(), e.g. x[, c(tag1=list(var1, b=var2), lapply(.SD, FUN)), by="z"]. 
Naming when j=c() should now more closely follow base R conventions for concatenating named lists. Thanks to @franknarf1 for reporting and Michael T Young for the PR. ## NOTES From c9ccc5a0403ce510e58c2e013a08349a7069f815 Mon Sep 17 00:00:00 2001 From: Michael Young Date: Wed, 3 Feb 2021 00:25:16 -0800 Subject: [PATCH 09/31] code style changes --- R/data.table.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/data.table.R b/R/data.table.R index 235e586f8b..d8882954d9 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -1618,7 +1618,7 @@ replace_dot_alias = function(e) { #these all follow base R. e.g. c(A=list(0,b=0)) njl__ = if (is.null(names(jl__))) rep("", length(jl__)) else names(jl__) njl__nonblank = names(jl__) != "" - if(length(jl__)>1L){ + if (length(jl__)>1L) { jn__ = paste0(names(jsubl)[i_], seq_along(jl__)) } else { jn__ = names(jsubl)[i_] From 75af86995fba62a48b5b23a4445712cd5d097031 Mon Sep 17 00:00:00 2001 From: Michael Young Date: Fri, 19 Feb 2021 17:31:44 -0800 Subject: [PATCH 10/31] reworked tests. now includes tests that fail only when optimize=0 --- inst/tests/tests.Rraw | 110 +++++++++++++++++++++++++++++------------- 1 file changed, 76 insertions(+), 34 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 62cf2d54e5..9d2cbcee70 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -17264,38 +17264,80 @@ test(2163, fintersect(data.table(x=c("b", "c", "a")), data.table(x=c("a","c")))$ # named arguments of c() in j get prepended to lapply(.SD, FUN) #2311 -#duplicated .SDcols? 
-#by variable in .SD - M <- as.data.table(mtcars) -dt1 = M[, c(m=lapply(.SD, mean)), by="cyl"] -dt2 = M[, c(m=lapply(.SD, mean), s=lapply(.SD, sum)), by="cyl", .SDcols=c("vs", "am")] -dt3 = M[, c(m=lapply(.SD, mean), lapply(.SD, sum)), by="cyl", .SDcols=c("vs", "am")] -dt4 = M[, c(m=lapply(.SD, mean), lapply(.SD, sum)), by="cyl", .SDcols=c("vs", "am", "am")] -dt5 = M[, c(m=lapply(.SD, mean), lapply(.SD, sum)), by="cyl", .SDcols=c("vs", "am", "cyl")] -dt6 = M[, c(m=lapply(.SD, mean), s=lapply(.SD, sum)), by="cyl"] -dt7 = M[, c(m=lapply(.SD, mean), m=lapply(.SD, sum)), by="cyl"] -dt8 = M[, c(Mpg=list(mpg), m=lapply(.SD, mean), s=lapply(.SD, sum)), by="cyl", .SDcols=c("vs", "am")] -dt9 = M[, c(Mpg=list(mpg), list(mpg), maxwt=max(wt), max(wt), min_qsec=min(qsec), mean=lapply(.SD,mean), q.=lapply(.SD, function(x){sum(x)/carb[1]})),by="cyl", .SDcols=c("disp","hp","drat")] -dt10 = M[, c(A=list(a=mpg,b=hp),lapply(.SD, mean)), .SDcols=c("vs", "am")] -dt11 = M[, c(A=list(a=mpg,b=hp),lapply(.SD, mean)), by="cyl", .SDcols=c("vs", "am")] -dt12 = M[, c(A=list(mpg, hp),lapply(.SD, mean)), .SDcols=c("vs", "am")] -dt13 = M[, c(A=list(mpg, hp),lapply(.SD, mean)), by="cyl", .SDcols=c("vs", "am")] -dt14 = M[, c(A=list(mpg, b=hp, wt), lapply(.SD, mean)), .SDcols=c("vs", "am")] -dt15 = M[, c(A=list(mpg, b=hp, wt), lapply(.SD, mean)), by="cyl", .SDcols=c("vs", "am")] - -test(2164.01, names(dt1), c("cyl", "m.mpg", "m.disp", "m.hp", "m.drat", "m.wt", "m.qsec", "m.vs", "m.am", "m.gear", "m.carb")) -test(2164.02, names(dt2), c("cyl", "m.vs", "m.am", "s.vs", "s.am")) -test(2164.03, names(dt3), c("cyl", "m.vs", "m.am", "vs", "am")) -test(2164.04, names(dt4), c("cyl", "m.vs", "m.am", "m.am", "vs", "am", "am")) -test(2164.05, names(dt5), c("cyl", "m.vs", "m.am", "m.cyl", "vs", "am", "cyl")) -test(2164.06, names(dt6), c("cyl", paste0("m.", setdiff(names(M), "cyl")), paste0("s.", setdiff(names(M), "cyl")))) -test(2164.07, names(dt7), c("cyl", paste0("m.", setdiff(names(M), "cyl")), paste0("m.", 
setdiff(names(M), "cyl")))) -test(2164.08, names(dt8), c("cyl", "Mpg", "m.vs", "m.am", "s.vs", "s.am")) -test(2164.09, names(dt9), c("cyl", "Mpg", "V2", "maxwt", "V4", "min_qsec", "mean.disp", "mean.hp", "mean.drat", "q..disp", "q..hp", "q..drat")) -test(2164.10, names(dt10), c("A.a","A.b", "vs","am")) -test(2164.11, names(dt11), c("cyl", "A.a","A.b", "vs", "am")) -test(2164.12, names(dt12), c("A1","A2", "vs","am")) #A1 and A2 is consistent with base R c(A=list(0,0)) -test(2164.13, names(dt13), c("cyl","A1","A2", "vs","am")) -test(2164.14, names(dt14), c("A1","A.b","A3", "vs","am")) #A1, A.b, A3 is consistent with base R c(A=list(0,b=0,0)) -test(2164.15, names(dt15), c("cyl", "A1","A.b","A3", "vs","am")) #A1, A.b, A3 is consistent with base R c(A=list(0,b=0,0)) +M[, " ":= hp] +M[, ".":= hp] + +sdnames <- setdiff(names(M), "cyl") +sdlist <- as.list(rep(NA,length(sdnames))) +names(sdlist) <- sdnames + +dt01 = M[, c( m=lapply(.SD, mean)), by="cyl"] +dt02 = M[, c( Mpg=list(mpg), lapply(.SD, mean)), by="cyl"] +dt03 = M[, c( Mpg=list(mpg), m=lapply(.SD, mean)), by="cyl"] +dt04 = M[, c( mpg=list(mpg), mpg=lapply(.SD, mean)), by="cyl"] +dt05 = M[, c( list(mpg), lapply(.SD, mean)), by="cyl"] +dt06 = M[, c( lapply(.SD, mean), list(mpg) ), by="cyl"] +dt07 = M[, c( lapply(.SD, mean), lapply(.SD, sum)), by="cyl"] +dt08 = M[, c(mean=lapply(.SD, mean), sum=lapply(.SD, sum)), by="cyl"] +dt09 = M[, c( lapply(.SD, mean), sum=lapply(.SD, sum)), by="cyl"] +dt10 = M[, c( " "=lapply(.SD, mean), "."=lapply(.SD, sum)), by="cyl"] +dt11 = M[, c(A=list(a=mpg,b=hp), lapply(.SD, mean)), by="cyl"] +dt12 = M[, c(A=list(mpg, hp), lapply(.SD, mean)), by="cyl"] +dt13 = M[, c(A=list(mpg, b=hp, wt), lapply(.SD, mean)), by="cyl"] +dt14 = M[, c(A=list(mpg), lapply(.SD, mean)), by="cyl"] +dt15 = M[, c(" "=list(" "=hp, "."=disp, mpg), lapply(.SD, mean)), by="cyl"] +dt16 = M[, c("."=list(" "=hp, "."=disp, mpg), lapply(.SD, mean)), by="cyl"] + +test(2164.001, names(dt01), c("cyl", names(c(m=sdlist)))) 
+test(2164.002, names(dt02), c("cyl", "Mpg", sdnames)) +test(2164.003, names(dt03), c("cyl", "Mpg", names(c(m=sdlist)))) +test(2164.004, names(dt04), c("cyl", "mpg", names(c(mpg=sdlist)))) +test(2164.005, names(dt05), c("cyl", "V1", sdnames)) +test(2164.006, names(dt06), c("cyl", sdnames, "V13") ) +test(2164.007, names(dt07), c("cyl", sdnames,sdnames)) +test(2164.008, names(dt08), c("cyl", names(c(mean=sdlist, sum=sdlist)))) +test(2164.009, names(dt09), c("cyl", sdnames, names(c(sum=sdlist))) ) +test(2164.010, names(dt10), c("cyl", names(c(" "=sdlist, "."=sdlist)))) +test(2164.011, names(dt11), c("cyl", names(c(A=list(a=0, b=0))), sdnames )) +test(2164.012, names(dt12), c("cyl", names(c(A=list(0, 0))), sdnames )) +test(2164.013, names(dt13), c("cyl", names(c(A=list(0, b=0,0))), sdnames )) +test(2164.014, names(dt14), c("cyl", names(c(A=list(0))), sdnames )) +test(2164.015, names(dt15), c("cyl", names(c(" "=list(" "=0, "."=0, 0))), sdnames )) +test(2164.016, names(dt16), c("cyl", names(c("."=list(" "=0, "."=0, 0))), sdnames )) + +old=options(datatable.optimize = 0) +dt01 = M[, c( m=lapply(.SD, mean)), by="cyl"] +dt02 = M[, c( Mpg=list(mpg), lapply(.SD, mean)), by="cyl"] +dt03 = M[, c( Mpg=list(mpg), m=lapply(.SD, mean)), by="cyl"] +dt04 = M[, c( mpg=list(mpg), mpg=lapply(.SD, mean)), by="cyl"] +dt05 = M[, c( list(mpg), lapply(.SD, mean)), by="cyl"] +dt06 = M[, c( lapply(.SD, mean), list(mpg) ), by="cyl"] +dt07 = M[, c( lapply(.SD, mean), lapply(.SD, sum)), by="cyl"] +dt08 = M[, c(mean=lapply(.SD, mean), sum=lapply(.SD, sum)), by="cyl"] +dt09 = M[, c( lapply(.SD, mean), sum=lapply(.SD, sum)), by="cyl"] +dt10 = M[, c( " "=lapply(.SD, mean), "."=lapply(.SD, sum)), by="cyl"] +dt11 = M[, c(A=list(a=mpg,b=hp), lapply(.SD, mean)), by="cyl"] +dt12 = M[, c(A=list(mpg, hp), lapply(.SD, mean)), by="cyl"] +dt13 = M[, c(A=list(mpg, b=hp, wt), lapply(.SD, mean)), by="cyl"] +dt14 = M[, c(A=list(mpg), lapply(.SD, mean)), by="cyl"] +dt15 = M[, c(" "=list(" "=hp, "."=disp, mpg), 
lapply(.SD, mean)), by="cyl"] +dt16 = M[, c("."=list(" "=hp, "."=disp, mpg), lapply(.SD, mean)), by="cyl"] + +test(2164.101, names(dt01), c("cyl", names(c(m=sdlist)))) +test(2164.102, names(dt02), c("cyl", "Mpg", sdnames)) +test(2164.103, names(dt03), c("cyl", "Mpg", names(c(m=sdlist)))) +test(2164.104, names(dt04), c("cyl", "mpg", names(c(mpg=sdlist)))) +test(2164.105, names(dt05), c("cyl", "V1", sdnames)) +test(2164.106, names(dt06), c("cyl", sdnames, "V13") ) +test(2164.107, names(dt07), c("cyl", sdnames,sdnames)) +test(2164.108, names(dt08), c("cyl", names(c(mean=sdlist, sum=sdlist)))) +test(2164.109, names(dt09), c("cyl", sdnames, names(c(sum=sdlist))) ) +test(2164.110, names(dt10), c("cyl", names(c(" "=sdlist, "."=sdlist)))) +test(2164.111, names(dt11), c("cyl", names(c(A=list(a=0, b=0))), sdnames )) +test(2164.112, names(dt12), c("cyl", names(c(A=list(0, 0))), sdnames )) +test(2164.113, names(dt13), c("cyl", names(c(A=list(0, b=0,0))), sdnames )) +test(2164.114, names(dt14), c("cyl", names(c(A=list(0))), sdnames )) +test(2164.115, names(dt15), c("cyl", names(c(" "=list(" "=0, "."=0, 0))), sdnames )) +test(2164.116, names(dt16), c("cyl", names(c("."=list(" "=0, "."=0, 0))), sdnames )) +options(old) From db69ee91d9dd103e03a78c360635e51497b60861 Mon Sep 17 00:00:00 2001 From: Michael Young Date: Fri, 19 Feb 2021 20:03:08 -0800 Subject: [PATCH 11/31] fix breaking tests related to inconsistent naming of blank columnames --- R/data.table.R | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/R/data.table.R b/R/data.table.R index d8882954d9..d3e0670da1 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -1851,7 +1851,10 @@ replace_dot_alias = function(e) { if (any(ww)) jvnames[ww] = paste0("V",ww) setattr(ans, "names", c(bynames, jvnames)) } else { - setnames(ans,seq_along(bynames),bynames) # TO DO: reinvestigate bynames flowing from dogroups here and simplify + nonbynames <- names(ans)[-seq_along(bynames)] #related to 2311. 
make naming of empty columns names more consistent + ww = which(nonbynames=="") + if (any(ww)) nonbynames[ww] = paste0("V",ww) + setattr(ans, "names", c(bynames, nonbynames)) # TO DO: reinvestigate bynames flowing from dogroups here and simplify } if (byjoin && keyby && !bysameorder) { if (verbose) {last.started.at=proc.time();cat("setkey() afterwards for keyby=.EACHI ... ");flush.console()} From 6751d344c91464d0ab23fab50d63f2930b5e740f Mon Sep 17 00:00:00 2001 From: Michael Young Date: Fri, 19 Feb 2021 20:46:02 -0800 Subject: [PATCH 12/31] added more passing tests --- inst/tests/tests.Rraw | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 9d2cbcee70..f478e5638a 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -17288,6 +17288,8 @@ dt13 = M[, c(A=list(mpg, b=hp, wt), lapply(.SD, mean)), by="cyl"] dt14 = M[, c(A=list(mpg), lapply(.SD, mean)), by="cyl"] dt15 = M[, c(" "=list(" "=hp, "."=disp, mpg), lapply(.SD, mean)), by="cyl"] dt16 = M[, c("."=list(" "=hp, "."=disp, mpg), lapply(.SD, mean)), by="cyl"] +dt17 = M[, c(list(mpg,b=hp),lapply(.SD, mean)), by="cyl", .SDcols=c("vs", "am")] +dt18 = M[, c(list(mpg,b=hp),c(lapply(.SD, mean))), by="cyl", .SDcols=c("vs", "am")] test(2164.001, names(dt01), c("cyl", names(c(m=sdlist)))) test(2164.002, names(dt02), c("cyl", "Mpg", sdnames)) @@ -17305,6 +17307,8 @@ test(2164.013, names(dt13), c("cyl", names(c(A=list(0, b=0,0))), sdnames )) test(2164.014, names(dt14), c("cyl", names(c(A=list(0))), sdnames )) test(2164.015, names(dt15), c("cyl", names(c(" "=list(" "=0, "."=0, 0))), sdnames )) test(2164.016, names(dt16), c("cyl", names(c("."=list(" "=0, "."=0, 0))), sdnames )) +test(2164.017, names(dt17), c("cyl", "V1","b", "vs", "am")) +test(2164.018, names(dt18), c("cyl", "V1","b", "vs", "am")) old=options(datatable.optimize = 0) dt01 = M[, c( m=lapply(.SD, mean)), by="cyl"] @@ -17323,6 +17327,8 @@ dt13 = M[, c(A=list(mpg, b=hp, wt), lapply(.SD, mean)), 
by="cyl"] dt14 = M[, c(A=list(mpg), lapply(.SD, mean)), by="cyl"] dt15 = M[, c(" "=list(" "=hp, "."=disp, mpg), lapply(.SD, mean)), by="cyl"] dt16 = M[, c("."=list(" "=hp, "."=disp, mpg), lapply(.SD, mean)), by="cyl"] +dt17 = M[, c(list(mpg,b=hp),lapply(.SD, mean)), by="cyl", .SDcols=c("vs", "am")] +dt18 = M[, c(list(mpg,b=hp),c(lapply(.SD, mean))), by="cyl", .SDcols=c("vs", "am")] test(2164.101, names(dt01), c("cyl", names(c(m=sdlist)))) test(2164.102, names(dt02), c("cyl", "Mpg", sdnames)) @@ -17340,4 +17346,6 @@ test(2164.113, names(dt13), c("cyl", names(c(A=list(0, b=0,0))), sdnames )) test(2164.114, names(dt14), c("cyl", names(c(A=list(0))), sdnames )) test(2164.115, names(dt15), c("cyl", names(c(" "=list(" "=0, "."=0, 0))), sdnames )) test(2164.116, names(dt16), c("cyl", names(c("."=list(" "=0, "."=0, 0))), sdnames )) +test(2164.117, names(dt17), c("cyl", "V1","b", "vs", "am")) +test(2164.118, names(dt18), c("cyl", "V1","b", "vs", "am")) options(old) From fa0009e0dd632f9970047f6009ad3421421b671b Mon Sep 17 00:00:00 2001 From: Michael Young Date: Fri, 19 Feb 2021 22:33:57 -0800 Subject: [PATCH 13/31] cleaned up news item --- NEWS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index e811f52070..92c7b8dec2 100644 --- a/NEWS.md +++ b/NEWS.md @@ -10,7 +10,7 @@ 2. `fintersect()` now retains the order of the first argument as reasonably expected, rather than retaining the order of the second argument, [#4716](https://github.com/Rdatatable/data.table/issues/4716). Thanks to Michel Lang for reporting, and Ben Schwen for the PR. -3. Tagged/named arguments in j=c() calls now result in correctly named columns, [#2311](https://github.com/Rdatatable/data.table/issues/2311) Tagging/naming an lapply(.SD, FUN) call as an argument of c() in j will now always cause those tags to get prepended (with a single dot separator) to the resulting column names, e.g. x[, c(mean=lapply(.SD, mean)), by="z"]. 
Previously, prepending these tags was applied inconsistently. They were omitted in the presence of a by statement. This change also applies to tagging/naming a list() call as an argument of c() in j. Previously tags/names were omitted when there was both a by statement and the presence of a lapply(.SD, FUN) call as another argument of c(), e.g. x[, c(tag1=list(var1, b=var2), lapply(.SD, FUN)), by="z"]. Naming when j=c() should now more closely follow base R conventions for concatenating named lists. Thanks to @franknarf1 for reporting and Michael T Young for the PR. +3. Tagging/naming arguments of `c()` in `j=c()` should now more closely follow base R conventions for named concatenation of lists, [#2311](https://github.com/Rdatatable/data.table/issues/2311). Tagging/naming an `lapply(.SD, FUN)` call as an argument of `c()` in `j` will now always cause that tag to get prepended (with a single dot separator) to the resulting column names. For example, `mtcars[, c(mean=lapply(.SD,sum)),by="cyl",.SDcols=c("am","carb")]` will now have names `c("cyl", "mean.am", "mean.carb")`. Previously, prepending these tags was applied inconsistently--they were omitted in the presence of a by statement. Additionally, tagging/naming a `list()` call as an argument of `c()` in `j` will now always cause that tag to get prepended to any tags/names specified within the list call. For example, `mtcars[, c(mean=list(a=mean(hp), b=mean(wt)), sum=lapply(.SD,sum)),by="cyl",.SDcols=c("am","carb")]` will now have names `c("cyl", "mean.a", "mean.b", "sum.am", "sum.carb")`. Previously prepended tags/names were omitted when there was both a by statement and the presence of a `lapply(.SD, FUN)` call as another argument of `c()`. While the names returned by `data.table` when `j=c()` will now mostly follow base R conventions for concatenating lists, the major exception, as always, is that names which are completely unspecified will still be named according to position in j (e.g. `V1`, `V2`). 
For example, note that `mtcars[, c(list(mean(hp), b=mean(wt)),c=list(mean(cyl)))]` has names `c("V1", "b", "c")` whereas `c(list(1,b=2),list(c=3))` has names `c("", "b", "c")`. Thanks to @franknarf1 for reporting and Michael Young for the PR. ## NOTES From 6ce93bfc9e7ebdc958c4a7fd584db494f070ec89 Mon Sep 17 00:00:00 2001 From: Michael Young Date: Fri, 19 Feb 2021 22:51:41 -0800 Subject: [PATCH 14/31] added another passing test ensuring dt[, c(x[1], list(.),] gets a variable name for the column created by x[1]. previously this could be an empty string column name in some circumstances --- inst/tests/tests.Rraw | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index f478e5638a..59eb462f1e 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -17290,6 +17290,7 @@ dt15 = M[, c(" "=list(" "=hp, "."=disp, mpg), lapply(.SD, mean)), by="cyl"] dt16 = M[, c("."=list(" "=hp, "."=disp, mpg), lapply(.SD, mean)), by="cyl"] dt17 = M[, c(list(mpg,b=hp),lapply(.SD, mean)), by="cyl", .SDcols=c("vs", "am")] dt18 = M[, c(list(mpg,b=hp),c(lapply(.SD, mean))), by="cyl", .SDcols=c("vs", "am")] +dt19 = M[, c(mpg[1], list(mpg,b=hp),c(lapply(.SD, mean))), by="cyl", .SDcols=c("vs", "am")] test(2164.001, names(dt01), c("cyl", names(c(m=sdlist)))) test(2164.002, names(dt02), c("cyl", "Mpg", sdnames)) @@ -17309,6 +17310,8 @@ test(2164.015, names(dt15), c("cyl", names(c(" "=list(" "=0, "."=0, 0))), sdnam test(2164.016, names(dt16), c("cyl", names(c("."=list(" "=0, "."=0, 0))), sdnames )) test(2164.017, names(dt17), c("cyl", "V1","b", "vs", "am")) test(2164.018, names(dt18), c("cyl", "V1","b", "vs", "am")) +test(2164.019, names(dt19), c("cyl", "V1","V2","b", "vs", "am")) + old=options(datatable.optimize = 0) dt01 = M[, c( m=lapply(.SD, mean)), by="cyl"] @@ -17329,6 +17332,7 @@ dt15 = M[, c(" "=list(" "=hp, "."=disp, mpg), lapply(.SD, mean)), by="cyl"] dt16 = M[, c("."=list(" "=hp, "."=disp, mpg), lapply(.SD, mean)), by="cyl"] dt17 = 
M[, c(list(mpg,b=hp),lapply(.SD, mean)), by="cyl", .SDcols=c("vs", "am")] dt18 = M[, c(list(mpg,b=hp),c(lapply(.SD, mean))), by="cyl", .SDcols=c("vs", "am")] +dt19 = M[, c(mpg[1], list(mpg,b=hp),c(lapply(.SD, mean))), by="cyl", .SDcols=c("vs", "am")] test(2164.101, names(dt01), c("cyl", names(c(m=sdlist)))) test(2164.102, names(dt02), c("cyl", "Mpg", sdnames)) @@ -17348,4 +17352,5 @@ test(2164.115, names(dt15), c("cyl", names(c(" "=list(" "=0, "."=0, 0))), sdnam test(2164.116, names(dt16), c("cyl", names(c("."=list(" "=0, "."=0, 0))), sdnames )) test(2164.117, names(dt17), c("cyl", "V1","b", "vs", "am")) test(2164.118, names(dt18), c("cyl", "V1","b", "vs", "am")) +test(2164.119, names(dt19), c("cyl", "V1","V2","b", "vs", "am")) options(old) From fac24aaaec72dbb9ba76de8d17c693c3a1baf51f Mon Sep 17 00:00:00 2001 From: Michael Young Date: Sat, 20 Feb 2021 10:24:53 -0800 Subject: [PATCH 15/31] fix code style --- NEWS.md | 2 +- R/data.table.R | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/NEWS.md b/NEWS.md index 92c7b8dec2..d5b5d6b170 100644 --- a/NEWS.md +++ b/NEWS.md @@ -10,7 +10,7 @@ 2. `fintersect()` now retains the order of the first argument as reasonably expected, rather than retaining the order of the second argument, [#4716](https://github.com/Rdatatable/data.table/issues/4716). Thanks to Michel Lang for reporting, and Ben Schwen for the PR. -3. Tagging/naming arguments of `c()` in `j=c()` should now more closely follow base R conventions for named concatenation of lists, [#2311](https://github.com/Rdatatable/data.table/issues/2311). Tagging/naming an `lapply(.SD, FUN)` call as an argument of `c()` in `j` will now always cause that tag to get prepended (with a single dot separator) to the resulting column names. For example, `mtcars[, c(mean=lapply(.SD,sum)),by="cyl",.SDcols=c("am","carb")]` will now have names `c("cyl", "mean.am", "mean.carb")`. 
Previously, prepending these tags was applied inconsistently--they were omitted in the presence of a by statement. Additionally, tagging/naming a `list()` call as an argument of `c()` in `j` will now always cause that tag to get prepended to any tags/names specified within the list call. For example, `mtcars[, c(mean=list(a=mean(hp), b=mean(wt)), sum=lapply(.SD,sum)),by="cyl",.SDcols=c("am","carb")]` will now have names `c("cyl", "mean.a", "mean.b", "sum.am", "sum.carb")`. Previously prepended tags/names were omitted when there was both a by statement and the presence of a `lapply(.SD, FUN)` call as another argument of `c()`. While the names returned by `data.table` when `j=c()` will now mostly follow base R conventions for concatenating lists, the major exception, as always, is that names which are completely unspecified will still be named according to position in j (e.g. `V1`, `V2`). For example, note that `mtcars[, c(list(mean(hp), b=mean(wt)),c=list(mean(cyl)))]` has names `c("V1", "b", "c")` whereas `c(list(1,b=2),list(c=3))` has names `c("", "b", "c")`. Thanks to @franknarf1 for reporting and Michael Young for the PR. +3. Tagging/naming arguments of `c()` in `j=c()` should now more closely follow base R conventions for named concatenation of lists, [#2311](https://github.com/Rdatatable/data.table/issues/2311). Tagging/naming an `lapply(.SD, FUN)` call as an argument of `c()` in `j` will now always cause that tag to get prepended (with a single dot separator) to the resulting column names. For example, `mtcars[, c(mean=lapply(.SD,sum)),by="cyl",.SDcols=c("am","carb")]` will now have names `c("cyl", "mean.am", "mean.carb")`. Previously, prepending these tags was applied inconsistently--they were omitted in the presence of a by statement. Additionally, tagging/naming a `list()` call as an argument of `c()` in `j` will now always cause that tag to get prepended to any tags/names specified within the list call. 
For example, `mtcars[, c(mean=list(a=mean(hp), b=mean(wt)), sum=lapply(.SD,sum)),by="cyl",.SDcols=c("am","carb")]` will now have names `c("cyl", "mean.a", "mean.b", "sum.am", "sum.carb")`. Previously prepended tags/names were omitted when there was both a by statement and the presence of a `lapply(.SD, FUN)` call as another argument of `c()`. While the names returned by `data.table` when `j=c()` will now mostly follow base R conventions for concatenating lists, the major exception, as always, is that names which are completely unspecified will still be named according to position in `j` (e.g. `V1`, `V2`). For example, note that `mtcars[, c(list(mean(hp), b=mean(wt)),c=list(mean(cyl)))]` has names `c("V1", "b", "c")` whereas `c(list(1,b=2),list(c=3))` has names `c("", "b", "c")`. Thanks to @franknarf1 for reporting and Michael Young for the PR. ## NOTES diff --git a/R/data.table.R b/R/data.table.R index d3e0670da1..41956d4854 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -1851,7 +1851,7 @@ replace_dot_alias = function(e) { if (any(ww)) jvnames[ww] = paste0("V",ww) setattr(ans, "names", c(bynames, jvnames)) } else { - nonbynames <- names(ans)[-seq_along(bynames)] #related to 2311. make naming of empty columns names more consistent + nonbynames = names(ans)[-seq_along(bynames)] #related to 2311. make naming of empty columns names more consistent ww = which(nonbynames=="") if (any(ww)) nonbynames[ww] = paste0("V",ww) setattr(ans, "names", c(bynames, nonbynames)) # TO DO: reinvestigate bynames flowing from dogroups here and simplify From fe298c9d96443de237147fdb631a54355d7d5683 Mon Sep 17 00:00:00 2001 From: Michael Young Date: Tue, 31 Aug 2021 17:34:07 -0700 Subject: [PATCH 16/31] moved news item to the correct release --- NEWS.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/NEWS.md b/NEWS.md index 3b5af778b0..add7ea963c 100644 --- a/NEWS.md +++ b/NEWS.md @@ -143,7 +143,7 @@ ## BUG FIXES -1. 
`by=.EACHI` when `i` is keyed but `on=` different columns than `i`'s key could create an invalidly keyed result, [#4603](https://github.com/Rdatatable/data.table/issues/4603) [#4911](https://github.com/Rdatatable/data.table/issues/4911). Thanks to @myoung3 and @adamaltmejd for reporting, and @ColeMiller1 for the PR. An invalid key is where a `data.table` is marked as sorted by the key columns but the data is not sorted by those columns, leading to incorrect results from subsequent queries. +1. `by=.EACHI` when `i` is keyed but `on=` different columns than `i`'s key could create an invalidly keyed result, [#4603](https://github.com/Rdatatable/data.table/issues/4603) [#4911](https://github.com/Rdatatable/data.table/issues/4911). Thanks to Michael Young and @adamaltmejd for reporting, and @ColeMiller1 for the PR. An invalid key is where a `data.table` is marked as sorted by the key columns but the data is not sorted by those columns, leading to incorrect results from subsequent queries. 2. `print(DT, trunc.cols=TRUE)` and the corresponding `datatable.print.trunc.cols` option (new feature 3 in v1.13.0) could incorrectly display an extra column, [#4266](https://github.com/Rdatatable/data.table/issues/4266). Thanks to @tdhock for the bug report and @MichaelChirico for the PR. @@ -358,6 +358,8 @@ 42. `DT[factor("id")]` now works rather than error `i has evaluated to type integer. Expecting logical, integer or double`, [#1632](https://github.com/Rdatatable/data.table/issues/1632). `DT["id"]` has worked forever by automatically converting to `DT[.("id")]` for convenience, and joins have worked forever between char/fact, fact/char and fact/fact even when levels mismatch, so it was unfortunate that `DT[factor("id")]` managed to escape the simple automatic conversion to `DT[.(factor("id"))]` which is now in place. Thanks to @aushev for reporting, and Matt Dowle for the fix. +43. 
Tagging/naming arguments of `c()` in `j=c()` should now more closely follow base R conventions for named concatenation of lists, [#2311](https://github.com/Rdatatable/data.table/issues/2311). Tagging/naming an `lapply(.SD, FUN)` call as an argument of `c()` in `j` will now always cause that tag to get prepended (with a single dot separator) to the resulting column names. For example, `mtcars[, c(mean=lapply(.SD,sum)),by="cyl",.SDcols=c("am","carb")]` will now have names `c("cyl", "mean.am", "mean.carb")`. Previously, prepending these tags was applied inconsistently--they were omitted in the presence of a by statement. Additionally, tagging/naming a `list()` call as an argument of `c()` in `j` will now always cause that tag to get prepended to any tags/names specified within the list call. For example, `mtcars[, c(mean=list(a=mean(hp), b=mean(wt)), sum=lapply(.SD,sum)),by="cyl",.SDcols=c("am","carb")]` will now have names `c("cyl", "mean.a", "mean.b", "sum.am", "sum.carb")`. Previously prepended tags/names were omitted when there was both a by statement and the presence of a `lapply(.SD, FUN)` call as another argument of `c()`. While the names returned by `data.table` when `j=c()` will now mostly follow base R conventions for concatenating lists, the major exception, as always, is that names which are completely unspecified will still be named according to position in `j` (e.g. `V1`, `V2`). For example, note that `mtcars[, c(list(mean(hp), b=mean(wt)),c=list(mean(cyl)))]` has names `c("V1", "b", "c")` whereas `c(list(1,b=2),list(c=3))` has names `c("", "b", "c")`. Thanks to @franknarf1 for reporting and Michael Young for the PR. + ## NOTES 1. New feature 29 in v1.12.4 (Oct 2019) introduced zero-copy coercion. Our thinking is that requiring you to get the type right in the case of `0` (type double) vs `0L` (type integer) is too inconvenient for you the user. So such coercions happen in `data.table` automatically without warning. 
Thanks to zero-copy coercion there is no speed penalty, even when calling `set()` many times in a loop, so there's no speed penalty to warn you about either. However, we believe that assigning a character value such as `"2"` into an integer column is more likely to be a user mistake that you would like to be warned about. The type difference (character vs integer) may be the only clue that you have selected the wrong column, or typed the wrong variable to be assigned to that column. For this reason we view character to numeric-like coercion differently and will warn about it. If it is correct, then the warning is intended to nudge you to wrap the RHS with `as.()` so that it is clear to readers of your code that a coercion from character to that type is intended. For example : @@ -426,7 +428,6 @@ 2. `fintersect()` now retains the order of the first argument as reasonably expected, rather than retaining the order of the second argument, [#4716](https://github.com/Rdatatable/data.table/issues/4716). Thanks to Michel Lang for reporting, and Ben Schwen for the PR. -3. Tagging/naming arguments of `c()` in `j=c()` should now more closely follow base R conventions for named concatenation of lists, [#2311](https://github.com/Rdatatable/data.table/issues/2311). Tagging/naming an `lapply(.SD, FUN)` call as an argument of `c()` in `j` will now always cause that tag to get prepended (with a single dot separator) to the resulting column names. For example, `mtcars[, c(mean=lapply(.SD,sum)),by="cyl",.SDcols=c("am","carb")]` will now have names `c("cyl", "mean.am", "mean.carb")`. Previously, prepending these tags was applied inconsistently--they were omitted in the presence of a by statement. Additionally, tagging/naming a `list()` call as an argument of `c()` in `j` will now always cause that tag to get prepended to any tags/names specified within the list call. 
For example, `mtcars[, c(mean=list(a=mean(hp), b=mean(wt)), sum=lapply(.SD,sum)),by="cyl",.SDcols=c("am","carb")]` will now have names `c("cyl", "mean.a", "mean.b", "sum.am", "sum.carb")`. Previously prepended tags/names were omitted when there was both a by statement and the presence of a `lapply(.SD, FUN)` call as another argument of `c()`. While the names returned by `data.table` when `j=c()` will now mostly follow base R conventions for concatenating lists, the major exception, as always, is that names which are completely unspecified will still be named according to position in `j` (e.g. `V1`, `V2`). For example, note that `mtcars[, c(list(mean(hp), b=mean(wt)),c=list(mean(cyl)))]` has names `c("V1", "b", "c")` whereas `c(list(1,b=2),list(c=3))` has names `c("", "b", "c")`. Thanks to @franknarf1 for reporting and Michael Young for the PR. ## NOTES From 1c021f023779fd73ac1ccbe80b3623cf8dd47456 Mon Sep 17 00:00:00 2001 From: Michael Young Date: Tue, 31 Aug 2021 17:35:04 -0700 Subject: [PATCH 17/31] fixup --- NEWS.md | 1 - 1 file changed, 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index add7ea963c..1fdd1a332a 100644 --- a/NEWS.md +++ b/NEWS.md @@ -428,7 +428,6 @@ 2. `fintersect()` now retains the order of the first argument as reasonably expected, rather than retaining the order of the second argument, [#4716](https://github.com/Rdatatable/data.table/issues/4716). Thanks to Michel Lang for reporting, and Ben Schwen for the PR. - ## NOTES 1. Compiling from source no longer requires `zlib` header files to be available, [#4844](https://github.com/Rdatatable/data.table/pull/4844). The output suggests installing `zlib` headers, and how (e.g. `zlib1g-dev` on Ubuntu) as before, but now proceeds with `gzip` compression disabled in `fwrite`. Upon calling `fwrite(DT, "file.csv.gz")` at runtime, an error message suggests to reinstall `data.table` with `zlib` headers available. 
This does not apply to users on Windows or Mac who install the pre-compiled binary package from CRAN. From a48b28f553905a0d83960d97e5c0655bcb99c162 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Thu, 5 Sep 2024 23:26:17 -0700 Subject: [PATCH 18/31] modernize: options= in test() --- inst/tests/tests.Rraw | 41 +++++++++++++++++++---------------------- 1 file changed, 19 insertions(+), 22 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 113aa6c9e2..20db0d8cc5 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -19136,8 +19136,6 @@ test(2216.017, names(dt17), c("cyl", "V1","b", "vs", "am")) test(2216.018, names(dt18), c("cyl", "V1","b", "vs", "am")) test(2216.019, names(dt19), c("cyl", "V1","V2","b", "vs", "am")) - -old=options(datatable.optimize = 0) dt01 = M[, c( m=lapply(.SD, mean)), by="cyl"] dt02 = M[, c( Mpg=list(mpg), lapply(.SD, mean)), by="cyl"] dt03 = M[, c( Mpg=list(mpg), m=lapply(.SD, mean)), by="cyl"] @@ -19158,23 +19156,22 @@ dt17 = M[, c(list(mpg,b=hp),lapply(.SD, mean)), by="cyl", .SDcols=c("vs", "am")] dt18 = M[, c(list(mpg,b=hp),c(lapply(.SD, mean))), by="cyl", .SDcols=c("vs", "am")] dt19 = M[, c(mpg[1], list(mpg,b=hp),c(lapply(.SD, mean))), by="cyl", .SDcols=c("vs", "am")] -test(2216.101, names(dt01), c("cyl", names(c(m=sdlist)))) -test(2216.102, names(dt02), c("cyl", "Mpg", sdnames)) -test(2216.103, names(dt03), c("cyl", "Mpg", names(c(m=sdlist)))) -test(2216.104, names(dt04), c("cyl", "mpg", names(c(mpg=sdlist)))) -test(2216.105, names(dt05), c("cyl", "V1", sdnames)) -test(2216.106, names(dt06), c("cyl", sdnames, "V13") ) -test(2216.107, names(dt07), c("cyl", sdnames,sdnames)) -test(2216.108, names(dt08), c("cyl", names(c(mean=sdlist, sum=sdlist)))) -test(2216.109, names(dt09), c("cyl", sdnames, names(c(sum=sdlist))) ) -test(2216.110, names(dt10), c("cyl", names(c(" "=sdlist, "."=sdlist)))) -test(2216.111, names(dt11), c("cyl", names(c(A=list(a=0, b=0))), sdnames )) -test(2216.112, names(dt12), 
c("cyl", names(c(A=list(0, 0))), sdnames )) -test(2216.113, names(dt13), c("cyl", names(c(A=list(0, b=0,0))), sdnames )) -test(2216.114, names(dt14), c("cyl", names(c(A=list(0))), sdnames )) -test(2216.115, names(dt15), c("cyl", names(c(" "=list(" "=0, "."=0, 0))), sdnames )) -test(2216.116, names(dt16), c("cyl", names(c("."=list(" "=0, "."=0, 0))), sdnames )) -test(2216.117, names(dt17), c("cyl", "V1","b", "vs", "am")) -test(2216.118, names(dt18), c("cyl", "V1","b", "vs", "am")) -test(2216.119, names(dt19), c("cyl", "V1","V2","b", "vs", "am")) -options(old) +test(2216.101, options=c(datatable.optimize=0), names(dt01), c("cyl", names(c(m=sdlist)))) +test(2216.102, options=c(datatable.optimize=0), names(dt02), c("cyl", "Mpg", sdnames)) +test(2216.103, options=c(datatable.optimize=0), names(dt03), c("cyl", "Mpg", names(c(m=sdlist)))) +test(2216.104, options=c(datatable.optimize=0), names(dt04), c("cyl", "mpg", names(c(mpg=sdlist)))) +test(2216.105, options=c(datatable.optimize=0), names(dt05), c("cyl", "V1", sdnames)) +test(2216.106, options=c(datatable.optimize=0), names(dt06), c("cyl", sdnames, "V13") ) +test(2216.107, options=c(datatable.optimize=0), names(dt07), c("cyl", sdnames,sdnames)) +test(2216.108, options=c(datatable.optimize=0), names(dt08), c("cyl", names(c(mean=sdlist, sum=sdlist)))) +test(2216.109, options=c(datatable.optimize=0), names(dt09), c("cyl", sdnames, names(c(sum=sdlist))) ) +test(2216.110, options=c(datatable.optimize=0), names(dt10), c("cyl", names(c(" "=sdlist, "."=sdlist)))) +test(2216.111, options=c(datatable.optimize=0), names(dt11), c("cyl", names(c(A=list(a=0, b=0))), sdnames )) +test(2216.112, options=c(datatable.optimize=0), names(dt12), c("cyl", names(c(A=list(0, 0))), sdnames )) +test(2216.113, options=c(datatable.optimize=0), names(dt13), c("cyl", names(c(A=list(0, b=0,0))), sdnames )) +test(2216.114, options=c(datatable.optimize=0), names(dt14), c("cyl", names(c(A=list(0))), sdnames )) +test(2216.115, 
options=c(datatable.optimize=0), names(dt15), c("cyl", names(c(" "=list(" "=0, "."=0, 0))), sdnames )) +test(2216.116, options=c(datatable.optimize=0), names(dt16), c("cyl", names(c("."=list(" "=0, "."=0, 0))), sdnames )) +test(2216.117, options=c(datatable.optimize=0), names(dt17), c("cyl", "V1","b", "vs", "am")) +test(2216.118, options=c(datatable.optimize=0), names(dt18), c("cyl", "V1","b", "vs", "am")) +test(2216.119, options=c(datatable.optimize=0), names(dt19), c("cyl", "V1","V2","b", "vs", "am")) From 99927bab5dfa6cb4859bb9ab655ff424f4bbaed0 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Thu, 5 Sep 2024 23:26:51 -0700 Subject: [PATCH 19/31] fix test numbers --- inst/tests/tests.Rraw | 76 +++++++++++++++++++++---------------------- 1 file changed, 38 insertions(+), 38 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 20db0d8cc5..aec34d5374 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -19116,25 +19116,25 @@ dt17 = M[, c(list(mpg,b=hp),lapply(.SD, mean)), by="cyl", .SDcols=c("vs", "am")] dt18 = M[, c(list(mpg,b=hp),c(lapply(.SD, mean))), by="cyl", .SDcols=c("vs", "am")] dt19 = M[, c(mpg[1], list(mpg,b=hp),c(lapply(.SD, mean))), by="cyl", .SDcols=c("vs", "am")] -test(2216.001, names(dt01), c("cyl", names(c(m=sdlist)))) -test(2216.002, names(dt02), c("cyl", "Mpg", sdnames)) -test(2216.003, names(dt03), c("cyl", "Mpg", names(c(m=sdlist)))) -test(2216.004, names(dt04), c("cyl", "mpg", names(c(mpg=sdlist)))) -test(2216.005, names(dt05), c("cyl", "V1", sdnames)) -test(2216.006, names(dt06), c("cyl", sdnames, "V13") ) -test(2216.007, names(dt07), c("cyl", sdnames,sdnames)) -test(2216.008, names(dt08), c("cyl", names(c(mean=sdlist, sum=sdlist)))) -test(2216.009, names(dt09), c("cyl", sdnames, names(c(sum=sdlist))) ) -test(2216.010, names(dt10), c("cyl", names(c(" "=sdlist, "."=sdlist)))) -test(2216.011, names(dt11), c("cyl", names(c(A=list(a=0, b=0))), sdnames )) -test(2216.012, names(dt12), c("cyl", 
names(c(A=list(0, 0))), sdnames )) -test(2216.013, names(dt13), c("cyl", names(c(A=list(0, b=0,0))), sdnames )) -test(2216.014, names(dt14), c("cyl", names(c(A=list(0))), sdnames )) -test(2216.015, names(dt15), c("cyl", names(c(" "=list(" "=0, "."=0, 0))), sdnames )) -test(2216.016, names(dt16), c("cyl", names(c("."=list(" "=0, "."=0, 0))), sdnames )) -test(2216.017, names(dt17), c("cyl", "V1","b", "vs", "am")) -test(2216.018, names(dt18), c("cyl", "V1","b", "vs", "am")) -test(2216.019, names(dt19), c("cyl", "V1","V2","b", "vs", "am")) +test(2283.001, names(dt01), c("cyl", names(c(m=sdlist)))) +test(2283.002, names(dt02), c("cyl", "Mpg", sdnames)) +test(2283.003, names(dt03), c("cyl", "Mpg", names(c(m=sdlist)))) +test(2283.004, names(dt04), c("cyl", "mpg", names(c(mpg=sdlist)))) +test(2283.005, names(dt05), c("cyl", "V1", sdnames)) +test(2283.006, names(dt06), c("cyl", sdnames, "V13") ) +test(2283.007, names(dt07), c("cyl", sdnames,sdnames)) +test(2283.008, names(dt08), c("cyl", names(c(mean=sdlist, sum=sdlist)))) +test(2283.009, names(dt09), c("cyl", sdnames, names(c(sum=sdlist))) ) +test(2283.010, names(dt10), c("cyl", names(c(" "=sdlist, "."=sdlist)))) +test(2283.011, names(dt11), c("cyl", names(c(A=list(a=0, b=0))), sdnames )) +test(2283.012, names(dt12), c("cyl", names(c(A=list(0, 0))), sdnames )) +test(2283.013, names(dt13), c("cyl", names(c(A=list(0, b=0,0))), sdnames )) +test(2283.014, names(dt14), c("cyl", names(c(A=list(0))), sdnames )) +test(2283.015, names(dt15), c("cyl", names(c(" "=list(" "=0, "."=0, 0))), sdnames )) +test(2283.016, names(dt16), c("cyl", names(c("."=list(" "=0, "."=0, 0))), sdnames )) +test(2283.017, names(dt17), c("cyl", "V1","b", "vs", "am")) +test(2283.018, names(dt18), c("cyl", "V1","b", "vs", "am")) +test(2283.019, names(dt19), c("cyl", "V1","V2","b", "vs", "am")) dt01 = M[, c( m=lapply(.SD, mean)), by="cyl"] dt02 = M[, c( Mpg=list(mpg), lapply(.SD, mean)), by="cyl"] @@ -19156,22 +19156,22 @@ dt17 = M[, 
c(list(mpg,b=hp),lapply(.SD, mean)), by="cyl", .SDcols=c("vs", "am")] dt18 = M[, c(list(mpg,b=hp),c(lapply(.SD, mean))), by="cyl", .SDcols=c("vs", "am")] dt19 = M[, c(mpg[1], list(mpg,b=hp),c(lapply(.SD, mean))), by="cyl", .SDcols=c("vs", "am")] -test(2216.101, options=c(datatable.optimize=0), names(dt01), c("cyl", names(c(m=sdlist)))) -test(2216.102, options=c(datatable.optimize=0), names(dt02), c("cyl", "Mpg", sdnames)) -test(2216.103, options=c(datatable.optimize=0), names(dt03), c("cyl", "Mpg", names(c(m=sdlist)))) -test(2216.104, options=c(datatable.optimize=0), names(dt04), c("cyl", "mpg", names(c(mpg=sdlist)))) -test(2216.105, options=c(datatable.optimize=0), names(dt05), c("cyl", "V1", sdnames)) -test(2216.106, options=c(datatable.optimize=0), names(dt06), c("cyl", sdnames, "V13") ) -test(2216.107, options=c(datatable.optimize=0), names(dt07), c("cyl", sdnames,sdnames)) -test(2216.108, options=c(datatable.optimize=0), names(dt08), c("cyl", names(c(mean=sdlist, sum=sdlist)))) -test(2216.109, options=c(datatable.optimize=0), names(dt09), c("cyl", sdnames, names(c(sum=sdlist))) ) -test(2216.110, options=c(datatable.optimize=0), names(dt10), c("cyl", names(c(" "=sdlist, "."=sdlist)))) -test(2216.111, options=c(datatable.optimize=0), names(dt11), c("cyl", names(c(A=list(a=0, b=0))), sdnames )) -test(2216.112, options=c(datatable.optimize=0), names(dt12), c("cyl", names(c(A=list(0, 0))), sdnames )) -test(2216.113, options=c(datatable.optimize=0), names(dt13), c("cyl", names(c(A=list(0, b=0,0))), sdnames )) -test(2216.114, options=c(datatable.optimize=0), names(dt14), c("cyl", names(c(A=list(0))), sdnames )) -test(2216.115, options=c(datatable.optimize=0), names(dt15), c("cyl", names(c(" "=list(" "=0, "."=0, 0))), sdnames )) -test(2216.116, options=c(datatable.optimize=0), names(dt16), c("cyl", names(c("."=list(" "=0, "."=0, 0))), sdnames )) -test(2216.117, options=c(datatable.optimize=0), names(dt17), c("cyl", "V1","b", "vs", "am")) -test(2216.118, 
options=c(datatable.optimize=0), names(dt18), c("cyl", "V1","b", "vs", "am")) -test(2216.119, options=c(datatable.optimize=0), names(dt19), c("cyl", "V1","V2","b", "vs", "am")) +test(2283.101, options=c(datatable.optimize=0), names(M[, c(m=lapply(.SD, mean)), by="cyl"]), c("cyl", names(c(m=sdlist)))) +test(2283.102, options=c(datatable.optimize=0), names(M[, c(Mpg=list(mpg), lapply(.SD, mean)), by="cyl"]), c("cyl", "Mpg", sdnames)) +test(2283.103, options=c(datatable.optimize=0), names(M[, c(Mpg=list(mpg), m=lapply(.SD, mean)), by="cyl"]), c("cyl", "Mpg", names(c(m=sdlist)))) +test(2283.104, options=c(datatable.optimize=0), names(M[, c(mpg=list(mpg), mpg=lapply(.SD, mean)), by="cyl"]), c("cyl", "mpg", names(c(mpg=sdlist)))) +test(2283.105, options=c(datatable.optimize=0), names(M[, c(list(mpg), lapply(.SD, mean)), by="cyl"]), c("cyl", "V1", sdnames)) +test(2283.106, options=c(datatable.optimize=0), names(M[, c(lapply(.SD, mean), list(mpg)), by="cyl"]), c("cyl", sdnames, "V13") ) +test(2283.107, options=c(datatable.optimize=0), names(M[, c(lapply(.SD, mean), lapply(.SD, sum)), by="cyl"]), c("cyl", sdnames,sdnames)) +test(2283.108, options=c(datatable.optimize=0), names(M[, c(mean=lapply(.SD, mean), sum=lapply(.SD, sum)), by="cyl"]), c("cyl", names(c(mean=sdlist, sum=sdlist)))) +test(2283.109, options=c(datatable.optimize=0), names(M[, c(lapply(.SD, mean), sum=lapply(.SD, sum)), by="cyl"]), c("cyl", sdnames, names(c(sum=sdlist))) ) +test(2283.110, options=c(datatable.optimize=0), names(M[, c(" "=lapply(.SD, mean), "."=lapply(.SD, sum)), by="cyl"]), c("cyl", names(c(" "=sdlist, "."=sdlist)))) +test(2283.111, options=c(datatable.optimize=0), names(M[, c(A=list(a=mpg,b=hp), lapply(.SD, mean)), by="cyl"]), c("cyl", names(c(A=list(a=0, b=0))), sdnames )) +test(2283.112, options=c(datatable.optimize=0), names(M[, c(A=list(mpg, hp), lapply(.SD, mean)), by="cyl"]), c("cyl", names(c(A=list(0, 0))), sdnames )) +test(2283.113, options=c(datatable.optimize=0), names(M[, c(A=list(mpg, b=hp, wt), lapply(.SD, mean)), by="cyl"]), c("cyl", names(c(A=list(0, b=0,0))), sdnames )) +test(2283.114, options=c(datatable.optimize=0), names(M[, c(A=list(mpg), lapply(.SD, mean)), by="cyl"]), c("cyl", names(c(A=list(0))), sdnames )) +test(2283.115, options=c(datatable.optimize=0), names(M[, c(" "=list(" "=hp, "."=disp, mpg), lapply(.SD, mean)), by="cyl"]), c("cyl", names(c(" "=list(" "=0, "."=0, 0))), sdnames )) +test(2283.116, options=c(datatable.optimize=0), names(M[, c("."=list(" "=hp, "."=disp, mpg), lapply(.SD, mean)), by="cyl"]), c("cyl", names(c("."=list(" "=0, "."=0, 0))), sdnames )) +test(2283.117, options=c(datatable.optimize=0), names(M[, c(list(mpg,b=hp),lapply(.SD, mean)), by="cyl", .SDcols=c("vs", "am")]), c("cyl", "V1","b", "vs", "am")) +test(2283.118, options=c(datatable.optimize=0), names(M[, c(list(mpg,b=hp),c(lapply(.SD, mean))), by="cyl", .SDcols=c("vs", "am")]), c("cyl", "V1","b", "vs", "am"))
+test(2283.119, options=c(datatable.optimize=0), names(M[, c(mpg[1], list(mpg,b=hp),c(lapply(.SD, mean))), by="cyl", .SDcols=c("vs", "am")]), c("cyl", "V1","V2","b", "vs", "am")) From d85b5ff1aa4f91678d16cf0c05e99b633f2709b1 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Thu, 5 Sep 2024 23:38:47 -0700 Subject: [PATCH 20/31] copy-edit NEWS --- NEWS.md | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index 3cd5ba08ca..1aa84dcaa9 100644 --- a/NEWS.md +++ b/NEWS.md @@ -33,7 +33,25 @@ rowwiseDT( 3. The data.table-only attribute `$.internal.selfref` is no longer set for data.frames. [#5286](https://github.com/Rdatatable/data.table/issues/5286). Thanks @OfekShilon for the report and fix. -4. Tagging/naming arguments of `c()` in `j=c()` should now more closely follow base R conventions for named concatenation of lists, [#2311](https://github.com/Rdatatable/data.table/issues/2311). Tagging/naming an `lapply(.SD, FUN)` call as an argument of `c()` in `j` will now always cause that tag to get prepended (with a single dot separator) to the resulting column names. For example, `mtcars[, c(mean=lapply(.SD,sum)),by="cyl",.SDcols=c("am","carb")]` will now have names `c("cyl", "mean.am", "mean.carb")`. Previously, prepending these tags was applied inconsistently--they were omitted in the presence of a by statement. Additionally, tagging/naming a `list()` call as an argument of `c()` in `j` will now always cause that tag to get prepended to any tags/names specified within the list call. For example, `mtcars[, c(mean=list(a=mean(hp), b=mean(wt)), sum=lapply(.SD,sum)),by="cyl",.SDcols=c("am","carb")]` will now have names `c("cyl", "mean.a", "mean.b", "sum.am", "sum.carb")`. Previously prepended tags/names were omitted when there was both a by statement and the presence of a `lapply(.SD, FUN)` call as another argument of `c()`.
While the names returned by `data.table` when `j=c()` will now mostly follow base R conventions for concatenating lists, the major exception, as always, is that names which are completely unspecified will still be named according to position in `j` (e.g. `V1`, `V2`). For example, note that `mtcars[, c(list(mean(hp), b=mean(wt)),c=list(mean(cyl)))]` has names `c("V1", "b", "c")` whereas `c(list(1,b=2),list(c=3))` has names `c("", "b", "c")`. Thanks to @franknarf1 for reporting and Michael Young for the PR. +4. Tagging/naming arguments of `c()` in `j=c()` should now more closely follow base R conventions for concatenation of named lists during grouping, [#2311](https://github.com/Rdatatable/data.table/issues/2311). Naming an `lapply(.SD, FUN)` call as an argument of `c()` in `j` will now always cause that tag to get prepended (with a single dot separator) to the resulting column names. Additionally, naming a `list()` call as an argument of `c()` in `j` will now always cause that tag to get prepended to any names specified within the list call. This bug only affected queries with (1) `by=` grouping (2) `getOption("datatable.optimize") >= 1L` and (3) `lapply(.SD, FUN)` in `j`. + + While the names returned by `data.table` when `j=c()` will now mostly follow base R conventions for concatenating lists, note that names which are completely unspecified will still be named positionally, i.e. according to position in `j` (e.g. `V1`, `V2`), matching the typical behavior in `j` and `data.table()`. + + Thanks to @franknarf1 for reporting and @myoung3 for the PR. 
+ + ```r + # tag 'mean' prepended to lapply()-named columns + names(mtcars[, c(mean=lapply(.SD,sum)), by="cyl", .SDcols=c("am", "carb")]) + # [1] "cyl" "mean.am" "mean.carb" + + # tag 'mean' is prepended to the first named sublist, 'sum' to the second + names(mtcars[, c(mean=list(a=mean(hp), b=mean(wt)), sum=lapply(.SD, sum)), by="cyl", .SDcols=c("am", "carb")]) + # [1] "cyl" "mean.a" "mean.b" "sum.am" "sum.carb" + + # strict base naming would result in names c("", "b", "c") here + names(mtcars[, c(list(mean(hp), b=mean(wt)), c=list(mean(cyl)))]) + # [1] "V1" "b" "c" + ``` ## NOTES From 6c249c1381b320cddc45b69a3d068111af0d06bd Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Thu, 5 Sep 2024 23:40:57 -0700 Subject: [PATCH 21/31] redundant test objects --- inst/tests/tests.Rraw | 59 ++++++++++++++----------------------------- 1 file changed, 19 insertions(+), 40 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index aec34d5374..caf88cd672 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -19116,46 +19116,25 @@ dt17 = M[, c(list(mpg,b=hp),lapply(.SD, mean)), by="cyl", .SDcols=c("vs", "am")] dt18 = M[, c(list(mpg,b=hp),c(lapply(.SD, mean))), by="cyl", .SDcols=c("vs", "am")] dt19 = M[, c(mpg[1], list(mpg,b=hp),c(lapply(.SD, mean))), by="cyl", .SDcols=c("vs", "am")] -test(2283.001, names(dt01), c("cyl", names(c(m=sdlist)))) -test(2283.002, names(dt02), c("cyl", "Mpg", sdnames)) -test(2283.003, names(dt03), c("cyl", "Mpg", names(c(m=sdlist)))) -test(2283.004, names(dt04), c("cyl", "mpg", names(c(mpg=sdlist)))) -test(2283.005, names(dt05), c("cyl", "V1", sdnames)) -test(2283.006, names(dt06), c("cyl", sdnames, "V13") ) -test(2283.007, names(dt07), c("cyl", sdnames,sdnames)) -test(2283.008, names(dt08), c("cyl", names(c(mean=sdlist, sum=sdlist)))) -test(2283.009, names(dt09), c("cyl", sdnames, names(c(sum=sdlist))) ) -test(2283.010, names(dt10), c("cyl", names(c(" "=sdlist, "."=sdlist)))) -test(2283.011, names(dt11), c("cyl", 
names(c(A=list(a=0, b=0))), sdnames )) -test(2283.012, names(dt12), c("cyl", names(c(A=list(0, 0))), sdnames )) -test(2283.013, names(dt13), c("cyl", names(c(A=list(0, b=0,0))), sdnames )) -test(2283.014, names(dt14), c("cyl", names(c(A=list(0))), sdnames )) -test(2283.015, names(dt15), c("cyl", names(c(" "=list(" "=0, "."=0, 0))), sdnames )) -test(2283.016, names(dt16), c("cyl", names(c("."=list(" "=0, "."=0, 0))), sdnames )) -test(2283.017, names(dt17), c("cyl", "V1","b", "vs", "am")) -test(2283.018, names(dt18), c("cyl", "V1","b", "vs", "am")) -test(2283.019, names(dt19), c("cyl", "V1","V2","b", "vs", "am")) - -dt01 = M[, c( m=lapply(.SD, mean)), by="cyl"] -dt02 = M[, c( Mpg=list(mpg), lapply(.SD, mean)), by="cyl"] -dt03 = M[, c( Mpg=list(mpg), m=lapply(.SD, mean)), by="cyl"] -dt04 = M[, c( mpg=list(mpg), mpg=lapply(.SD, mean)), by="cyl"] -dt05 = M[, c( list(mpg), lapply(.SD, mean)), by="cyl"] -dt06 = M[, c( lapply(.SD, mean), list(mpg) ), by="cyl"] -dt07 = M[, c( lapply(.SD, mean), lapply(.SD, sum)), by="cyl"] -dt08 = M[, c(mean=lapply(.SD, mean), sum=lapply(.SD, sum)), by="cyl"] -dt09 = M[, c( lapply(.SD, mean), sum=lapply(.SD, sum)), by="cyl"] -dt10 = M[, c( " "=lapply(.SD, mean), "."=lapply(.SD, sum)), by="cyl"] -dt11 = M[, c(A=list(a=mpg,b=hp), lapply(.SD, mean)), by="cyl"] -dt12 = M[, c(A=list(mpg, hp), lapply(.SD, mean)), by="cyl"] -dt13 = M[, c(A=list(mpg, b=hp, wt), lapply(.SD, mean)), by="cyl"] -dt14 = M[, c(A=list(mpg), lapply(.SD, mean)), by="cyl"] -dt15 = M[, c(" "=list(" "=hp, "."=disp, mpg), lapply(.SD, mean)), by="cyl"] -dt16 = M[, c("."=list(" "=hp, "."=disp, mpg), lapply(.SD, mean)), by="cyl"] -dt17 = M[, c(list(mpg,b=hp),lapply(.SD, mean)), by="cyl", .SDcols=c("vs", "am")] -dt18 = M[, c(list(mpg,b=hp),c(lapply(.SD, mean))), by="cyl", .SDcols=c("vs", "am")] -dt19 = M[, c(mpg[1], list(mpg,b=hp),c(lapply(.SD, mean))), by="cyl", .SDcols=c("vs", "am")] - +test(2283.001, options=c(datatable.optimize=1), names(dt01), c("cyl", names(c(m=sdlist)))) 
+test(2283.002, options=c(datatable.optimize=1), names(dt02), c("cyl", "Mpg", sdnames)) +test(2283.003, options=c(datatable.optimize=1), names(dt03), c("cyl", "Mpg", names(c(m=sdlist)))) +test(2283.004, options=c(datatable.optimize=1), names(dt04), c("cyl", "mpg", names(c(mpg=sdlist)))) +test(2283.005, options=c(datatable.optimize=1), names(dt05), c("cyl", "V1", sdnames)) +test(2283.006, options=c(datatable.optimize=1), names(dt06), c("cyl", sdnames, "V13") ) +test(2283.007, options=c(datatable.optimize=1), names(dt07), c("cyl", sdnames,sdnames)) +test(2283.008, options=c(datatable.optimize=1), names(dt08), c("cyl", names(c(mean=sdlist, sum=sdlist)))) +test(2283.009, options=c(datatable.optimize=1), names(dt09), c("cyl", sdnames, names(c(sum=sdlist))) ) +test(2283.010, options=c(datatable.optimize=1), names(dt10), c("cyl", names(c(" "=sdlist, "."=sdlist)))) +test(2283.011, options=c(datatable.optimize=1), names(dt11), c("cyl", names(c(A=list(a=0, b=0))), sdnames )) +test(2283.012, options=c(datatable.optimize=1), names(dt12), c("cyl", names(c(A=list(0, 0))), sdnames )) +test(2283.013, options=c(datatable.optimize=1), names(dt13), c("cyl", names(c(A=list(0, b=0,0))), sdnames )) +test(2283.014, options=c(datatable.optimize=1), names(dt14), c("cyl", names(c(A=list(0))), sdnames )) +test(2283.015, options=c(datatable.optimize=1), names(dt15), c("cyl", names(c(" "=list(" "=0, "."=0, 0))), sdnames )) +test(2283.016, options=c(datatable.optimize=1), names(dt16), c("cyl", names(c("."=list(" "=0, "."=0, 0))), sdnames )) +test(2283.017, options=c(datatable.optimize=1), names(dt17), c("cyl", "V1","b", "vs", "am")) +test(2283.018, options=c(datatable.optimize=1), names(dt18), c("cyl", "V1","b", "vs", "am")) +test(2283.019, options=c(datatable.optimize=1), names(dt19), c("cyl", "V1","V2","b", "vs", "am")) test(2283.101, options=c(datatable.optimize=0), names(dt01), c("cyl", names(c(m=sdlist)))) test(2283.102, options=c(datatable.optimize=0), names(dt02), c("cyl", "Mpg", 
sdnames)) test(2283.103, options=c(datatable.optimize=0), names(dt03), c("cyl", "Mpg", names(c(m=sdlist)))) From 07fd7e76633da71d8cc5d9620522f6b2a2c35e27 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Thu, 5 Sep 2024 23:42:43 -0700 Subject: [PATCH 22/31] test in a loop, also test opt=2 --- inst/tests/tests.Rraw | 59 +++++++++++++++---------------------------- 1 file changed, 21 insertions(+), 38 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index caf88cd672..5566224e5f 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -19116,41 +19116,24 @@ dt17 = M[, c(list(mpg,b=hp),lapply(.SD, mean)), by="cyl", .SDcols=c("vs", "am")] dt18 = M[, c(list(mpg,b=hp),c(lapply(.SD, mean))), by="cyl", .SDcols=c("vs", "am")] dt19 = M[, c(mpg[1], list(mpg,b=hp),c(lapply(.SD, mean))), by="cyl", .SDcols=c("vs", "am")] -test(2283.001, options=c(datatable.optimize=1), names(dt01), c("cyl", names(c(m=sdlist)))) -test(2283.002, options=c(datatable.optimize=1), names(dt02), c("cyl", "Mpg", sdnames)) -test(2283.003, options=c(datatable.optimize=1), names(dt03), c("cyl", "Mpg", names(c(m=sdlist)))) -test(2283.004, options=c(datatable.optimize=1), names(dt04), c("cyl", "mpg", names(c(mpg=sdlist)))) -test(2283.005, options=c(datatable.optimize=1), names(dt05), c("cyl", "V1", sdnames)) -test(2283.006, options=c(datatable.optimize=1), names(dt06), c("cyl", sdnames, "V13") ) -test(2283.007, options=c(datatable.optimize=1), names(dt07), c("cyl", sdnames,sdnames)) -test(2283.008, options=c(datatable.optimize=1), names(dt08), c("cyl", names(c(mean=sdlist, sum=sdlist)))) -test(2283.009, options=c(datatable.optimize=1), names(dt09), c("cyl", sdnames, names(c(sum=sdlist))) ) -test(2283.010, options=c(datatable.optimize=1), names(dt10), c("cyl", names(c(" "=sdlist, "."=sdlist)))) -test(2283.011, options=c(datatable.optimize=1), names(dt11), c("cyl", names(c(A=list(a=0, b=0))), sdnames )) -test(2283.012, options=c(datatable.optimize=1), names(dt12), c("cyl", 
names(c(A=list(0, 0))), sdnames )) -test(2283.013, options=c(datatable.optimize=1), names(dt13), c("cyl", names(c(A=list(0, b=0,0))), sdnames )) -test(2283.014, options=c(datatable.optimize=1), names(dt14), c("cyl", names(c(A=list(0))), sdnames )) -test(2283.015, options=c(datatable.optimize=1), names(dt15), c("cyl", names(c(" "=list(" "=0, "."=0, 0))), sdnames )) -test(2283.016, options=c(datatable.optimize=1), names(dt16), c("cyl", names(c("."=list(" "=0, "."=0, 0))), sdnames )) -test(2283.017, options=c(datatable.optimize=1), names(dt17), c("cyl", "V1","b", "vs", "am")) -test(2283.018, options=c(datatable.optimize=1), names(dt18), c("cyl", "V1","b", "vs", "am")) -test(2283.019, options=c(datatable.optimize=1), names(dt19), c("cyl", "V1","V2","b", "vs", "am")) -test(2283.101, options=c(datatable.optimize=0), names(dt01), c("cyl", names(c(m=sdlist)))) -test(2283.102, options=c(datatable.optimize=0), names(dt02), c("cyl", "Mpg", sdnames)) -test(2283.103, options=c(datatable.optimize=0), names(dt03), c("cyl", "Mpg", names(c(m=sdlist)))) -test(2283.104, options=c(datatable.optimize=0), names(dt04), c("cyl", "mpg", names(c(mpg=sdlist)))) -test(2283.105, options=c(datatable.optimize=0), names(dt05), c("cyl", "V1", sdnames)) -test(2283.106, options=c(datatable.optimize=0), names(dt06), c("cyl", sdnames, "V13") ) -test(2283.107, options=c(datatable.optimize=0), names(dt07), c("cyl", sdnames,sdnames)) -test(2283.108, options=c(datatable.optimize=0), names(dt08), c("cyl", names(c(mean=sdlist, sum=sdlist)))) -test(2283.109, options=c(datatable.optimize=0), names(dt09), c("cyl", sdnames, names(c(sum=sdlist))) ) -test(2283.110, options=c(datatable.optimize=0), names(dt10), c("cyl", names(c(" "=sdlist, "."=sdlist)))) -test(2283.111, options=c(datatable.optimize=0), names(dt11), c("cyl", names(c(A=list(a=0, b=0))), sdnames )) -test(2283.112, options=c(datatable.optimize=0), names(dt12), c("cyl", names(c(A=list(0, 0))), sdnames )) -test(2283.113, options=c(datatable.optimize=0), 
names(dt13), c("cyl", names(c(A=list(0, b=0,0))), sdnames )) -test(2283.114, options=c(datatable.optimize=0), names(dt14), c("cyl", names(c(A=list(0))), sdnames )) -test(2283.115, options=c(datatable.optimize=0), names(dt15), c("cyl", names(c(" "=list(" "=0, "."=0, 0))), sdnames )) -test(2283.116, options=c(datatable.optimize=0), names(dt16), c("cyl", names(c("."=list(" "=0, "."=0, 0))), sdnames )) -test(2283.117, options=c(datatable.optimize=0), names(dt17), c("cyl", "V1","b", "vs", "am")) -test(2283.118, options=c(datatable.optimize=0), names(dt18), c("cyl", "V1","b", "vs", "am")) -test(2283.119, options=c(datatable.optimize=0), names(dt19), c("cyl", "V1","V2","b", "vs", "am")) +for (opt in c(0, 1, 2)) { + test(2283 + opt/10 + 0.001, options=c(datatable.optimize=1), names(dt01), c("cyl", names(c(m=sdlist)))) + test(2283 + opt/10 + 0.002, options=c(datatable.optimize=1), names(dt02), c("cyl", "Mpg", sdnames)) + test(2283 + opt/10 + 0.003, options=c(datatable.optimize=1), names(dt03), c("cyl", "Mpg", names(c(m=sdlist)))) + test(2283 + opt/10 + 0.004, options=c(datatable.optimize=1), names(dt04), c("cyl", "mpg", names(c(mpg=sdlist)))) + test(2283 + opt/10 + 0.005, options=c(datatable.optimize=1), names(dt05), c("cyl", "V1", sdnames)) + test(2283 + opt/10 + 0.006, options=c(datatable.optimize=1), names(dt06), c("cyl", sdnames, "V13") ) + test(2283 + opt/10 + 0.007, options=c(datatable.optimize=1), names(dt07), c("cyl", sdnames,sdnames)) + test(2283 + opt/10 + 0.008, options=c(datatable.optimize=1), names(dt08), c("cyl", names(c(mean=sdlist, sum=sdlist)))) + test(2283 + opt/10 + 0.009, options=c(datatable.optimize=1), names(dt09), c("cyl", sdnames, names(c(sum=sdlist))) ) + test(2283 + opt/10 + 0.010, options=c(datatable.optimize=1), names(dt10), c("cyl", names(c(" "=sdlist, "."=sdlist)))) + test(2283 + opt/10 + 0.011, options=c(datatable.optimize=1), names(dt11), c("cyl", names(c(A=list(a=0, b=0))), sdnames )) + test(2283 + opt/10 + 0.012, 
options=c(datatable.optimize=1), names(dt12), c("cyl", names(c(A=list(0, 0))), sdnames )) + test(2283 + opt/10 + 0.013, options=c(datatable.optimize=1), names(dt13), c("cyl", names(c(A=list(0, b=0,0))), sdnames )) + test(2283 + opt/10 + 0.014, options=c(datatable.optimize=1), names(dt14), c("cyl", names(c(A=list(0))), sdnames )) + test(2283 + opt/10 + 0.015, options=c(datatable.optimize=1), names(dt15), c("cyl", names(c(" "=list(" "=0, "."=0, 0))), sdnames )) + test(2283 + opt/10 + 0.016, options=c(datatable.optimize=1), names(dt16), c("cyl", names(c("."=list(" "=0, "."=0, 0))), sdnames )) + test(2283 + opt/10 + 0.017, options=c(datatable.optimize=1), names(dt17), c("cyl", "V1","b", "vs", "am")) + test(2283 + opt/10 + 0.018, options=c(datatable.optimize=1), names(dt18), c("cyl", "V1","b", "vs", "am")) + test(2283 + opt/10 + 0.019, options=c(datatable.optimize=1), names(dt19), c("cyl", "V1","V2","b", "vs", "am")) +} From f6214aaebe3f0f0940d160da5aa8b45607581f62 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Thu, 5 Sep 2024 23:43:32 -0700 Subject: [PATCH 23/31] consistent GH name, reduce diff --- NEWS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index 1aa84dcaa9..724dd3ae1e 100644 --- a/NEWS.md +++ b/NEWS.md @@ -424,7 +424,7 @@ rowwiseDT( # 2: 3 ``` -23. `DT[, head(.SD,n), by=grp]` and `tail` are now optimized when `n>1`, [#5060](https://github.com/Rdatatable/data.table/issues/5060) [#523](https://github.com/Rdatatable/data.table/issues/523#issuecomment-162934391). `n==1` was already optimized. Thanks to Jan Gorecki and Michael Young for requesting, and Benjamin Schwendinger for the PR. +23. `DT[, head(.SD,n), by=grp]` and `tail` are now optimized when `n>1`, [#5060](https://github.com/Rdatatable/data.table/issues/5060) [#523](https://github.com/Rdatatable/data.table/issues/523#issuecomment-162934391). `n==1` was already optimized. Thanks to Jan Gorecki and @myoung3 for requesting, and Benjamin Schwendinger for the PR. 
24. `setcolorder()` gains `before=` and `after=`, [#4358](https://github.com/Rdatatable/data.table/issues/4358). Thanks to Matthias Gomolka for the request, and both Benjamin Schwendinger and Xianghui Dong for implementing. Also thanks to Manuel López-Ibáñez for testing dev and mentioning needed documentation before release. From 226e332348ac9b47c5890a6b7ad689ef239646a6 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Thu, 5 Sep 2024 23:46:38 -0700 Subject: [PATCH 24/31] same --- NEWS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index 724dd3ae1e..5869d49eae 100644 --- a/NEWS.md +++ b/NEWS.md @@ -589,7 +589,7 @@ rowwiseDT( ## BUG FIXES -1. `by=.EACHI` when `i` is keyed but `on=` different columns than `i`'s key could create an invalidly keyed result, [#4603](https://github.com/Rdatatable/data.table/issues/4603) [#4911](https://github.com/Rdatatable/data.table/issues/4911). Thanks to Michael Young and @adamaltmejd for reporting, and @ColeMiller1 for the PR. An invalid key is where a `data.table` is marked as sorted by the key columns but the data is not sorted by those columns, leading to incorrect results from subsequent queries. +1. `by=.EACHI` when `i` is keyed but `on=` different columns than `i`'s key could create an invalidly keyed result, [#4603](https://github.com/Rdatatable/data.table/issues/4603) [#4911](https://github.com/Rdatatable/data.table/issues/4911). Thanks to @myoung3 and @adamaltmejd for reporting, and @ColeMiller1 for the PR. An invalid key is where a `data.table` is marked as sorted by the key columns but the data is not sorted by those columns, leading to incorrect results from subsequent queries. 2. `print(DT, trunc.cols=TRUE)` and the corresponding `datatable.print.trunc.cols` option (new feature 3 in v1.13.0) could incorrectly display an extra column, [#4266](https://github.com/Rdatatable/data.table/issues/4266). Thanks to @tdhock for the bug report and @MichaelChirico for the PR. 
From a25a53e20735fc64c924fc7f100612123c4f92da Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Thu, 5 Sep 2024 23:47:23 -0700 Subject: [PATCH 25/31] oops, undo that one. Prefer leaving released NEWS alone --- NEWS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index 5869d49eae..652135536f 100644 --- a/NEWS.md +++ b/NEWS.md @@ -424,7 +424,7 @@ rowwiseDT( # 2: 3 ``` -23. `DT[, head(.SD,n), by=grp]` and `tail` are now optimized when `n>1`, [#5060](https://github.com/Rdatatable/data.table/issues/5060) [#523](https://github.com/Rdatatable/data.table/issues/523#issuecomment-162934391). `n==1` was already optimized. Thanks to Jan Gorecki and @myoung3 for requesting, and Benjamin Schwendinger for the PR. +23. `DT[, head(.SD,n), by=grp]` and `tail` are now optimized when `n>1`, [#5060](https://github.com/Rdatatable/data.table/issues/5060) [#523](https://github.com/Rdatatable/data.table/issues/523#issuecomment-162934391). `n==1` was already optimized. Thanks to Jan Gorecki and Michael Young for requesting, and Benjamin Schwendinger for the PR. 24. `setcolorder()` gains `before=` and `after=`, [#4358](https://github.com/Rdatatable/data.table/issues/4358). Thanks to Matthias Gomolka for the request, and both Benjamin Schwendinger and Xianghui Dong for implementing. Also thanks to Manuel López-Ibáñez for testing dev and mentioning needed documentation before release. From 5efb327aa2fe6c63ee8b892613f7bca34e73ecb7 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Thu, 5 Sep 2024 23:56:13 -0700 Subject: [PATCH 26/31] Style touch-up, refer to tests over long comment --- R/data.table.R | 22 +++++++--------------- 1 file changed, 7 insertions(+), 15 deletions(-) diff --git a/R/data.table.R b/R/data.table.R index f88e3be2c1..d834e7cd32 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -1698,29 +1698,21 @@ replace_dot_alias = function(e) { funi = funi + 1L # Fix for #985 jsubl[[i_]] = as.list(deparse_ans[[1L]][-1L]) # just keep the '.' 
from list(.) jn__ = deparse_ans[[2L]] - if (!is.null(names(jsubl)[i_]) && names(jsubl)[i_] != "") { + if (isTRUE(nzchar(names(jsubl)[i_]))) { # Fix for #2311, prepend named arguments of c() to column names of .SD - # e.g. c(mean=lapply(.SD, mean)) - jn__ = paste(names(jsubl)[i_], jn__, sep=".") - # sep="." for consistency with c(A=list(a=1,b=1)) + # e.g. c(mean=lapply(.SD, mean)) + jn__ = paste(names(jsubl)[i_], jn__, sep=".") # sep="." for consistency with c(A=list(a=1,b=1)) } jvnames = c(jvnames, jn__) } else if (this[[1L]] == "list") { # also handle c(lapply(.SD, sum), list()) - silly, yes, but can happen if (length(this) > 1L) { jl__ = as.list(jsubl[[i_]])[-1L] # just keep the '.' from list(.) - if (!is.null(names(jsubl)[i_]) && names(jsubl)[i_] != "") { - # Fix for #2311, prepend named list arguments of c() to that list's names: - #e.g. x[, c(A=list(), lapply(.SD, mean)),by="z"] - #now consider: - #x[, c(A=list(x,y), lapply(.SD, mean)),by="z"] #--> names A1 A2 - #x[, c(A=list(x,b=y), lapply(.SD, mean)),by="z"] #--> names A1 A.b - #x[, c(A=list(a=x,b=y), lapply(.SD, mean)),by="z"] #--> names A.a A.b - #x[, c(A=list(x), lapply(.SD, mean)),by="z"] #--> names A (note length-1 doesn't get integer suffix) - #these all follow base R. e.g. c(A=list(0,b=0)) + if (isTRUE(nzchar(names(jsubl)[i_]))) { + # Fix for #2311, prepend named list arguments of c() to that list's names. 
See tests 2283.* njl__ = if (is.null(names(jl__))) rep("", length(jl__)) else names(jl__) - njl__nonblank = names(jl__) != "" - if (length(jl__)>1L) { + njl__nonblank = nzchar(names(jl__)) + if (length(jl__) > 1L) { jn__ = paste0(names(jsubl)[i_], seq_along(jl__)) } else { jn__ = names(jsubl)[i_] From 279586b11628912b6759b963f50fe1caa900b479 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Thu, 5 Sep 2024 23:56:53 -0700 Subject: [PATCH 27/31] Extra ')' in DESCRIPTION --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index a9d488e046..53f11e9d04 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -90,5 +90,5 @@ Authors@R: c( person("Anirban", "Chetia", role="ctb"), person("Doris", "Amoakohene", role="ctb"), person("Ivan", "Krylov", role="ctb"), - person("Michael","Young", role="ctb")) + person("Michael","Young", role="ctb") ) From 22bede1422d47916d68690efa2db0649f96d4b6a Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Fri, 6 Sep 2024 00:00:46 -0700 Subject: [PATCH 28/31] nzchar again --- R/data.table.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/data.table.R b/R/data.table.R index d834e7cd32..20deea3f9f 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -1986,8 +1986,8 @@ replace_dot_alias = function(e) { setattr(ans, "names", c(bynames, jvnames)) } else { nonbynames = names(ans)[-seq_along(bynames)] #related to 2311. 
make naming of empty columns names more consistent - ww = which(nonbynames=="") - if (any(ww)) nonbynames[ww] = paste0("V",ww) + ww = which(!nzchar(nonbynames)) + if (length(ww)) nonbynames[ww] = paste0("V", ww) setattr(ans, "names", c(bynames, nonbynames)) # TO DO: reinvestigate bynames flowing from dogroups here and simplify } if (byjoin && keyby && !bysameorder) { From 0237bb1cadd0abe781725c10ef432e0aaade8dbf Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Fri, 6 Sep 2024 00:13:07 -0700 Subject: [PATCH 29/31] Bring test code & result closer together for easier reading --- inst/tests/tests.Rraw | 98 +++++++++++++++++++++++++------------------ 1 file changed, 58 insertions(+), 40 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 5566224e5f..adc1b97349 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -19093,47 +19093,65 @@ M[, " ":= hp] M[, ".":= hp] sdnames <- setdiff(names(M), "cyl") -sdlist <- as.list(rep(NA,length(sdnames))) +sdlist <- as.list(rep(NA, length(sdnames))) names(sdlist) <- sdnames -dt01 = M[, c( m=lapply(.SD, mean)), by="cyl"] -dt02 = M[, c( Mpg=list(mpg), lapply(.SD, mean)), by="cyl"] -dt03 = M[, c( Mpg=list(mpg), m=lapply(.SD, mean)), by="cyl"] -dt04 = M[, c( mpg=list(mpg), mpg=lapply(.SD, mean)), by="cyl"] -dt05 = M[, c( list(mpg), lapply(.SD, mean)), by="cyl"] -dt06 = M[, c( lapply(.SD, mean), list(mpg) ), by="cyl"] -dt07 = M[, c( lapply(.SD, mean), lapply(.SD, sum)), by="cyl"] -dt08 = M[, c(mean=lapply(.SD, mean), sum=lapply(.SD, sum)), by="cyl"] -dt09 = M[, c( lapply(.SD, mean), sum=lapply(.SD, sum)), by="cyl"] -dt10 = M[, c( " "=lapply(.SD, mean), "."=lapply(.SD, sum)), by="cyl"] -dt11 = M[, c(A=list(a=mpg,b=hp), lapply(.SD, mean)), by="cyl"] -dt12 = M[, c(A=list(mpg, hp), lapply(.SD, mean)), by="cyl"] -dt13 = M[, c(A=list(mpg, b=hp, wt), lapply(.SD, mean)), by="cyl"] -dt14 = M[, c(A=list(mpg), lapply(.SD, mean)), by="cyl"] -dt15 = M[, c(" "=list(" "=hp, "."=disp, mpg), lapply(.SD, mean)), 
by="cyl"] -dt16 = M[, c("."=list(" "=hp, "."=disp, mpg), lapply(.SD, mean)), by="cyl"] -dt17 = M[, c(list(mpg,b=hp),lapply(.SD, mean)), by="cyl", .SDcols=c("vs", "am")] -dt18 = M[, c(list(mpg,b=hp),c(lapply(.SD, mean))), by="cyl", .SDcols=c("vs", "am")] -dt19 = M[, c(mpg[1], list(mpg,b=hp),c(lapply(.SD, mean))), by="cyl", .SDcols=c("vs", "am")] - for (opt in c(0, 1, 2)) { - test(2283 + opt/10 + 0.001, options=c(datatable.optimize=1), names(dt01), c("cyl", names(c(m=sdlist)))) - test(2283 + opt/10 + 0.002, options=c(datatable.optimize=1), names(dt02), c("cyl", "Mpg", sdnames)) - test(2283 + opt/10 + 0.003, options=c(datatable.optimize=1), names(dt03), c("cyl", "Mpg", names(c(m=sdlist)))) - test(2283 + opt/10 + 0.004, options=c(datatable.optimize=1), names(dt04), c("cyl", "mpg", names(c(mpg=sdlist)))) - test(2283 + opt/10 + 0.005, options=c(datatable.optimize=1), names(dt05), c("cyl", "V1", sdnames)) - test(2283 + opt/10 + 0.006, options=c(datatable.optimize=1), names(dt06), c("cyl", sdnames, "V13") ) - test(2283 + opt/10 + 0.007, options=c(datatable.optimize=1), names(dt07), c("cyl", sdnames,sdnames)) - test(2283 + opt/10 + 0.008, options=c(datatable.optimize=1), names(dt08), c("cyl", names(c(mean=sdlist, sum=sdlist)))) - test(2283 + opt/10 + 0.009, options=c(datatable.optimize=1), names(dt09), c("cyl", sdnames, names(c(sum=sdlist))) ) - test(2283 + opt/10 + 0.010, options=c(datatable.optimize=1), names(dt10), c("cyl", names(c(" "=sdlist, "."=sdlist)))) - test(2283 + opt/10 + 0.011, options=c(datatable.optimize=1), names(dt11), c("cyl", names(c(A=list(a=0, b=0))), sdnames )) - test(2283 + opt/10 + 0.012, options=c(datatable.optimize=1), names(dt12), c("cyl", names(c(A=list(0, 0))), sdnames )) - test(2283 + opt/10 + 0.013, options=c(datatable.optimize=1), names(dt13), c("cyl", names(c(A=list(0, b=0,0))), sdnames )) - test(2283 + opt/10 + 0.014, options=c(datatable.optimize=1), names(dt14), c("cyl", names(c(A=list(0))), sdnames )) - test(2283 + opt/10 + 0.015, 
options=c(datatable.optimize=1), names(dt15), c("cyl", names(c(" "=list(" "=0, "."=0, 0))), sdnames )) - test(2283 + opt/10 + 0.016, options=c(datatable.optimize=1), names(dt16), c("cyl", names(c("."=list(" "=0, "."=0, 0))), sdnames )) - test(2283 + opt/10 + 0.017, options=c(datatable.optimize=1), names(dt17), c("cyl", "V1","b", "vs", "am")) - test(2283 + opt/10 + 0.018, options=c(datatable.optimize=1), names(dt18), c("cyl", "V1","b", "vs", "am")) - test(2283 + opt/10 + 0.019, options=c(datatable.optimize=1), names(dt19), c("cyl", "V1","V2","b", "vs", "am")) + test(2283 + opt/10 + 0.001, options=c(datatable.optimize=1), + names(M[, c(m=lapply(.SD, mean)), by="cyl"]), + c("cyl", names(c(m=sdlist)))) + test(2283 + opt/10 + 0.002, options=c(datatable.optimize=1), + names(M[, c( Mpg=list(mpg), lapply(.SD, mean)), by="cyl"]), + c("cyl", "Mpg", sdnames)) + test(2283 + opt/10 + 0.003, options=c(datatable.optimize=1), + names(M[, c( Mpg=list(mpg), m=lapply(.SD, mean)), by="cyl"]), + c("cyl", "Mpg", names(c(m=sdlist)))) + test(2283 + opt/10 + 0.004, options=c(datatable.optimize=1), + names(M[, c( mpg=list(mpg), mpg=lapply(.SD, mean)), by="cyl"]), + c("cyl", "mpg", names(c(mpg=sdlist)))) + test(2283 + opt/10 + 0.005, options=c(datatable.optimize=1), + names(M[, c(list(mpg), lapply(.SD, mean)), by="cyl"]), + c("cyl", "V1", sdnames)) + test(2283 + opt/10 + 0.006, options=c(datatable.optimize=1), + names(M[, c(lapply(.SD, mean), list(mpg)), by="cyl"]), + c("cyl", sdnames, "V13") ) + test(2283 + opt/10 + 0.007, options=c(datatable.optimize=1), + names(M[, c(lapply(.SD, mean), lapply(.SD, sum)), by="cyl"]), + c("cyl", sdnames,sdnames)) + test(2283 + opt/10 + 0.008, options=c(datatable.optimize=1), + names(M[, c(mean=lapply(.SD, mean), sum=lapply(.SD, sum)), by="cyl"]), + c("cyl", names(c(mean=sdlist, sum=sdlist)))) + test(2283 + opt/10 + 0.009, options=c(datatable.optimize=1), + names(M[, c(lapply(.SD, mean), sum=lapply(.SD, sum)), by="cyl"]), + c("cyl", sdnames, 
names(c(sum=sdlist))) ) + test(2283 + opt/10 + 0.010, options=c(datatable.optimize=1), + names(M[, c(" "=lapply(.SD, mean), "."=lapply(.SD, sum)), by="cyl"]), + c("cyl", names(c(" "=sdlist, "."=sdlist)))) + test(2283 + opt/10 + 0.011, options=c(datatable.optimize=1), + names(M[, c(A=list(a=mpg,b=hp), lapply(.SD, mean)), by="cyl"]), + c("cyl", names(c(A=list(a=0, b=0))), sdnames )) + test(2283 + opt/10 + 0.012, options=c(datatable.optimize=1), + names(M[, c(A=list(mpg, hp), lapply(.SD, mean)), by="cyl"]), + c("cyl", names(c(A=list(0, 0))), sdnames )) + test(2283 + opt/10 + 0.013, options=c(datatable.optimize=1), + names(M[, c(A=list(mpg, b=hp, wt), lapply(.SD, mean)), by="cyl"]), + c("cyl", names(c(A=list(0, b=0,0))), sdnames )) + test(2283 + opt/10 + 0.014, options=c(datatable.optimize=1), + names(M[, c(A=list(mpg), lapply(.SD, mean)), by="cyl"]), + c("cyl", names(c(A=list(0))), sdnames )) + test(2283 + opt/10 + 0.015, options=c(datatable.optimize=1), + names(M[, c(" "=list(" "=hp, "."=disp, mpg), lapply(.SD, mean)), by="cyl"]), + c("cyl", names(c(" "=list(" "=0, "."=0, 0))), sdnames )) + test(2283 + opt/10 + 0.016, options=c(datatable.optimize=1), + names(M[, c("."=list(" "=hp, "."=disp, mpg), lapply(.SD, mean)), by="cyl"]), + c("cyl", names(c("."=list(" "=0, "."=0, 0))), sdnames )) + test(2283 + opt/10 + 0.017, options=c(datatable.optimize=1), + names(M[, c(list(mpg,b=hp),lapply(.SD, mean)), by="cyl", .SDcols=c("vs", "am")]), + c("cyl", "V1","b", "vs", "am")) + test(2283 + opt/10 + 0.018, options=c(datatable.optimize=1), + names(M[, c(list(mpg,b=hp),c(lapply(.SD, mean))), by="cyl", .SDcols=c("vs", "am")]), + c("cyl", "V1","b", "vs", "am")) + test(2283 + opt/10 + 0.019, options=c(datatable.optimize=1), + names(M[, c(mpg[1], list(mpg,b=hp),c(lapply(.SD, mean))), by="cyl", .SDcols=c("vs", "am")]), + c("cyl", "V1","V2","b", "vs", "am")) } From 6f2bf66e2c2bc4fcf47ecc72743454304283b91b Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Fri, 6 Sep 2024 00:13:31 -0700 
Subject: [PATCH 30/31] bad copy-paste: optimize=opt --- inst/tests/tests.Rraw | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index adc1b97349..9a31079a99 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -19097,61 +19097,61 @@ sdlist <- as.list(rep(NA, length(sdnames))) names(sdlist) <- sdnames for (opt in c(0, 1, 2)) { - test(2283 + opt/10 + 0.001, options=c(datatable.optimize=1), + test(2283 + opt/10 + 0.001, options=c(datatable.optimize=opt), names(M[, c(m=lapply(.SD, mean)), by="cyl"]), c("cyl", names(c(m=sdlist)))) - test(2283 + opt/10 + 0.002, options=c(datatable.optimize=1), + test(2283 + opt/10 + 0.002, options=c(datatable.optimize=opt), names(M[, c( Mpg=list(mpg), lapply(.SD, mean)), by="cyl"]), c("cyl", "Mpg", sdnames)) - test(2283 + opt/10 + 0.003, options=c(datatable.optimize=1), + test(2283 + opt/10 + 0.003, options=c(datatable.optimize=opt), names(M[, c( Mpg=list(mpg), m=lapply(.SD, mean)), by="cyl"]), c("cyl", "Mpg", names(c(m=sdlist)))) - test(2283 + opt/10 + 0.004, options=c(datatable.optimize=1), + test(2283 + opt/10 + 0.004, options=c(datatable.optimize=opt), names(M[, c( mpg=list(mpg), mpg=lapply(.SD, mean)), by="cyl"]), c("cyl", "mpg", names(c(mpg=sdlist)))) - test(2283 + opt/10 + 0.005, options=c(datatable.optimize=1), + test(2283 + opt/10 + 0.005, options=c(datatable.optimize=opt), names(M[, c(list(mpg), lapply(.SD, mean)), by="cyl"]), c("cyl", "V1", sdnames)) - test(2283 + opt/10 + 0.006, options=c(datatable.optimize=1), + test(2283 + opt/10 + 0.006, options=c(datatable.optimize=opt), names(M[, c(lapply(.SD, mean), list(mpg)), by="cyl"]), c("cyl", sdnames, "V13") ) - test(2283 + opt/10 + 0.007, options=c(datatable.optimize=1), + test(2283 + opt/10 + 0.007, options=c(datatable.optimize=opt), names(M[, c(lapply(.SD, mean), lapply(.SD, sum)), by="cyl"]), c("cyl", sdnames,sdnames)) - test(2283 + opt/10 + 0.008, 
options=c(datatable.optimize=1), + test(2283 + opt/10 + 0.008, options=c(datatable.optimize=opt), names(M[, c(mean=lapply(.SD, mean), sum=lapply(.SD, sum)), by="cyl"]), c("cyl", names(c(mean=sdlist, sum=sdlist)))) - test(2283 + opt/10 + 0.009, options=c(datatable.optimize=1), + test(2283 + opt/10 + 0.009, options=c(datatable.optimize=opt), names(M[, c(lapply(.SD, mean), sum=lapply(.SD, sum)), by="cyl"]), c("cyl", sdnames, names(c(sum=sdlist))) ) - test(2283 + opt/10 + 0.010, options=c(datatable.optimize=1), + test(2283 + opt/10 + 0.010, options=c(datatable.optimize=opt), names(M[, c(" "=lapply(.SD, mean), "."=lapply(.SD, sum)), by="cyl"]), c("cyl", names(c(" "=sdlist, "."=sdlist)))) - test(2283 + opt/10 + 0.011, options=c(datatable.optimize=1), + test(2283 + opt/10 + 0.011, options=c(datatable.optimize=opt), names(M[, c(A=list(a=mpg,b=hp), lapply(.SD, mean)), by="cyl"]), c("cyl", names(c(A=list(a=0, b=0))), sdnames )) - test(2283 + opt/10 + 0.012, options=c(datatable.optimize=1), + test(2283 + opt/10 + 0.012, options=c(datatable.optimize=opt), names(M[, c(A=list(mpg, hp), lapply(.SD, mean)), by="cyl"]), c("cyl", names(c(A=list(0, 0))), sdnames )) - test(2283 + opt/10 + 0.013, options=c(datatable.optimize=1), + test(2283 + opt/10 + 0.013, options=c(datatable.optimize=opt), names(M[, c(A=list(mpg, b=hp, wt), lapply(.SD, mean)), by="cyl"]), c("cyl", names(c(A=list(0, b=0,0))), sdnames )) - test(2283 + opt/10 + 0.014, options=c(datatable.optimize=1), + test(2283 + opt/10 + 0.014, options=c(datatable.optimize=opt), names(M[, c(A=list(mpg), lapply(.SD, mean)), by="cyl"]), c("cyl", names(c(A=list(0))), sdnames )) - test(2283 + opt/10 + 0.015, options=c(datatable.optimize=1), + test(2283 + opt/10 + 0.015, options=c(datatable.optimize=opt), names(M[, c(" "=list(" "=hp, "."=disp, mpg), lapply(.SD, mean)), by="cyl"]), c("cyl", names(c(" "=list(" "=0, "."=0, 0))), sdnames )) - test(2283 + opt/10 + 0.016, options=c(datatable.optimize=1), + test(2283 + opt/10 + 0.016, 
options=c(datatable.optimize=opt), names(M[, c("."=list(" "=hp, "."=disp, mpg), lapply(.SD, mean)), by="cyl"]), c("cyl", names(c("."=list(" "=0, "."=0, 0))), sdnames )) - test(2283 + opt/10 + 0.017, options=c(datatable.optimize=1), + test(2283 + opt/10 + 0.017, options=c(datatable.optimize=opt), names(M[, c(list(mpg,b=hp),lapply(.SD, mean)), by="cyl", .SDcols=c("vs", "am")]), c("cyl", "V1","b", "vs", "am")) - test(2283 + opt/10 + 0.018, options=c(datatable.optimize=1), + test(2283 + opt/10 + 0.018, options=c(datatable.optimize=opt), names(M[, c(list(mpg,b=hp),c(lapply(.SD, mean))), by="cyl", .SDcols=c("vs", "am")]), c("cyl", "V1","b", "vs", "am")) - test(2283 + opt/10 + 0.019, options=c(datatable.optimize=1), + test(2283 + opt/10 + 0.019, options=c(datatable.optimize=opt), names(M[, c(mpg[1], list(mpg,b=hp),c(lapply(.SD, mean))), by="cyl", .SDcols=c("vs", "am")]), c("cyl", "V1","V2","b", "vs", "am")) } From 578abecb6b580da7fed2c80b4343c4d1a6b8717e Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Fri, 6 Sep 2024 00:21:51 -0700 Subject: [PATCH 31/31] more test style --- inst/tests/tests.Rraw | 44 +++++++++++++++++++++---------------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 9a31079a99..460dbf6f8b 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -19089,11 +19089,11 @@ test(2282.09, rowwiseDT(A=,ncols), data.table(A=ncols)) # named arguments of c() in j get prepended to lapply(.SD, FUN) #2311 M <- as.data.table(mtcars) -M[, " ":= hp] -M[, ".":= hp] +M[, " " := hp] +M[, "." 
:= hp] sdnames <- setdiff(names(M), "cyl") -sdlist <- as.list(rep(NA, length(sdnames))) +sdlist <- vector("list", length(sdnames)) names(sdlist) <- sdnames for (opt in c(0, 1, 2)) { @@ -19101,23 +19101,23 @@ for (opt in c(0, 1, 2)) { names(M[, c(m=lapply(.SD, mean)), by="cyl"]), c("cyl", names(c(m=sdlist)))) test(2283 + opt/10 + 0.002, options=c(datatable.optimize=opt), - names(M[, c( Mpg=list(mpg), lapply(.SD, mean)), by="cyl"]), + names(M[, c(Mpg=list(mpg), lapply(.SD, mean)), by="cyl"]), c("cyl", "Mpg", sdnames)) test(2283 + opt/10 + 0.003, options=c(datatable.optimize=opt), - names(M[, c( Mpg=list(mpg), m=lapply(.SD, mean)), by="cyl"]), + names(M[, c(Mpg=list(mpg), m=lapply(.SD, mean)), by="cyl"]), c("cyl", "Mpg", names(c(m=sdlist)))) test(2283 + opt/10 + 0.004, options=c(datatable.optimize=opt), - names(M[, c( mpg=list(mpg), mpg=lapply(.SD, mean)), by="cyl"]), + names(M[, c(mpg=list(mpg), mpg=lapply(.SD, mean)), by="cyl"]), c("cyl", "mpg", names(c(mpg=sdlist)))) test(2283 + opt/10 + 0.005, options=c(datatable.optimize=opt), names(M[, c(list(mpg), lapply(.SD, mean)), by="cyl"]), c("cyl", "V1", sdnames)) test(2283 + opt/10 + 0.006, options=c(datatable.optimize=opt), names(M[, c(lapply(.SD, mean), list(mpg)), by="cyl"]), - c("cyl", sdnames, "V13") ) + c("cyl", sdnames, sprintf("V%d", length(sdnames)+1L))) test(2283 + opt/10 + 0.007, options=c(datatable.optimize=opt), - names(M[, c(lapply(.SD, mean), lapply(.SD, sum)), by="cyl"]), - c("cyl", sdnames,sdnames)) + names(M[, c(lapply(.SD, mean), lapply(.SD, sum)), by="cyl"]), + c("cyl", sdnames, sdnames)) test(2283 + opt/10 + 0.008, options=c(datatable.optimize=opt), names(M[, c(mean=lapply(.SD, mean), sum=lapply(.SD, sum)), by="cyl"]), c("cyl", names(c(mean=sdlist, sum=sdlist)))) @@ -19128,30 +19128,30 @@ for (opt in c(0, 1, 2)) { names(M[, c(" "=lapply(.SD, mean), "."=lapply(.SD, sum)), by="cyl"]), c("cyl", names(c(" "=sdlist, "."=sdlist)))) test(2283 + opt/10 + 0.011, options=c(datatable.optimize=opt), - names(M[, 
c(A=list(a=mpg,b=hp), lapply(.SD, mean)), by="cyl"]), - c("cyl", names(c(A=list(a=0, b=0))), sdnames )) + names(M[, c(A=list(a=mpg, b=hp), lapply(.SD, mean)), by="cyl"]), + c("cyl", names(c(A=list(a=0, b=0))), sdnames)) test(2283 + opt/10 + 0.012, options=c(datatable.optimize=opt), names(M[, c(A=list(mpg, hp), lapply(.SD, mean)), by="cyl"]), - c("cyl", names(c(A=list(0, 0))), sdnames )) + c("cyl", names(c(A=list(0, 0))), sdnames)) test(2283 + opt/10 + 0.013, options=c(datatable.optimize=opt), names(M[, c(A=list(mpg, b=hp, wt), lapply(.SD, mean)), by="cyl"]), - c("cyl", names(c(A=list(0, b=0,0))), sdnames )) + c("cyl", names(c(A=list(0, b=0, 0))), sdnames)) test(2283 + opt/10 + 0.014, options=c(datatable.optimize=opt), names(M[, c(A=list(mpg), lapply(.SD, mean)), by="cyl"]), - c("cyl", names(c(A=list(0))), sdnames )) + c("cyl", names(c(A=list(0))), sdnames)) test(2283 + opt/10 + 0.015, options=c(datatable.optimize=opt), names(M[, c(" "=list(" "=hp, "."=disp, mpg), lapply(.SD, mean)), by="cyl"]), - c("cyl", names(c(" "=list(" "=0, "."=0, 0))), sdnames )) + c("cyl", names(c(" "=list(" "=0, "."=0, 0))), sdnames)) test(2283 + opt/10 + 0.016, options=c(datatable.optimize=opt), names(M[, c("."=list(" "=hp, "."=disp, mpg), lapply(.SD, mean)), by="cyl"]), - c("cyl", names(c("."=list(" "=0, "."=0, 0))), sdnames )) + c("cyl", names(c("."=list(" "=0, "."=0, 0))), sdnames)) test(2283 + opt/10 + 0.017, options=c(datatable.optimize=opt), - names(M[, c(list(mpg,b=hp),lapply(.SD, mean)), by="cyl", .SDcols=c("vs", "am")]), - c("cyl", "V1","b", "vs", "am")) + names(M[, c(list(mpg, b=hp), lapply(.SD, mean)), by="cyl", .SDcols=c("vs", "am")]), + c("cyl", "V1", "b", "vs", "am")) test(2283 + opt/10 + 0.018, options=c(datatable.optimize=opt), - names(M[, c(list(mpg,b=hp),c(lapply(.SD, mean))), by="cyl", .SDcols=c("vs", "am")]), - c("cyl", "V1","b", "vs", "am")) + names(M[, c(list(mpg, b=hp), c(lapply(.SD, mean))), by="cyl", .SDcols=c("vs", "am")]), + c("cyl", "V1", "b", "vs", "am")) 
test(2283 + opt/10 + 0.019, options=c(datatable.optimize=opt), - names(M[, c(mpg[1], list(mpg,b=hp),c(lapply(.SD, mean))), by="cyl", .SDcols=c("vs", "am")]), - c("cyl", "V1","V2","b", "vs", "am")) + names(M[, c(mpg[1], list(mpg, b=hp), c(lapply(.SD, mean))), by="cyl", .SDcols=c("vs", "am")]), + c("cyl", "V1", "V2", "b", "vs", "am")) }