From cb956405b7d7d47156efe98d474db4b437436004 Mon Sep 17 00:00:00 2001 From: Elio Campitelli Date: Sun, 7 Jul 2019 19:34:44 -0300 Subject: [PATCH 1/5] Sets clearer warning for keyby in ad-hoc columns --- R/data.table.R | 2 +- inst/tests/tests.Rraw | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/data.table.R b/R/data.table.R index ce8cd56c99..f58f983f4e 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -1756,7 +1756,7 @@ replace_order = function(isub, verbose, env) { setkeyv(x,cnames) # TO DO: setkey before grouping to get memcpy benefit. if (verbose) {cat(timetaken(last.started.at),"\n"); flush.console()} } - else warning(":= keyby not straightforward character column names or list() of column names, treating as a by:",paste(cnames,collapse=","),"\n") + else warning("keyby can only be used with existing columns. Skipping to setkey() following computation in j.") } return(suppPrint(x)) } diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 5f2e5fceba..d850f6718c 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -1912,7 +1912,7 @@ test(670, DT[,z:=sum(y),keyby=x], ans) DT = data.table(x=1:2, y=1:6) test(671, DT[,z:=sum(y),keyby="x"], ans) DT = data.table(x=1:2, y=1:6) -test(672, DT[,z:=sum(y),keyby=x%%2], data.table(x=1:2,y=1:6,z=c(9L,12L)), warning=":= keyby not straightforward character column names or list() of column names, treating as a by") +test(672, DT[,z:=sum(y),keyby=x%%2], data.table(x=1:2,y=1:6,z=c(9L,12L)), warning="keyby can only be used with existing columns. Skipping to setkey() following computation in j.") DT = data.table(x=1:2, y=1:6) test(673, DT[,z:=sum(y),by=x%%2], data.table(x=1:2,y=1:6,z=c(9L,12L))) DT = data.table(x=1:2, y=1:6) From cefbd538787f06dbec99d2c0edf2188fff83f00f Mon Sep 17 00:00:00 2001 From: Elio Campitelli Date: Sun, 7 Jul 2019 20:03:45 -0300 Subject: [PATCH 2/5] Changes still misleading warning. --- R/data.table.R | 2 +- inst/tests/tests.Rraw | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/data.table.R b/R/data.table.R index f58f983f4e..5ab8bc8c84 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -1756,7 +1756,7 @@ replace_order = function(isub, verbose, env) { setkeyv(x,cnames) # TO DO: setkey before grouping to get memcpy benefit. if (verbose) {cat(timetaken(last.started.at),"\n"); flush.console()} } - else warning("keyby can only be used with existing columns. Skipping to setkey() following computation in j.") + else warning("keyby can only be used with existing columns.") } return(suppPrint(x)) } diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index d850f6718c..4761d71647 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -1912,7 +1912,7 @@ test(670, DT[,z:=sum(y),keyby=x], ans) DT = data.table(x=1:2, y=1:6) test(671, DT[,z:=sum(y),keyby="x"], ans) DT = data.table(x=1:2, y=1:6) -test(672, DT[,z:=sum(y),keyby=x%%2], data.table(x=1:2,y=1:6,z=c(9L,12L)), warning="keyby can only be used with existing columns. Skipping to setkey() following computation in j.") +test(672, DT[,z:=sum(y),keyby=x%%2], data.table(x=1:2,y=1:6,z=c(9L,12L)), warning="keyby can only be used with existing columns.") DT = data.table(x=1:2, y=1:6) test(673, DT[,z:=sum(y),by=x%%2], data.table(x=1:2,y=1:6,z=c(9L,12L))) DT = data.table(x=1:2, y=1:6) From 371af1151099459fc4c1da7aa7b78e5db5c87f64 Mon Sep 17 00:00:00 2001 From: Elio Campitelli Date: Sun, 7 Jul 2019 20:08:27 -0300 Subject: [PATCH 3/5] Updates news. --- NEWS.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/NEWS.md b/NEWS.md index 3e46498067..c3be722a8c 100644 --- a/NEWS.md +++ b/NEWS.md @@ -190,6 +190,8 @@ 23. Incorrect sorting/grouping results due to a bug in Intel's `icc` compiler 2019 (Version 19.0.4.243 Build 20190416) has been worked around thanks to a report and fix by Sebastian Freundt, [#3647](https://github.com/Rdatatable/data.table/issues/3647). Please run `data.table::test.data.table()`. If that passes, your installation does not have the problem. +24. Changes the warning when using `keyby` with ad-hoc columns to be more informative. [#2763](https://github.com/Rdatatable/data.table/issues/2763). Thanks to @eliocamp. + #### NOTES 1. `rbindlist`'s `use.names="check"` now emits its message for automatic column names (`"V[0-9]+"`) too, [#3484](https://github.com/Rdatatable/data.table/pull/3484). See news item 5 of v1.12.2 below. From 4fc3483c482a9aad37a96de9478781ebf5c1d8c9 Mon Sep 17 00:00:00 2001 From: Elio Campitelli Date: Mon, 8 Jul 2019 13:11:04 -0300 Subject: [PATCH 4/5] Extends warning --- R/data.table.R | 2 +- inst/tests/tests.Rraw | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/data.table.R b/R/data.table.R index 5ab8bc8c84..6e71a51106 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -1756,7 +1756,7 @@ replace_order = function(isub, verbose, env) { setkeyv(x,cnames) # TO DO: setkey before grouping to get memcpy benefit. if (verbose) {cat(timetaken(last.started.at),"\n"); flush.console()} } - else warning("keyby can only be used with existing columns.") + else warning("keyby can only be used with existing columns, treating as by = ", paste(cnames, collapse = ","), "\n") } return(suppPrint(x)) } diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 4761d71647..a41af567f4 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -1912,7 +1912,7 @@ test(670, DT[,z:=sum(y),keyby=x], ans) DT = data.table(x=1:2, y=1:6) test(671, DT[,z:=sum(y),keyby="x"], ans) DT = data.table(x=1:2, y=1:6) -test(672, DT[,z:=sum(y),keyby=x%%2], data.table(x=1:2,y=1:6,z=c(9L,12L)), warning="keyby can only be used with existing columns.") +test(672, DT[,z:=sum(y),keyby=x%%2], data.table(x=1:2,y=1:6,z=c(9L,12L)), warning="keyby can only be used with existing columns, treating as by") DT = data.table(x=1:2, y=1:6) test(673, DT[,z:=sum(y),by=x%%2], data.table(x=1:2,y=1:6,z=c(9L,12L))) DT = data.table(x=1:2, y=1:6) From e27cc21076d48681938520453204d2795458460e Mon Sep 17 00:00:00 2001 From: mattdowle Date: Mon, 15 Jul 2019 17:49:38 -0700 Subject: [PATCH 5/5] Expanded revised warning message --- R/data.table.R | 2 +- inst/tests/tests.Rraw | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/R/data.table.R b/R/data.table.R index 7e40c63fda..093cbb3047 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -1756,7 +1756,7 @@ replace_order = function(isub, verbose, env) { setkeyv(x,cnames) # TO DO: setkey before grouping to get memcpy benefit. if (verbose) {cat(timetaken(last.started.at),"\n"); flush.console()} } - else warning("keyby can only be used with existing columns, treating as by = ", paste(cnames, collapse = ","), "\n") + else warning("The setkey() normally performed by keyby= has been skipped (as if by= was used) because := is being used together with keyby= but the keyby= contains some expressions. To avoid this warning, use by= instead, or provide existing column names to keyby=.\n") } return(suppPrint(x)) } diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 232693826e..3e688942b2 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -1920,7 +1920,8 @@ test(670, DT[,z:=sum(y),keyby=x], ans) DT = data.table(x=1:2, y=1:6) test(671, DT[,z:=sum(y),keyby="x"], ans) DT = data.table(x=1:2, y=1:6) -test(672, DT[,z:=sum(y),keyby=x%%2], data.table(x=1:2,y=1:6,z=c(9L,12L)), warning="keyby can only be used with existing columns, treating as by") +test(672, DT[,z:=sum(y),keyby=x%%2], data.table(x=1:2,y=1:6,z=c(9L,12L)), + warning="The setkey() normally performed by keyby= has been skipped (as if by= was used) because := is being used together with keyby= but the keyby= contains some expressions. To avoid this warning, use by= instead, or provide existing column names to keyby=") DT = data.table(x=1:2, y=1:6) test(673, DT[,z:=sum(y),by=x%%2], data.table(x=1:2,y=1:6,z=c(9L,12L))) DT = data.table(x=1:2, y=1:6)