From 217d57b06c5453e6f67b5168e5c4aca37750a013 Mon Sep 17 00:00:00 2001 From: Toby Dylan Hocking Date: Wed, 25 Aug 2021 21:24:02 +0100 Subject: [PATCH 1/6] emacs fixed line endings --- inst/tests/tests.Rraw | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index d477b13e7d..30e60f875e 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -3257,7 +3257,7 @@ Sep,33.5,19.4,15.7,11.9,0,100.8,100.8,0,12.7,12.7,0,174.1") x[ , r := as.raw(c(0, 1))] test(1037.414, melt(x, id.vars='x1', measure.vars='r'), error="Unknown column type 'raw' for column 'r'") - + # test dispatch for non-data.table objects, #4864. if (inherits(try(getNamespace("reshape2"), silent=TRUE),"try-error")) { test(1038.001, melt(as.data.frame(DT), id.vars=1:2, measure.vars=5:6), @@ -6759,7 +6759,7 @@ if (test_xts) { " 6: 1970-01-07 6", " 7: 1970-01-08 7", " 8: 1970-01-09 8", " 9: 1970-01-10 9", "10: 1970-01-11 10")) options(old) - + # as.data.table.xts(foo) had incorrect integer index with a column name called 'x', #4897 M = xts::as.xts(matrix(1, dimnames=list("2021-05-23", "x"))) # xts:: just to be extra robust; shouldn't be needed with rm(as.xts) above test(1465.19, inherits(as.data.table(M)$index,"POSIXct")) @@ -17062,7 +17062,7 @@ registerS3method("format_col", "complex", format_col.complex) x = data.table(z = c(1 + 3i, 2 - 1i, pi + 2.718i)) test(2130.12, x, output = '(1.0, 3.0i)') rm(format_col.complex) -registerS3method("format_col", "complex", format_col.default) +registerS3method("format_col", "complex", format_col.default) # otherwise it remains registered after test.data.table() and causes test 1610.1 to fail on the next run for example, and user display if they have complex data # haven't found a way to unregister an S3 method (tried registering NULL but there's an error that NULL isn't a function) @@ -17779,7 +17779,7 @@ test(2188.12, fifelse(c(TRUE, FALSE, TRUE, NA), NA, NA, as.Date("2020-01-01")), 
test(2188.13, fifelse(TRUE, 1L, 2.0, "a"), error="'na' is of type character but 'no' is double. Please") # smart error message test(2188.14, fifelse(TRUE, NA, 2, as.Date("2019-07-07")), error="'no' has different class than 'na'. Please") test(2188.15, fifelse(TRUE, NA, factor('a'), factor('a', levels = c('a','b'))), error="'no' and 'na' are both type factor but their levels are different") -test(2188.16, fifelse(c(NA, NA), 1L, 2L, NULL), c(NA_integer_, NA_integer_)) # NULL `na` is treated as NA +test(2188.16, fifelse(c(NA, NA), 1L, 2L, NULL), c(NA_integer_, NA_integer_)) # NULL `na` is treated as NA # rolling join expected output on non-matching join column has been fixed #1913 DT = data.table(ID=1:5, A=c(1.3, 1.7, 2.4, 0.9, 0.6)) @@ -17821,7 +17821,7 @@ if (test_bit64) { DT[a==1, a:=12] DT[a==2, a:=as.integer64(13)] test(2193.1, DT, data.table(a = as.integer64(c(12,13,3:10)))) - + # X[Y,,by=.EACHI] when Y contains integer64 also fixed in 1.12.4, #3779 X = data.table(x=1:3) Y = data.table(x=1:2, y=as.integer64(c(10,20))) @@ -17899,7 +17899,7 @@ setDTthreads() # restore default throttle # fwrite now allows sep="", #4817 test(2202.1, fwrite(data.frame(a="id", b=letters[1:5], c=1:5), sep=""), output = c("abc", paste0("id", letters[1:5], 1:5))) -test(2202.2, fwrite(data.frame(a="id", b=1:1e2), sep=""), +test(2202.2, fwrite(data.frame(a="id", b=1:1e2), sep=""), output = c("ab", paste0("id", 1:1e2))) test(2202.3, fwrite(data.table(a=c(NA, 2, 3.01), b=c('foo', NA, 'bar')), sep=""), output=c("ab", "foo", "2", "3.01bar")) From 30cb2d93a66974425629e0d2f2f3a42e39b879c0 Mon Sep 17 00:00:00 2001 From: Toby Dylan Hocking Date: Wed, 25 Aug 2021 21:26:38 +0100 Subject: [PATCH 2/6] measure.vars named list length=1 should use list name for value.name --- inst/tests/tests.Rraw | 1 + 1 file changed, 1 insertion(+) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 30e60f875e..afa985b968 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -17628,6 +17628,7 
@@ exid = data.table(id=1, expected) test(2182.3, melt(DTid, measure.vars=list(a=c(NA,1), b=2:3), id.vars="id"), exid) test(2182.4, melt(DTid, measure.vars=list(a=c(NA,"a2"), b=c("b1","b2")), id.vars="id"), exid) test(2182.5, melt(DT.wide, measure.vars=list(a=c(NA,1), b=2:3), na.rm=TRUE)[, .(a, b)], data.table(a=2, b=2))#not testing variable because it is not computed correctly, #4455 +test(2182.6, melt(DT.wide, measure.vars=list(b=c("b1","b2"))), data.table(a2=2, variable=factor(c("b1","b2")), b=c(1,2)))#measure.vars named list length=1, #5065 ### First block testing measurev # new variable_table attribute for measure.vars, PR#4731 for multiple issues From 5d46117df13ee6e72020a7f836266d0ae757129b Mon Sep 17 00:00:00 2001 From: Toby Dylan Hocking Date: Wed, 25 Aug 2021 21:30:13 +0100 Subject: [PATCH 3/6] value.name taken from names of measure.vars list even when length=1 --- R/fmelt.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/R/fmelt.R b/R/fmelt.R index 243480445b..83963bebcd 100644 --- a/R/fmelt.R +++ b/R/fmelt.R @@ -60,7 +60,7 @@ measure = function(..., sep="_", pattern, cols, multiple.keyword="value.name") { stopf("each ... argument to measure must be a function with at least one argument, problem: %s", names(fun.list)[[fun.i]]) } fun.list[[fun.i]] = fun - } + } measurev.args = c( list(fun.list), L[formal.i.vec], @@ -185,7 +185,7 @@ measurev = function(fun.list, sep="_", pattern, cols, multiple.keyword="value.na } else {# single output column. 
structure(measure.vec, variable_table=group.dt) } -} +} melt.data.table = function(data, id.vars, measure.vars, variable.name = "variable", value.name = "value", ..., na.rm = FALSE, variable.factor = TRUE, value.factor = FALSE, @@ -200,11 +200,11 @@ melt.data.table = function(data, id.vars, measure.vars, variable.name = "variabl measure.vars = eval.result } } - if (is.list(measure.vars) && length(measure.vars) > 1L) { + if (is.list(measure.vars)) { meas.nm = names(measure.vars) if (is.null(meas.nm)) { # user-provided or default stub - if (length(value.name) == 1L) { + if (length(value.name) == 1L && length(measure.vars) > 1L) { value.name = paste0(value.name, seq_along(measure.vars)) } } else { From 74e05f9960208fd4fce3b0b77ccefb20c56c759c Mon Sep 17 00:00:00 2001 From: Toby Dylan Hocking Date: Wed, 25 Aug 2021 21:32:35 +0100 Subject: [PATCH 4/6] make test 2182.5 more complete since #4455 has been fixed --- inst/tests/tests.Rraw | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index afa985b968..9f8e8baabe 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -17627,7 +17627,7 @@ DTid = data.table(DT.wide, id=1) exid = data.table(id=1, expected) test(2182.3, melt(DTid, measure.vars=list(a=c(NA,1), b=2:3), id.vars="id"), exid) test(2182.4, melt(DTid, measure.vars=list(a=c(NA,"a2"), b=c("b1","b2")), id.vars="id"), exid) -test(2182.5, melt(DT.wide, measure.vars=list(a=c(NA,1), b=2:3), na.rm=TRUE)[, .(a, b)], data.table(a=2, b=2))#not testing variable because it is not computed correctly, #4455 +test(2182.5, melt(DT.wide, measure.vars=list(a=c(NA,1), b=2:3), na.rm=TRUE), data.table(variable=factor(2), a=2, b=2)) test(2182.6, melt(DT.wide, measure.vars=list(b=c("b1","b2"))), data.table(a2=2, variable=factor(c("b1","b2")), b=c(1,2)))#measure.vars named list length=1, #5065 ### First block testing measurev From 0bf55da1a72a999d093d6248bab46a137ecbf350 Mon Sep 17 00:00:00 2001 From: Toby Dylan 
Hocking Date: Wed, 25 Aug 2021 21:52:58 +0100 Subject: [PATCH 5/6] news item --- NEWS.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/NEWS.md b/NEWS.md index f0581ccb52..79a6596930 100644 --- a/NEWS.md +++ b/NEWS.md @@ -286,6 +286,8 @@ # # 1: 2017-10-02 09:55:00 ``` + +39. `melt` ignored names of measure.vars list with length=1, [#5065](https://github.com/Rdatatable/data.table/issues/5065). Thanks to @keatingw for the bug report and Toby Dylan Hocking for the PR. ## NOTES From 6875dbf63c19a45fadc15bd59a2a7ebe16beb52a Mon Sep 17 00:00:00 2001 From: Matt Dowle Date: Thu, 26 Aug 2021 10:27:33 -0600 Subject: [PATCH 6/6] move news item up into the new feature item --- NEWS.md | 4 +--- inst/tests/tests.Rraw | 2 +- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/NEWS.md b/NEWS.md index ac42e1d193..c4b4861b3a 100644 --- a/NEWS.md +++ b/NEWS.md @@ -56,7 +56,7 @@ 8. `melt()` now supports `NA` entries when specifying a list of `measure.vars`, which translate into runs of missing values in the output. Useful for melting wide data with some missing columns, [#4027](https://github.com/Rdatatable/data.table/issues/4027). Thanks to @vspinu for reporting, and @tdhock for implementing. -9. `melt()` now supports multiple output variable columns via the `variable_table` attribute of `measure.vars`, [#3396](https://github.com/Rdatatable/data.table/issues/3396) [#2575](https://github.com/Rdatatable/data.table/issues/2575) [#2551](https://github.com/Rdatatable/data.table/issues/2551), [#4998](https://github.com/Rdatatable/data.table/issues/4998). It should be a `data.table` with one row that describes each element of the `measure.vars` vector(s). These data/columns are copied to the output instead of the usual variable column. This is backwards compatible since the previous behavior (one output variable column) is used when there is no `variable_table`. 
New functions `measure()` and `measurev()` which use either a separator or a regex to create a `measure.vars` list/vector with `variable_table` attribute; useful for melting data that has several distinct pieces of information encoded in each column name. See new `?measure` and new section in reshape vignette. Thanks to Matthias Gomolka, Ananda Mahto, Hugh Parsonage, Mark Fairbanks for reporting, and to @tdhock for implementing. +9. `melt()` now supports multiple output variable columns via the `variable_table` attribute of `measure.vars`, [#3396](https://github.com/Rdatatable/data.table/issues/3396) [#2575](https://github.com/Rdatatable/data.table/issues/2575) [#2551](https://github.com/Rdatatable/data.table/issues/2551), [#4998](https://github.com/Rdatatable/data.table/issues/4998). It should be a `data.table` with one row that describes each element of the `measure.vars` vector(s). These data/columns are copied to the output instead of the usual variable column. This is backwards compatible since the previous behavior (one output variable column) is used when there is no `variable_table`. New functions `measure()` and `measurev()` which use either a separator or a regex to create a `measure.vars` list/vector with `variable_table` attribute; useful for melting data that has several distinct pieces of information encoded in each column name. See new `?measure` and new section in reshape vignette. Thanks to Matthias Gomolka, Ananda Mahto, Hugh Parsonage, Mark Fairbanks for reporting, and to Toby Dylan Hocking for implementing. Thanks to @keatingw for testing before release, requesting `measure()` accept single groups too [#5065](https://github.com/Rdatatable/data.table/issues/5065), and Toby for implementing. 10. A new interface for _programming on data.table_ has been added, closing [#2655](https://github.com/Rdatatable/data.table/issues/2655) and many other linked issues. 
It is built using base R's `substitute`-like interface via a new `env` argument to `[.data.table`. For details see the new vignette *programming on data.table*, and the new `?substitute2` manual page. Thanks to numerous users for filing requests, and Jan Gorecki for implementing. @@ -290,8 +290,6 @@ # # 1: 2017-10-02 09:55:00 ``` - -39. `melt` ignored names of measure.vars list with length=1, [#5065](https://github.com/Rdatatable/data.table/issues/5065). Thanks to @keatingw for the bug report and Toby Dylan Hocking for the PR. 39. `DT[i, sum(b), by=grp]` (and other optimized-by-group aggregates: `mean`, `var`, `sd`, `median`, `prod`, `min`, `max`, `first`, `last`, `head` and `tail`) could segfault if `i` contained row numbers and one or more were NA, [#1994](https://github.com/Rdatatable/data.table/issues/1994). Thanks to Arun Srinivasan for reporting, and Benjamin Schwendinger for the PR. diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 27ddc0c21d..9a08da8b0c 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -17644,7 +17644,7 @@ exid = data.table(id=1, expected) test(2182.3, melt(DTid, measure.vars=list(a=c(NA,1), b=2:3), id.vars="id"), exid) test(2182.4, melt(DTid, measure.vars=list(a=c(NA,"a2"), b=c("b1","b2")), id.vars="id"), exid) test(2182.5, melt(DT.wide, measure.vars=list(a=c(NA,1), b=2:3), na.rm=TRUE), data.table(variable=factor(2), a=2, b=2)) -test(2182.6, melt(DT.wide, measure.vars=list(b=c("b1","b2"))), data.table(a2=2, variable=factor(c("b1","b2")), b=c(1,2)))#measure.vars named list length=1, #5065 +test(2182.6, melt(DT.wide, measure.vars=list(b=c("b1","b2"))), data.table(a2=2, variable=factor(c("b1","b2")), b=c(1,2))) # measure.vars named list length=1, #5065 ### First block testing measurev # new variable_table attribute for measure.vars, PR#4731 for multiple issues