From 0273b1288a780aff9648dd8524a06c2c354b3d23 Mon Sep 17 00:00:00 2001 From: Toby Dylan Hocking Date: Mon, 1 Nov 2021 21:45:20 -0700 Subject: [PATCH 01/13] expect factor(1) when measure=list --- inst/tests/tests.Rraw | 2 ++ 1 file changed, 2 insertions(+) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 6382a13a85..a0f5ebfe43 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -17735,6 +17735,8 @@ test(2182.3, melt(DTid, measure.vars=list(a=c(NA,1), b=2:3), id.vars="id"), exid test(2182.4, melt(DTid, measure.vars=list(a=c(NA,"a2"), b=c("b1","b2")), id.vars="id"), exid) test(2182.5, melt(DT.wide, measure.vars=list(a=c(NA,1), b=2:3), na.rm=TRUE), data.table(variable=factor(2), a=2, b=2)) test(2182.6, melt(DT.wide, measure.vars=list(b=c("b1","b2"))), data.table(a2=2, variable=factor(c("b1","b2")), b=c(1,2))) # measure.vars named list length=1, #5065 +test(2182.71, melt(DT.wide, measure.vars = list("a2")), data.table(b1=1, b2=2, variable=factor(1), value=2)) +test(2182.72, melt(DT.wide, measure.vars = c("a2")), data.table(b1=1, b2=2, variable=factor("a2"), value=2)) ### First block testing measurev # new variable_table attribute for measure.vars, PR#4731 for multiple issues From 8fa8c8fdc91e5dfe9ae3d5a591fd4fa4d8004c38 Mon Sep 17 00:00:00 2001 From: Toby Dylan Hocking Date: Mon, 1 Nov 2021 22:48:14 -0700 Subject: [PATCH 02/13] melt checks if measure.vars is list --- R/data.table.R | 9 +++++++-- R/fmelt.R | 1 + inst/tests/tests.Rraw | 8 +++++--- src/fmelt.c | 12 +++++++++--- 4 files changed, 22 insertions(+), 8 deletions(-) diff --git a/R/data.table.R b/R/data.table.R index e020ea3e3d..8f6ffa568d 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -1011,8 +1011,13 @@ replace_dot_alias = function(e) { .SDcols = eval(colsub, setattr(as.list(seq_along(x)), 'names', names_x), parent.frame()) } else { if (colsub %iscall% 'patterns') { - # each pattern gives a new filter condition, intersect the end result - .SDcols = Reduce(intersect, eval_with_cols(colsub, names_x)) + patterns_list_or_vector = eval_with_cols(colsub, names_x) + .SDcols = if (is.list(patterns_list_or_vector)) { + # each pattern gives a new filter condition, intersect the end result + Reduce(intersect, patterns_list_or_vector) + } else { + patterns_list_or_vector + } } else { .SDcols = eval(colsub, parent.frame(), parent.frame()) # allow filtering via function in .SDcols, #3950 diff --git a/R/fmelt.R b/R/fmelt.R index 83963bebcd..02011ee056 100644 --- a/R/fmelt.R +++ b/R/fmelt.R @@ -30,6 +30,7 @@ patterns = function(..., cols=character(0L)) { # replace with lengths when R 3.2.0 dependency arrives if (length(idx <- which(sapply(matched, length) == 0L))) stopf('Pattern(s) not found: [%s]', brackify(p[idx])) + if(length(matched)==1)return(matched[[1]]) matched } diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index a0f5ebfe43..b0b16217ef 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -17734,9 +17734,11 @@ exid = data.table(id=1, expected) test(2182.3, melt(DTid, measure.vars=list(a=c(NA,1), b=2:3), id.vars="id"), exid) test(2182.4, melt(DTid, measure.vars=list(a=c(NA,"a2"), b=c("b1","b2")), id.vars="id"), exid) test(2182.5, melt(DT.wide, measure.vars=list(a=c(NA,1), b=2:3), na.rm=TRUE), data.table(variable=factor(2), a=2, b=2)) -test(2182.6, melt(DT.wide, measure.vars=list(b=c("b1","b2"))), data.table(a2=2, variable=factor(c("b1","b2")), b=c(1,2))) # measure.vars named list length=1, #5065 -test(2182.71, melt(DT.wide, measure.vars = list("a2")), data.table(b1=1, b2=2, variable=factor(1), value=2)) -test(2182.72, melt(DT.wide, measure.vars = c("a2")), data.table(b1=1, b2=2, variable=factor("a2"), value=2)) +test(2182.6, melt(DT.wide, measure.vars=list(b=c("b1","b2"))), data.table(a2=2, variable=factor(c("1","2")), b=c(1,2))) # measure.vars named list length=1, #5065 +test(2182.71, melt(DT.wide, measure.vars=list("a2"), variable.factor=TRUE), data.table(b1=1, b2=2, variable=factor(1), value=2)) +test(2182.72, melt(DT.wide, measure.vars=c("a2"), variable.factor=TRUE), data.table(b1=1, b2=2, variable=factor("a2"), value=2)) +test(2182.73, melt(DT.wide, measure.vars=list("a2"), variable.factor=FALSE), data.table(b1=1, b2=2, variable="1", value=2)) +test(2182.74, melt(DT.wide, measure.vars=c("a2"), variable.factor=FALSE), data.table(b1=1, b2=2, variable="a2", value=2)) ### First block testing measurev # new variable_table attribute for measure.vars, PR#4731 for multiple issues diff --git a/src/fmelt.c b/src/fmelt.c index 9990da2fcd..bca7904ca1 100644 --- a/src/fmelt.c +++ b/src/fmelt.c @@ -293,7 +293,8 @@ struct processData { totlen, // of output/long DT result of melt operation. nrow; // of input/wide DT to be melted. SEXPTYPE *maxtype; - Rboolean narm; // remove missing values? + Rboolean measure_is_list, + narm; // remove missing values? }; static void preprocess(SEXP DT, SEXP id, SEXP measure, SEXP varnames, SEXP valnames, Rboolean narm, Rboolean verbose, struct processData *data) { @@ -302,6 +303,11 @@ static void preprocess(SEXP DT, SEXP id, SEXP measure, SEXP varnames, SEXP valna SEXPTYPE type; data->lmax = 0; data->totlen = 0; data->nrow = length(VECTOR_ELT(DT, 0)); SET_VECTOR_ELT(data->RCHK, 0, vars = checkVars(DT, id, measure, verbose)); + if(!isNull(measure) && isNewList(measure)){ + data->measure_is_list = TRUE; + }else{ + data->measure_is_list = FALSE; + } data->idcols = VECTOR_ELT(vars, 0); data->valuecols = VECTOR_ELT(vars, 1); data->lids = length(data->idcols); @@ -594,7 +600,7 @@ SEXP getvarcols(SEXP DT, SEXP dtnames, Rboolean varfactor, Rboolean verbose, str if (isNull(data->variable_table)) { if (!varfactor) { SET_VECTOR_ELT(ansvars, 0, target=allocVector(STRSXP, data->totlen)); - if (data->lvalues == 1) {//one value column to output. + if (!data->measure_is_list) {//one value column to output. const int *thisvaluecols = INTEGER(VECTOR_ELT(data->valuecols, 0)); for (int j=0, ansloc=0; jlmax; ++j) { const int thislen = data->narm ? length(VECTOR_ELT(data->not_NA_indices, j)) : data->nrow; @@ -613,7 +619,7 @@ SEXP getvarcols(SEXP DT, SEXP dtnames, Rboolean varfactor, Rboolean verbose, str SET_VECTOR_ELT(ansvars, 0, target=allocVector(INTSXP, data->totlen)); SEXP levels; int *td = INTEGER(target); - if (data->lvalues == 1) {//one value column to output. + if (!data->measure_is_list) {//one value column to output. SEXP thisvaluecols = VECTOR_ELT(data->valuecols, 0); int len = length(thisvaluecols); levels = PROTECT(allocVector(STRSXP, len)); protecti++; From 9659f7af025b123f14c071d58c2c10bdea17e01c Mon Sep 17 00:00:00 2001 From: Toby Dylan Hocking Date: Mon, 1 Nov 2021 23:21:12 -0700 Subject: [PATCH 03/13] inconsistent variable between measure.vars with list of length=1 and length>1 --- NEWS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index 5faf40723f..95319616ca 100644 --- a/NEWS.md +++ b/NEWS.md @@ -233,7 +233,7 @@ 12. `as.data.table(table(NULL))` now returns `data.table(NULL)` rather than error `attempt to set an attribute on NULL`, [#4179](https://github.com/Rdatatable/data.table/issues/4179). The result differs slightly to `as.data.frame(table(NULL))` (0-row, 1-column) because 0-column works better with other `data.table` functions like `rbindlist()`. Thanks to Michael Chirico for the report and fix. -13. `melt` with a list for `measure.vars` would output `variable` inconsistently between `na.rm=TRUE` and `FALSE`, [#4455](https://github.com/Rdatatable/data.table/issues/4455). Thanks to @tdhock for reporting and fixing. +13. `melt` with a list for `measure.vars` would output `variable` inconsistently between `na.rm=TRUE` and `FALSE`, [#4455](https://github.com/Rdatatable/data.table/issues/4455). Another inconsistency was between length=1 list and length>1 list, [#5209](https://github.com/Rdatatable/data.table/issues/5209). Thanks to @tdhock for reporting and fixing. 14. `by=...get()...` could fail with `object not found`, [#4873](https://github.com/Rdatatable/data.table/issues/4873) [#4981](https://github.com/Rdatatable/data.table/issues/4981). Thanks to @sindribaldur for reporting, and @OfekShilon for fixing. From eff26227a7b86bc1d5dc870b40d19ad29a82c035 Mon Sep 17 00:00:00 2001 From: Toby Dylan Hocking Date: Mon, 1 Nov 2021 23:34:04 -0700 Subject: [PATCH 04/13] link related issue --- inst/tests/tests.Rraw | 1 + 1 file changed, 1 insertion(+) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index b0b16217ef..88b8f390d0 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -17735,6 +17735,7 @@ test(2182.3, melt(DTid, measure.vars=list(a=c(NA,1), b=2:3), id.vars="id"), exid test(2182.4, melt(DTid, measure.vars=list(a=c(NA,"a2"), b=c("b1","b2")), id.vars="id"), exid) test(2182.5, melt(DT.wide, measure.vars=list(a=c(NA,1), b=2:3), na.rm=TRUE), data.table(variable=factor(2), a=2, b=2)) test(2182.6, melt(DT.wide, measure.vars=list(b=c("b1","b2"))), data.table(a2=2, variable=factor(c("1","2")), b=c(1,2))) # measure.vars named list length=1, #5065 +# consistency between measure.vars=list with length=1 and length>1, #5209 test(2182.71, melt(DT.wide, measure.vars=list("a2"), variable.factor=TRUE), data.table(b1=1, b2=2, variable=factor(1), value=2)) test(2182.72, melt(DT.wide, measure.vars=c("a2"), variable.factor=TRUE), data.table(b1=1, b2=2, variable=factor("a2"), value=2)) test(2182.73, melt(DT.wide, measure.vars=list("a2"), variable.factor=FALSE), data.table(b1=1, b2=2, variable="1", value=2)) From afdcab77df9dd4ae6fa318d5e86333d305a0f00f Mon Sep 17 00:00:00 2001 From: Toby Dylan Hocking Date: Thu, 15 Feb 2024 15:20:30 -0700 Subject: [PATCH 05/13] move news item up --- NEWS.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index 7d06a925ca..609a0af002 100644 --- a/NEWS.md +++ b/NEWS.md @@ -2,6 +2,10 @@ # data.table [v1.15.99](https://github.com/Rdatatable/data.table/milestone/30) (in development) +## BUG FIXES + +X. `melt` with a list for `measure.vars` was inconsistent between length=1 list and length>1 list, [#5209](https://github.com/Rdatatable/data.table/issues/5209). Thanks to @tdhock for reporting and fixing. + # data.table [v1.15.0](https://github.com/Rdatatable/data.table/milestone/29) (30 Jan 2024) ## BREAKING CHANGE @@ -321,7 +325,7 @@ 12. `as.data.table(table(NULL))` now returns `data.table(NULL)` rather than error `attempt to set an attribute on NULL`, [#4179](https://github.com/Rdatatable/data.table/issues/4179). The result differs slightly to `as.data.frame(table(NULL))` (0-row, 1-column) because 0-column works better with other `data.table` functions like `rbindlist()`. Thanks to Michael Chirico for the report and fix. -13. `melt` with a list for `measure.vars` would output `variable` inconsistently between `na.rm=TRUE` and `FALSE`, [#4455](https://github.com/Rdatatable/data.table/issues/4455). Another inconsistency was between length=1 list and length>1 list, [#5209](https://github.com/Rdatatable/data.table/issues/5209). Thanks to @tdhock for reporting and fixing. +13. `melt` with a list for `measure.vars` would output `variable` inconsistently between `na.rm=TRUE` and `FALSE`, [#4455](https://github.com/Rdatatable/data.table/issues/4455). Thanks to @tdhock for reporting and fixing. 14. `by=...get()...` could fail with `object not found`, [#4873](https://github.com/Rdatatable/data.table/issues/4873) [#4981](https://github.com/Rdatatable/data.table/issues/4981). Thanks to @sindribaldur for reporting, and @OfekShilon for fixing. From 33d7b2ad6f09d4d900e16139077716353671e7af Mon Sep 17 00:00:00 2001 From: Toby Dylan Hocking Date: Thu, 15 Feb 2024 15:33:47 -0700 Subject: [PATCH 06/13] add test suggested by @mnazarov --- inst/tests/tests.Rraw | 1 + 1 file changed, 1 insertion(+) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 6b15f52a4a..87c6395f8a 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -17240,6 +17240,7 @@ test(2182.71, melt(DT.wide, measure.vars=list("a2"), variable.factor=TRUE), data test(2182.72, melt(DT.wide, measure.vars=c("a2"), variable.factor=TRUE), data.table(b1=1, b2=2, variable=factor("a2"), value=2)) test(2182.73, melt(DT.wide, measure.vars=list("a2"), variable.factor=FALSE), data.table(b1=1, b2=2, variable="1", value=2)) test(2182.74, melt(DT.wide, measure.vars=c("a2"), variable.factor=FALSE), data.table(b1=1, b2=2, variable="a2", value=2)) +test(2182.75, melt(data.table(a=10, b=20), measure.vars=list(n="a"), variable.factor=FALSE), data.table(b=20, variable="1", n=10))#thanks @mnazarov ### First block testing measurev # new variable_table attribute for measure.vars, PR#4731 for multiple issues From e24663a302e3421a74b782263d367c49a18b78a3 Mon Sep 17 00:00:00 2001 From: Toby Dylan Hocking Date: Tue, 26 Mar 2024 09:43:58 -0700 Subject: [PATCH 07/13] use Michael wording --- NEWS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index 586de06e85..7715479615 100644 --- a/NEWS.md +++ b/NEWS.md @@ -36,7 +36,7 @@ 5. `fwrite(x, row.names=TRUE)` with `x` a `matrix` writes `row.names` when present, not row numbers, [#5315](https://github.com/Rdatatable/data.table/issues/5315). Thanks to @Liripo for the report, and @ben-schwen for the fix. -6. `melt` with a list for `measure.vars` was inconsistent between length=1 list and length>1 list, [#5209](https://github.com/Rdatatable/data.table/issues/5209). Thanks to @tdhock for reporting and fixing. +6. `melt` returns an integer column for `variable` whenever `measure.vars` is a list, consistent with the documented behavior, [#5209](https://github.com/Rdatatable/data.table/issues/5209). Thanks to @tdhock for reporting and fixing. ## NOTES From dbfd853bff3802d9677c716d622a2514a7f6dfa8 Mon Sep 17 00:00:00 2001 From: Toby Dylan Hocking Date: Tue, 26 Mar 2024 09:45:10 -0700 Subject: [PATCH 08/13] Update R/fmelt.R Co-authored-by: Michael Chirico --- R/fmelt.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/fmelt.R b/R/fmelt.R index 02011ee056..020b385189 100644 --- a/R/fmelt.R +++ b/R/fmelt.R @@ -30,7 +30,7 @@ patterns = function(..., cols=character(0L)) { # replace with lengths when R 3.2.0 dependency arrives if (length(idx <- which(sapply(matched, length) == 0L))) stopf('Pattern(s) not found: [%s]', brackify(p[idx])) - if(length(matched)==1)return(matched[[1]]) + if (length(matched) == 1L) return(matched[[1L]]) matched } From d3d00a91a28530739afade09efd86969da81aa8b Mon Sep 17 00:00:00 2001 From: Toby Dylan Hocking Date: Tue, 26 Mar 2024 09:46:32 -0700 Subject: [PATCH 09/13] Update src/fmelt.c Co-authored-by: Michael Chirico --- src/fmelt.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/fmelt.c b/src/fmelt.c index bca7904ca1..c36ddac989 100644 --- a/src/fmelt.c +++ b/src/fmelt.c @@ -303,11 +303,7 @@ static void preprocess(SEXP DT, SEXP id, SEXP measure, SEXP varnames, SEXP valna SEXPTYPE type; data->lmax = 0; data->totlen = 0; data->nrow = length(VECTOR_ELT(DT, 0)); SET_VECTOR_ELT(data->RCHK, 0, vars = checkVars(DT, id, measure, verbose)); - if(!isNull(measure) && isNewList(measure)){ - data->measure_is_list = TRUE; - }else{ - data->measure_is_list = FALSE; - } + data->measure_is_list = !isNull(measure) && isNewList(measure) ? TRUE : FALSE; data->idcols = VECTOR_ELT(vars, 0); data->valuecols = VECTOR_ELT(vars, 1); data->lids = length(data->idcols); From 548d94cda24c3b54cfd8dc4ab17d43f48496d56a Mon Sep 17 00:00:00 2001 From: Toby Dylan Hocking Date: Tue, 26 Mar 2024 09:47:46 -0700 Subject: [PATCH 10/13] Rboolean->bool --- src/fmelt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/fmelt.c b/src/fmelt.c index c36ddac989..2b9707bb29 100644 --- a/src/fmelt.c +++ b/src/fmelt.c @@ -293,7 +293,7 @@ struct processData { totlen, // of output/long DT result of melt operation. nrow; // of input/wide DT to be melted. SEXPTYPE *maxtype; - Rboolean measure_is_list, + bool measure_is_list, narm; // remove missing values? }; @@ -303,7 +303,7 @@ static void preprocess(SEXP DT, SEXP id, SEXP measure, SEXP varnames, SEXP valna SEXPTYPE type; data->lmax = 0; data->totlen = 0; data->nrow = length(VECTOR_ELT(DT, 0)); SET_VECTOR_ELT(data->RCHK, 0, vars = checkVars(DT, id, measure, verbose)); - data->measure_is_list = !isNull(measure) && isNewList(measure) ? TRUE : FALSE; + data->measure_is_list = !isNull(measure) && isNewList(measure); data->idcols = VECTOR_ELT(vars, 0); data->valuecols = VECTOR_ELT(vars, 1); data->lids = length(data->idcols); From e127f4834192973166b30350a2a7731b7d392ab4 Mon Sep 17 00:00:00 2001 From: Toby Dylan Hocking Date: Mon, 8 Apr 2024 09:02:33 -0700 Subject: [PATCH 11/13] Update src/fmelt.c Co-authored-by: Michael Chirico --- src/fmelt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fmelt.c b/src/fmelt.c index 2b9707bb29..502e576e0d 100644 --- a/src/fmelt.c +++ b/src/fmelt.c @@ -303,7 +303,7 @@ static void preprocess(SEXP DT, SEXP id, SEXP measure, SEXP varnames, SEXP valna SEXPTYPE type; data->lmax = 0; data->totlen = 0; data->nrow = length(VECTOR_ELT(DT, 0)); SET_VECTOR_ELT(data->RCHK, 0, vars = checkVars(DT, id, measure, verbose)); - data->measure_is_list = !isNull(measure) && isNewList(measure); + data->measure_is_list = !isNull(measure) && isNewList(measure); // NB: NULL passes isNewList() hence !isNull() too data->idcols = VECTOR_ELT(vars, 0); data->valuecols = VECTOR_ELT(vars, 1); data->lids = length(data->idcols); From 2e1f5efa6d990b6113626856e1df5d65fa536d49 Mon Sep 17 00:00:00 2001 From: Toby Dylan Hocking Date: Mon, 8 Apr 2024 09:04:56 -0700 Subject: [PATCH 12/13] mention length=1 --- NEWS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index 42823bfb24..c70b6c799a 100644 --- a/NEWS.md +++ b/NEWS.md @@ -46,7 +46,7 @@ 6. `patterns()` helper for `.SDcols` now accepts arguments `ignore.case`, `perl`, `fixed`, and `useBytes`, which are passed to `grep`, #5387. Thanks to @iago-pssjd for the feature request, and @tdhock for the implementation. -7. `melt` returns an integer column for `variable` whenever `measure.vars` is a list, consistent with the documented behavior, [#5209](https://github.com/Rdatatable/data.table/issues/5209). Thanks to @tdhock for reporting and fixing. +7. `melt` returns an integer column for `variable` when `measure.vars` is a list of length=1, consistent with the documented behavior, [#5209](https://github.com/Rdatatable/data.table/issues/5209). Thanks to @tdhock for reporting and fixing. ## NOTES From d287ba84c4d1812f6d852e764aa9e1057769b7de Mon Sep 17 00:00:00 2001 From: Toby Dylan Hocking Date: Mon, 8 Apr 2024 09:15:21 -0700 Subject: [PATCH 13/13] comment about how to upgrade --- NEWS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index c70b6c799a..7bfd9a416a 100644 --- a/NEWS.md +++ b/NEWS.md @@ -46,7 +46,7 @@ 6. `patterns()` helper for `.SDcols` now accepts arguments `ignore.case`, `perl`, `fixed`, and `useBytes`, which are passed to `grep`, #5387. Thanks to @iago-pssjd for the feature request, and @tdhock for the implementation. -7. `melt` returns an integer column for `variable` when `measure.vars` is a list of length=1, consistent with the documented behavior, [#5209](https://github.com/Rdatatable/data.table/issues/5209). Thanks to @tdhock for reporting and fixing. +7. `melt` returns an integer column for `variable` when `measure.vars` is a list of length=1, consistent with the documented behavior, [#5209](https://github.com/Rdatatable/data.table/issues/5209). Thanks to @tdhock for reporting and fixing. Any users who were relying on this behavior can change `measure.vars=list("col_name")` (output `variable` was column name, now is column index/integer) to `measure.vars="col_name"` (`variable` still is column name). ## NOTES