diff --git a/NEWS.md b/NEWS.md index 669b5a7174..7bfd9a416a 100644 --- a/NEWS.md +++ b/NEWS.md @@ -44,7 +44,9 @@ 5. `fwrite(x, row.names=TRUE)` with `x` a `matrix` writes `row.names` when present, not row numbers, [#5315](https://github.com/Rdatatable/data.table/issues/5315). Thanks to @Liripo for the report, and @ben-schwen for the fix. -3. `patterns()` helper for `.SDcols` now accepts arguments `ignore.case`, `perl`, `fixed`, and `useBytes`, which are passed to `grep`, #5387. Thanks to @iago-pssjd for the feature request, and @tdhock for the implementation. +6. `patterns()` helper for `.SDcols` now accepts arguments `ignore.case`, `perl`, `fixed`, and `useBytes`, which are passed to `grep`, #5387. Thanks to @iago-pssjd for the feature request, and @tdhock for the implementation. + +7. `melt` returns an integer column for `variable` when `measure.vars` is a list of length=1, consistent with the documented behavior, [#5209](https://github.com/Rdatatable/data.table/issues/5209). Thanks to @tdhock for reporting and fixing. Any users who were relying on this behavior can change `measure.vars=list("col_name")` (output `variable` was column name, now is column index/integer) to `measure.vars="col_name"` (`variable` still is column name). ## NOTES diff --git a/R/data.table.R b/R/data.table.R index de28676b7b..89309e58b3 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -1022,8 +1022,13 @@ replace_dot_alias = function(e) { .SDcols = eval(colsub, setattr(as.list(seq_along(x)), 'names', names_x), parent.frame()) } else { if (colsub %iscall% 'patterns') { - # each pattern gives a new filter condition, intersect the end result - .SDcols = Reduce(intersect, eval_with_cols(colsub, names_x)) + patterns_list_or_vector = eval_with_cols(colsub, names_x) + .SDcols = if (is.list(patterns_list_or_vector)) { + # each pattern gives a new filter condition, intersect the end result + Reduce(intersect, patterns_list_or_vector) + } else { + patterns_list_or_vector + } } else { .SDcols = eval(colsub, parent.frame(), parent.frame()) # allow filtering via function in .SDcols, #3950 diff --git a/R/fmelt.R b/R/fmelt.R index 092da48b97..23f07c5529 100644 --- a/R/fmelt.R +++ b/R/fmelt.R @@ -30,6 +30,7 @@ patterns = function(..., cols=character(0L), ignore.case=FALSE, perl=FALSE, fixe # replace with lengths when R 3.2.0 dependency arrives if (length(idx <- which(sapply(matched, length) == 0L))) stopf('Pattern(s) not found: [%s]', brackify(p[idx])) + if (length(matched) == 1L) return(matched[[1L]]) matched } diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 1fca4bead5..5f6206c42c 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -17286,7 +17286,13 @@ exid = data.table(id=1, expected) test(2182.3, melt(DTid, measure.vars=list(a=c(NA,1), b=2:3), id.vars="id"), exid) test(2182.4, melt(DTid, measure.vars=list(a=c(NA,"a2"), b=c("b1","b2")), id.vars="id"), exid) test(2182.5, melt(DT.wide, measure.vars=list(a=c(NA,1), b=2:3), na.rm=TRUE), data.table(variable=factor(2), a=2, b=2)) -test(2182.6, melt(DT.wide, measure.vars=list(b=c("b1","b2"))), data.table(a2=2, variable=factor(c("b1","b2")), b=c(1,2))) # measure.vars named list length=1, #5065 +test(2182.6, melt(DT.wide, measure.vars=list(b=c("b1","b2"))), data.table(a2=2, variable=factor(c("1","2")), b=c(1,2))) # measure.vars named list length=1, #5065 +# consistency between measure.vars=list with length=1 and length>1, #5209 +test(2182.71, melt(DT.wide, measure.vars=list("a2"), variable.factor=TRUE), data.table(b1=1, b2=2, variable=factor(1), value=2)) +test(2182.72, melt(DT.wide, measure.vars=c("a2"), variable.factor=TRUE), data.table(b1=1, b2=2, variable=factor("a2"), value=2)) +test(2182.73, melt(DT.wide, measure.vars=list("a2"), variable.factor=FALSE), data.table(b1=1, b2=2, variable="1", value=2)) +test(2182.74, melt(DT.wide, measure.vars=c("a2"), variable.factor=FALSE), data.table(b1=1, b2=2, variable="a2", value=2)) +test(2182.75, melt(data.table(a=10, b=20), measure.vars=list(n="a"), variable.factor=FALSE), data.table(b=20, variable="1", n=10))#thanks @mnazarov ### First block testing measurev # new variable_table attribute for measure.vars, PR#4731 for multiple issues diff --git a/src/fmelt.c b/src/fmelt.c index 9990da2fcd..502e576e0d 100644 --- a/src/fmelt.c +++ b/src/fmelt.c @@ -293,7 +293,8 @@ struct processData { totlen, // of output/long DT result of melt operation. nrow; // of input/wide DT to be melted. SEXPTYPE *maxtype; - Rboolean narm; // remove missing values? + bool measure_is_list, + narm; // remove missing values? }; static void preprocess(SEXP DT, SEXP id, SEXP measure, SEXP varnames, SEXP valnames, Rboolean narm, Rboolean verbose, struct processData *data) { @@ -302,6 +303,7 @@ static void preprocess(SEXP DT, SEXP id, SEXP measure, SEXP varnames, SEXP valna SEXPTYPE type; data->lmax = 0; data->totlen = 0; data->nrow = length(VECTOR_ELT(DT, 0)); SET_VECTOR_ELT(data->RCHK, 0, vars = checkVars(DT, id, measure, verbose)); + data->measure_is_list = !isNull(measure) && isNewList(measure); // NB: NULL passes isNewList() hence !isNull() too data->idcols = VECTOR_ELT(vars, 0); data->valuecols = VECTOR_ELT(vars, 1); data->lids = length(data->idcols); @@ -594,7 +596,7 @@ SEXP getvarcols(SEXP DT, SEXP dtnames, Rboolean varfactor, Rboolean verbose, str if (isNull(data->variable_table)) { if (!varfactor) { SET_VECTOR_ELT(ansvars, 0, target=allocVector(STRSXP, data->totlen)); - if (data->lvalues == 1) {//one value column to output. + if (!data->measure_is_list) {//one value column to output. const int *thisvaluecols = INTEGER(VECTOR_ELT(data->valuecols, 0)); for (int j=0, ansloc=0; jlmax; ++j) { const int thislen = data->narm ? length(VECTOR_ELT(data->not_NA_indices, j)) : data->nrow; @@ -613,7 +615,7 @@ SEXP getvarcols(SEXP DT, SEXP dtnames, Rboolean varfactor, Rboolean verbose, str SET_VECTOR_ELT(ansvars, 0, target=allocVector(INTSXP, data->totlen)); SEXP levels; int *td = INTEGER(target); - if (data->lvalues == 1) {//one value column to output. + if (!data->measure_is_list) {//one value column to output. SEXP thisvaluecols = VECTOR_ELT(data->valuecols, 0); int len = length(thisvaluecols); levels = PROTECT(allocVector(STRSXP, len)); protecti++;