From a3a9a21278f082e9c1d989fc1936e47dd366acb8 Mon Sep 17 00:00:00 2001 From: jangorecki Date: Sat, 15 Dec 2018 13:28:58 +0530 Subject: [PATCH 1/2] shift now accept type="shift" for more intuitive give.names=T output, closes #3223 --- R/shift.R | 6 +++--- inst/tests/tests.Rraw | 14 +++++++++++--- man/shift.Rd | 5 +++-- src/shift.c | 3 ++- 4 files changed, 19 insertions(+), 9 deletions(-) diff --git a/R/shift.R b/R/shift.R index f6b4e860ec..601cd3e20d 100644 --- a/R/shift.R +++ b/R/shift.R @@ -1,14 +1,14 @@ -shift <- function(x, n=1L, fill=NA, type=c("lag", "lead"), give.names=FALSE) { +shift <- function(x, n=1L, fill=NA, type=c("lag", "lead", "shift"), give.names=FALSE) { type = match.arg(type) ans = .Call(Cshift, x, as.integer(n), fill, type) - if (give.names) { + if (give.names && is.list(ans)) { if (is.null(names(x))) { xsub = substitute(x) if (is.atomic(x) && is.name(xsub)) nx = deparse(xsub, 500L) else nx = paste0("V", if (is.atomic(x)) 1L else seq_along(x)) } else nx = names(x) - setattr(ans, 'names', do.call("paste", c(CJ(nx, type, n, sorted=FALSE), sep="_"))) + setattr(ans, "names", do.call("paste", c(CJ(nx, type, n, sorted=FALSE), sep="_"))) } ans } diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 30c9ec26d5..096891cb1e 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -12860,9 +12860,8 @@ test(1963.2, shift(DT$x, -1, type = 'lead'), c(NA, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L)) test(1963.3, shift(DT$x, -1, fill = 0L), c(2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 0L)) -test(1963.4, shift(DT$x, -1, give.names = TRUE), - structure(c(2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, NA), - .Names = c("V1_lag_-1", NA, NA, NA, NA, NA, NA, NA, NA, NA))) +test(1963.4, shift(DT$x, -1, give.names = TRUE), # give.names is ignored because we do not return list + c(2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, NA)) test(1963.5, shift(DT$x, -1:1), list(c(2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, NA), 1:10, c(NA, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L))) @@ -12877,6 +12876,15 @@ test(1963.7, shift(DT, -1:1), ## some coverage tests for good measure test(1963.8, shift(DT$x, type = 'some_other_type'), error='should be one of.*lag.*lead') test(1963.9, shift(c(1+3i, 2-1i)), error = 'Unsupported type') +test(1963.10, shift(DT, -1:1, type="shift", give.names = TRUE), # new type="shift" #3223 + list(`x_shift_-1` = c(2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, NA), + x_shift_0 = 1:10, + x_shift_1 = c(NA, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L), + `y_shift_-1` = c(9L, 8L, 7L, 6L, 5L, 4L, 3L, 2L, 1L, NA), + y_shift_0 = 10:1, + y_shift_1 = c(NA, 10L, 9L, 8L, 7L, 6L, 5L, 4L, 3L, 2L))) + + # 0 column data.table should not have rownames, #3149 M0 = matrix(1:6, nrow=3, ncol=2, dimnames=list(rows=paste0("id",1:3), cols=c("v1","v2"))) diff --git a/man/shift.Rd b/man/shift.Rd index 7b5a1a6302..03f009bc5f 100644 --- a/man/shift.Rd +++ b/man/shift.Rd @@ -10,13 +10,13 @@ } \usage{ -shift(x, n=1L, fill=NA, type=c("lag", "lead"), give.names=FALSE) +shift(x, n=1L, fill=NA, type=c("lag", "lead", "shift"), give.names=FALSE) } \arguments{ \item{x}{ A vector, list, data.frame or data.table. } \item{n}{ integer vector denoting the offset by which to lead or lag the input. To create multiple lead/lag vectors, provide multiple values to \code{n}; negative values of \code{n} will "flip" the value of \code{type}, i.e., \code{n=-1} and \code{type='lead'} is the same as \code{n=1} and \code{type='lag'}. } \item{fill}{ Value to use for padding when the window goes beyond the input length. } - \item{type}{ default is \code{"lag"} (look "backwards"). The other possible value is \code{"lead"} (look "forwards"). } + \item{type}{ default is \code{"lag"} (look "backwards"). The other possible values \code{"lead"} (look "forwards") and \code{"shift"} (behave same as \code{"lag"} except given names). } \item{give.names}{default is \code{FALSE} which returns an unnamed list. When \code{TRUE}, names are automatically generated corresponding to \code{type} and \code{n}. } } \details{ @@ -39,6 +39,7 @@ shift(x, n=1, fill=NA, type="lag") shift(x, n=1:2, fill=0, type="lag") # getting a window by using positive and negative n: shift(x, n = -1:1) +shift(x, n = -1:1, type = "shift", give.names = TRUE) # on data.tables DT = data.table(year=2010:2014, v1=runif(5), v2=1:5, v3=letters[1:5]) diff --git a/src/shift.c b/src/shift.c index 940097efdc..5083ccd5a4 100644 --- a/src/shift.c +++ b/src/shift.c @@ -8,7 +8,7 @@ SEXP shift(SEXP obj, SEXP k, SEXP fill, SEXP type) { R_len_t i=0, j, m, nx, nk, xrows, thisk, protecti=0; SEXP x, tmp=R_NilValue, elem, ans, thisfill, klass; unsigned long long *dthisfill; - enum {LAG, LEAD} stype = LAG; + enum {LAG, LEAD/*, SHIFT, CYCLIC*/} stype = LAG; // currently SHIFT maps to LAG and CYCLIC is unimplemented (see comments in #1708) if (!length(obj)) return(obj); // NULL, list() if (isVectorAtomic(obj)) { x = PROTECT(allocVector(VECSXP, 1)); protecti++; @@ -26,6 +26,7 @@ SEXP shift(SEXP obj, SEXP k, SEXP fill, SEXP type) { if (!strcmp(CHAR(STRING_ELT(type, 0)), "lag")) stype = LAG; else if (!strcmp(CHAR(STRING_ELT(type, 0)), "lead")) stype = LEAD; + else if (!strcmp(CHAR(STRING_ELT(type, 0)), "shift")) stype = LAG; // when we get rid of nested if branches we can use SHIFT, for now it maps to LAG else error("Internal error: invalid type for shift(), should have been caught before. please report to data.table issue tracker"); // # nocov nx = length(x); nk = length(k); From 32eee2b9921fa06287e0c060c685f7fb00519e5f Mon Sep 17 00:00:00 2001 From: mattdowle Date: Wed, 19 Dec 2018 17:37:10 -0800 Subject: [PATCH 2/2] keep shift and flip lead/lag names; use paste not CJ; news item --- NEWS.md | 6 +++++- R/shift.R | 12 +++++++++++- inst/tests/tests.Rraw | 10 ++++++---- 3 files changed, 22 insertions(+), 6 deletions(-) diff --git a/NEWS.md b/NEWS.md index f1240fef2c..6e65d5e08d 100644 --- a/NEWS.md +++ b/NEWS.md @@ -12,7 +12,11 @@ 4. `NA` in `between()` and `%between%`'s `lower` and `upper` are now taken as missing bounds and return `TRUE` rather than than `NA`. This is now documented. -5. `shift()` now interprets negative values of `n` naturally to mean the opposite `type=`, [#1708](https://github.com/Rdatatable/data.table/issues/1708). +5. `shift()` now interprets negative values of `n` to mean the opposite `type=`, [#1708](https://github.com/Rdatatable/data.table/issues/1708). When `give.names=TRUE` the result is named using a positive `n` with the appropriate `type=`. Alternatively, a new `type="shift"` names the result using a signed `n` and constant type. + ```R + shift(x, n=-5:5, give.names=TRUE) => "_lead_5" ... "_lag_5" + shift(x, n=-5:5, type="shift", give.names=TRUE) => "_shift_-5" ... "_shift_5" + ``` 5. `fwrite()` now accepts `matrix`, [#2613](https://github.com/Rdatatable/data.table/issues/2613). Thanks to Michael Chirico for the suggestion and Felipe Parages for implementing. For now matrix input is converted to data.table (which can be costly) before writing. diff --git a/R/shift.R b/R/shift.R index 601cd3e20d..7f8595c755 100644 --- a/R/shift.R +++ b/R/shift.R @@ -8,7 +8,17 @@ shift <- function(x, n=1L, fill=NA, type=c("lag", "lead", "shift"), give.names=F else nx = paste0("V", if (is.atomic(x)) 1L else seq_along(x)) } else nx = names(x) - setattr(ans, "names", do.call("paste", c(CJ(nx, type, n, sorted=FALSE), sep="_"))) + if (type!="shift") { + # flip type for negative n, #3223 + neg = (n<0L) + if (type=="lead") neg[ n==0L ] = TRUE # lead_0 should be named lag_0 for consistency + if (any(neg)) { + type = rep(type,length(n)) + type[neg] = if (type[1L]=="lead") "lag" else "lead" + n[neg] = -n[neg] + } + } + setattr(ans, "names", paste(rep(nx,each=length(n)), type, n, sep="_")) } ans } diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 096891cb1e..8a79ee884c 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -12876,14 +12876,16 @@ test(1963.7, shift(DT, -1:1), ## some coverage tests for good measure test(1963.8, shift(DT$x, type = 'some_other_type'), error='should be one of.*lag.*lead') test(1963.9, shift(c(1+3i, 2-1i)), error = 'Unsupported type') -test(1963.10, shift(DT, -1:1, type="shift", give.names = TRUE), # new type="shift" #3223 - list(`x_shift_-1` = c(2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, NA), - x_shift_0 = 1:10, +test(1963.11, shift(DT, -1:1, type="shift", give.names = TRUE), # new type="shift" #3223 + ans <- list(`x_shift_-1` = c(2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, NA), + x_shift_0 = 1:10, x_shift_1 = c(NA, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L), `y_shift_-1` = c(9L, 8L, 7L, 6L, 5L, 4L, 3L, 2L, 1L, NA), y_shift_0 = 10:1, y_shift_1 = c(NA, 10L, 9L, 8L, 7L, 6L, 5L, 4L, 3L, 2L))) - +names(ans) <- c("x_lead_1", "x_lag_0", "x_lag_1", "y_lead_1", "y_lag_0", "y_lag_1") +test(1963.12, shift(DT, -1:1, type="lag", give.names = TRUE), ans) +test(1963.13, shift(DT, 1:-1, type="lead", give.names = TRUE), ans) # 0 column data.table should not have rownames, #3149