From 578e953ca7ed98d69f3e3552c06cfe22eeef4574 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Fri, 30 Nov 2018 17:08:43 +0800 Subject: [PATCH 1/2] Closes #1708 -- add support for postiive and negative values of n in shift --- NEWS.md | 2 + inst/tests/tests.Rraw | 23 +++- man/shift.Rd | 8 +- src/shift.c | 264 +++++++++++++++++------------------------- 4 files changed, 137 insertions(+), 160 deletions(-) diff --git a/NEWS.md b/NEWS.md index b7d30da3af..ff9348c93a 100644 --- a/NEWS.md +++ b/NEWS.md @@ -10,6 +10,8 @@ 3. `setnames()` gains `skip_absent` to skip names in `old` that aren't present, [#3030](https://github.com/Rdatatable/data.table/issues/3030). By default `FALSE` so that it is still an error, as before, to attempt to change a column name that is not present. Thanks to @MusTheDataGuy for the suggestion and the PR. +4. `shift` now interprets negative values to `n` naturally to mean the opposite `'type'`, [#1708](https://github.com/Rdatatable/data.table/issues/1708). + #### BUG FIXES 1. Providing an `i` subset expression when attempting to delete a column correctly failed with helpful error, but when the column was missing too created a new column full of `NULL` values, [#3089](https://github.com/Rdatatable/data.table/issues/3089). Thanks to Michael Chirico for reporting. diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index dc39794791..bac583f658 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -6190,7 +6190,6 @@ val = runif(1) test(1463.33, shift(val, 2L), NA_real_) test(1463.34, shift(val, 2L, type="lead"), NA_real_) -test(1463.35, shift(1:5, -1L), error="n must be non-negative integer") test(1463.36, shift(1:5, 1L, fill=c(1:2)), error="fill must be a vector of length") # add tests for date and factor? @@ -12416,6 +12415,28 @@ for (i in 1:4) { test(1959.5, fread("A\n\nB\n\nC\n1\n", skip=2), data.table(B=c("", "C", "1"))) test(1959.6, fread("A,B\r\r\nX,Y\r\r\nB,C\r\r\n1,2", skip=4), data.table(B=1L, C=2L)) +# positive and negative values for shift, #1708 +DT = data.table(x = 1:10, y = 10:1) + +test(1960.1, shift(DT$x, -1), c(2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, NA)) +test(1960.2, shift(DT$x, -1, type = 'lead'), + c(NA, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L)) +test(1960.3, shift(DT$x, -1, fill = 0L), + c(2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 0L)) +test(1960.4, shift(DT$x, -1, give.names = TRUE), + structure(c(2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, NA), + .Names = c("V1_lag_-1", NA, NA, NA, NA, NA, NA, NA, NA, NA))) +test(1960.5, shift(DT$x, -1:1), + list(c(2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, NA), 1:10, + c(NA, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L))) +test(1960.6, shift(DT, -1), + list(c(2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, NA), + c(9L, 8L, 7L, 6L, 5L, 4L, 3L, 2L, 1L, NA))) +test(1960.7, shift(DT, -1:1), + list(c(2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, NA), 1:10, + c(NA, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L), + c(9L, 8L, 7L, 6L, 5L, 4L, 3L, 2L, 1L, NA), 10:1, + c(NA, 10L, 9L, 8L, 7L, 6L, 5L, 4L, 3L, 2L))) ################################### # Add new tests above this line # diff --git a/man/shift.Rd b/man/shift.Rd index 9f0546bb3e..7b5a1a6302 100644 --- a/man/shift.Rd +++ b/man/shift.Rd @@ -14,9 +14,9 @@ shift(x, n=1L, fill=NA, type=c("lag", "lead"), give.names=FALSE) } \arguments{ \item{x}{ A vector, list, data.frame or data.table. } - \item{n}{ Non-negative integer vector denoting the offset to lead or lag the input by. To create multiple lead/lag vectors, provide multiple values to \code{n}. } - \item{fill}{ Value to pad by. } - \item{type}{ default is \code{"lag"}. The other possible value is \code{"lead"}. } + \item{n}{ integer vector denoting the offset by which to lead or lag the input. To create multiple lead/lag vectors, provide multiple values to \code{n}; negative values of \code{n} will "flip" the value of \code{type}, i.e., \code{n=-1} and \code{type='lead'} is the same as \code{n=1} and \code{type='lag'}. } + \item{fill}{ Value to use for padding when the window goes beyond the input length. } + \item{type}{ default is \code{"lag"} (look "backwards"). The other possible value is \code{"lead"} (look "forwards"). } \item{give.names}{default is \code{FALSE} which returns an unnamed list. When \code{TRUE}, names are automatically generated corresponding to \code{type} and \code{n}. } } \details{ @@ -37,6 +37,8 @@ x = 1:5 shift(x, n=1, fill=NA, type="lag") # lag with n=1 and 2, and pad with 0 (returns list) shift(x, n=1:2, fill=0, type="lag") +# getting a window by using positive and negative n: +shift(x, n = -1:1) # on data.tables DT = data.table(year=2010:2014, v1=runif(5), v2=1:5, v3=letters[1:5]) diff --git a/src/shift.c b/src/shift.c index c9f8e215ec..70bd85a704 100644 --- a/src/shift.c +++ b/src/shift.c @@ -23,194 +23,146 @@ SEXP shift(SEXP obj, SEXP k, SEXP fill, SEXP type) { if (!isString(type) || length(type) != 1) error("type must be a character vector of length 1"); - if (!strcmp(CHAR(STRING_ELT(type, 0)), "lag")) stype = LAG; + if (!strcmp(CHAR(STRING_ELT(type, 0)), "lag")) stype = LAG; else if (!strcmp(CHAR(STRING_ELT(type, 0)), "lead")) stype = LEAD; else error("Internal error: invalid type for shift(), should have been caught before. please report to data.table issue tracker"); // # nocov nx = length(x); nk = length(k); - i=0; - while(i < nk && INTEGER(k)[i] >= 0) i++; - if (i != nk) - error("n must be non-negative integer values (>= 0)"); + ans = PROTECT(allocVector(VECSXP, nk * nx)); protecti++; - if (stype == LAG) { - for (i=0; i= INTEGER(k)[j]) ? INTEGER(k)[j] : xrows; - SET_VECTOR_ELT(ans, i*nk+j, tmp=allocVector(INTSXP, xrows) ); - if (xrows - INTEGER(k)[j] > 0) - memmove((char *)DATAPTR(tmp)+(INTEGER(k)[j]*size), - (char *)DATAPTR(elem), - (xrows-INTEGER(k)[j])*size); + for (i=0; i= 0) ? INTEGER(k)[j] : -INTEGER(k)[j]; + thisk = (xrows >= thisk) ? thisk : xrows; + SET_VECTOR_ELT(ans, i*nk+j, tmp=allocVector(INTSXP, xrows) ); + // LAG when type = 'lag' and n >= 0 _or_ type = 'lead' and n < 0 + if ((stype == LAG && INTEGER(k)[j] >= 0) || (stype == LEAD && INTEGER(k)[j] < 0)) { + if (xrows - thisk > 0) + memmove((char *)DATAPTR(tmp)+(thisk*size), + (char *)DATAPTR(elem), + (xrows-thisk)*size); for (m=0; m=0 _or_ type = 'lag', n<0 } else { - thisfill = PROTECT(coerceVector(fill, REALSXP)); protecti++; - } - for (j=0; j= INTEGER(k)[j]) ? INTEGER(k)[j] : xrows; - SET_VECTOR_ELT(ans, i*nk+j, tmp=allocVector(REALSXP, xrows) ); - if (xrows - INTEGER(k)[j] > 0) { - memmove((char *)DATAPTR(tmp)+(INTEGER(k)[j]*size), - (char *)DATAPTR(elem), - (xrows-INTEGER(k)[j])*size); - } - for (m=0; m= INTEGER(k)[j]) ? INTEGER(k)[j] : xrows; - SET_VECTOR_ELT(ans, i*nk+j, tmp=allocVector(LGLSXP, xrows) ); - if (xrows - INTEGER(k)[j] > 0) - memmove((char *)DATAPTR(tmp)+(INTEGER(k)[j]*size), - (char *)DATAPTR(elem), - (xrows-INTEGER(k)[j])*size); - for (m=0; m= INTEGER(k)[j]) ? INTEGER(k)[j] : xrows; - SET_VECTOR_ELT(ans, i*nk+j, tmp=allocVector(INTSXP, xrows) ); - if (xrows - INTEGER(k)[j] > 0) + if (xrows - thisk > 0) memmove((char *)DATAPTR(tmp), - (char *)DATAPTR(elem)+(INTEGER(k)[j]*size), - (xrows-INTEGER(k)[j])*size); + (char *)DATAPTR(elem)+(thisk*size), + (xrows-thisk)*size); for (m=xrows-thisk; m= 0) ? INTEGER(k)[j] : -INTEGER(k)[j]; + thisk = (xrows >= INTEGER(k)[j]) ? INTEGER(k)[j] : xrows; + SET_VECTOR_ELT(ans, i*nk+j, tmp=allocVector(REALSXP, xrows) ); + if ((stype == LAG && INTEGER(k)[j] >= 0) || (stype == LEAD && INTEGER(k)[j] < 0)) { + if (xrows - thisk > 0) { + memmove((char *)DATAPTR(tmp)+(thisk*size), + (char *)DATAPTR(elem), + (xrows-thisk)*size); + } + for (m=0; m= INTEGER(k)[j]) ? INTEGER(k)[j] : xrows; - SET_VECTOR_ELT(ans, i*nk+j, tmp=allocVector(REALSXP, xrows)); - if (xrows - INTEGER(k)[j] > 0) + if (xrows - thisk > 0) memmove((char *)DATAPTR(tmp), - (char *)DATAPTR(elem)+(INTEGER(k)[j]*size), - (xrows-INTEGER(k)[j])*size); + (char *)DATAPTR(elem)+(thisk*size), + (xrows-thisk)*size); for (m=xrows-thisk; m= INTEGER(k)[j]) ? INTEGER(k)[j] : xrows; - SET_VECTOR_ELT(ans, i*nk+j, tmp=allocVector(LGLSXP, xrows) ); - if (xrows - INTEGER(k)[j] > 0) + case LGLSXP : + thisfill = PROTECT(coerceVector(fill, LGLSXP)); protecti++; + for (j=0; j= 0) ? INTEGER(k)[j] : -INTEGER(k)[j]; + thisk = (xrows >= thisk) ? thisk : xrows; + SET_VECTOR_ELT(ans, i*nk+j, tmp=allocVector(LGLSXP, xrows) ); + if ((stype == LAG && INTEGER(k)[j] >= 0) || (stype == LEAD && INTEGER(k)[j] < 0)) { + if (xrows - thisk > 0) + memmove((char *)DATAPTR(tmp)+(thisk*size), + (char *)DATAPTR(elem), + (xrows-thisk)*size); + for (m=0; m 0) memmove((char *)DATAPTR(tmp), - (char *)DATAPTR(elem)+(INTEGER(k)[j]*size), - (xrows-INTEGER(k)[j])*size); + (char *)DATAPTR(elem)+(thisk*size), + (xrows-thisk)*size); for (m=xrows-thisk; m= 0) ? INTEGER(k)[j] : -INTEGER(k)[j]; + if ((stype == LAG && INTEGER(k)[j] >= 0) || (stype == LEAD && INTEGER(k)[j] < 0)) { + for (m=0; m= 0) ? INTEGER(k)[j] : -INTEGER(k)[j]; + if ((stype == LAG && INTEGER(k)[j] >= 0) || (stype == LEAD && INTEGER(k)[j] < 0)) { for (m=0; m Date: Fri, 30 Nov 2018 17:40:46 +0800 Subject: [PATCH 2/2] coverage tests --- inst/tests/tests.Rraw | 5 +++++ src/shift.c | 3 ++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index bac583f658..3453159535 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -12438,6 +12438,11 @@ test(1960.7, shift(DT, -1:1), c(9L, 8L, 7L, 6L, 5L, 4L, 3L, 2L, 1L, NA), 10:1, c(NA, 10L, 9L, 8L, 7L, 6L, 5L, 4L, 3L, 2L))) +## some coverage tests for good measure +test(1960.8, shift(DT$x, type = 'some_other_type'), + error = 'should be one of "lag"') +test(1960.9, shift(c(1+3i, 2-1i)), error = 'Unsupported type') + ################################### # Add new tests above this line # ################################### diff --git a/src/shift.c b/src/shift.c index 70bd85a704..940097efdc 100644 --- a/src/shift.c +++ b/src/shift.c @@ -20,8 +20,9 @@ SEXP shift(SEXP obj, SEXP k, SEXP fill, SEXP type) { error("Internal error: n must be integer"); // # nocov if (length(fill) != 1) error("fill must be a vector of length 1"); + // the following two errors should be caught by match.arg() at the R level if (!isString(type) || length(type) != 1) - error("type must be a character vector of length 1"); + error("Internal error: invalid type for shift(), should have been caught before. please report to data.table issue tracker"); // # nocov if (!strcmp(CHAR(STRING_ELT(type, 0)), "lag")) stype = LAG; else if (!strcmp(CHAR(STRING_ELT(type, 0)), "lead")) stype = LEAD;