Rdatatable · mattdowle · Dec 14, 2018 · Nov 30, 2018 · Nov 30, 2018 · Dec 5, 2018
@@ -10,7 +10,9 @@
 
 3. `setnames()` gains `skip_absent` to skip names in `old` that aren't present, [#3030](https://github.com/Rdatatable/data.table/issues/3030). By default `FALSE` so that it is still an error, as before, to attempt to change a column name that is not present. Thanks to @MusTheDataGuy for the suggestion and the PR.
 
-4. `NA` in `between`'s `lower` and `upper` are now taken as missing bounds and return `TRUE` rather than than `NA`. This is now documented.
+4. `NA` in `between()` and `%between%`'s `lower` and `upper` are now taken as missing bounds and return `TRUE` rather than than `NA`. This is now documented.
+
+5. `shift()` now interprets negative values of `n` naturally to mean the opposite `type=`, [#1708](https://github.com/Rdatatable/data.table/issues/1708). 
 
 5. `fwrite()` now accepts `matrix`, [#2613](https://github.com/Rdatatable/data.table/issues/2613). Thanks to Michael Chirico for the suggestion and Felipe Parages for implementing. For now matrix input is converted to data.table (which can be costly) before writing. 
 

@@ -6239,7 +6239,6 @@ val = runif(1)
 test(1463.33, shift(val, 2L), NA_real_)
 test(1463.34, shift(val, 2L, type="lead"), NA_real_)
 
-test(1463.35, shift(1:5, -1L), error="n must be non-negative integer")
 test(1463.36, shift(1:5, 1L, fill=c(1:2)), error="fill must be a vector of length")
 
 # add tests for date and factor?
@@ -12854,7 +12853,31 @@ test(1962.109, second(z), 5L)
 test(1962.110, minute(z), 2L)
 test(1962.111, hour(z), 1L)
 
-# test 1963 reserved for another PR
+# positive and negative values for shift, #1708
+DT = data.table(x = 1:10, y = 10:1)
+test(1963.1, shift(DT$x, -1), c(2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, NA))
+test(1963.2, shift(DT$x, -1, type = 'lead'),
+     c(NA, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L))
+test(1963.3, shift(DT$x, -1, fill = 0L),
+     c(2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 0L))
+test(1963.4, shift(DT$x, -1, give.names = TRUE),
+     structure(c(2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, NA),
+     .Names = c("V1_lag_-1", NA, NA, NA, NA, NA, NA, NA, NA, NA)))
+test(1963.5, shift(DT$x, -1:1),
+     list(c(2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, NA), 1:10,
+          c(NA, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L)))
+test(1963.6, shift(DT, -1),
+     list(c(2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, NA),
+          c(9L, 8L, 7L, 6L, 5L, 4L, 3L, 2L, 1L, NA)))
+test(1963.7, shift(DT, -1:1),
+     list(c(2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, NA), 1:10,
+          c(NA, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L),
+          c(9L, 8L, 7L, 6L, 5L, 4L, 3L, 2L, 1L, NA), 10:1,
+          c(NA, 10L, 9L, 8L, 7L, 6L, 5L, 4L, 3L, 2L)))
+## some coverage tests for good measure
+test(1963.8, shift(DT$x, type = 'some_other_type'),
+     error = 'should be one of "lag"')
+test(1963.9, shift(c(1+3i, 2-1i)), error = 'Unsupported type')
 
 # 0 column data.table should not have rownames, #3149
 M0 = matrix(1:6, nrow=3, ncol=2, dimnames=list(rows=paste0("id",1:3), cols=c("v1","v2")))

@@ -14,9 +14,9 @@ shift(x, n=1L, fill=NA, type=c("lag", "lead"), give.names=FALSE)
 }
 \arguments{
   \item{x}{ A vector, list, data.frame or data.table. }
-  \item{n}{ Non-negative integer vector denoting the offset to lead or lag the input by. To create multiple lead/lag vectors, provide multiple values to \code{n}. }
-  \item{fill}{ Value to pad by. }
-  \item{type}{ default is \code{"lag"}. The other possible value is \code{"lead"}. }
+  \item{n}{ integer vector denoting the offset by which to lead or lag the input. To create multiple lead/lag vectors, provide multiple values to \code{n}; negative values of \code{n} will "flip" the value of \code{type}, i.e., \code{n=-1} and \code{type='lead'} is the same as \code{n=1} and \code{type='lag'}. }
+  \item{fill}{ Value to use for padding when the window goes beyond the input length. }
+  \item{type}{ default is \code{"lag"} (look "backwards"). The other possible value is \code{"lead"} (look "forwards"). }
   \item{give.names}{default is \code{FALSE} which returns an unnamed list. When \code{TRUE}, names are automatically generated corresponding to \code{type} and \code{n}. }
 }
 \details{
@@ -37,6 +37,8 @@ x = 1:5
 shift(x, n=1, fill=NA, type="lag")
 # lag with n=1 and 2, and pad with 0 (returns list)
 shift(x, n=1:2, fill=0, type="lag")
+# getting a window by using positive and negative n:
+shift(x, n = -1:1)
 
 # on data.tables
 DT = data.table(year=2010:2014, v1=runif(5), v2=1:5, v3=letters[1:5])

@@ -20,197 +20,150 @@ SEXP shift(SEXP obj, SEXP k, SEXP fill, SEXP type) {
     error("Internal error: n must be integer"); // # nocov
   if (length(fill) != 1)
     error("fill must be a vector of length 1");
+  // the following two errors should be caught by match.arg() at the R level
   if (!isString(type) || length(type) != 1)
-    error("type must be a character vector of length 1");
+    error("Internal error: invalid type for shift(), should have been caught before. please report to data.table issue tracker"); // # nocov
 
-  if (!strcmp(CHAR(STRING_ELT(type, 0)), "lag"))  stype = LAG;
+  if (!strcmp(CHAR(STRING_ELT(type, 0)), "lag")) stype = LAG;
   else if (!strcmp(CHAR(STRING_ELT(type, 0)), "lead")) stype = LEAD;
   else error("Internal error: invalid type for shift(), should have been caught before. please report to data.table issue tracker"); // # nocov
 
   nx = length(x); nk = length(k);
-  i=0;
-  while(i < nk && INTEGER(k)[i] >= 0) i++;
-  if (i != nk)
-    error("n must be non-negative integer values (>= 0)");
+
   ans = PROTECT(allocVector(VECSXP, nk * nx)); protecti++;
-  if (stype == LAG) {
-    for (i=0; i<nx; i++) {
-      elem  = VECTOR_ELT(x, i);
-      size  = SIZEOF(elem);
-      xrows = length(elem);
-      switch (TYPEOF(elem)) {
-      case INTSXP :
-        thisfill = PROTECT(coerceVector(fill, INTSXP)); protecti++;
-        for (j=0; j<nk; j++) {
-          thisk = (xrows >= INTEGER(k)[j]) ? INTEGER(k)[j] : xrows;
-          SET_VECTOR_ELT(ans, i*nk+j, tmp=allocVector(INTSXP, xrows) );
-          if (xrows - INTEGER(k)[j] > 0)
-            memmove((char *)DATAPTR(tmp)+(INTEGER(k)[j]*size),
-                 (char *)DATAPTR(elem),
-                 (xrows-INTEGER(k)[j])*size);
+  for (i=0; i<nx; i++) {
+    elem  = VECTOR_ELT(x, i);
+    size  = SIZEOF(elem);
+    xrows = length(elem);
+    switch (TYPEOF(elem)) {
+    case INTSXP :
+      thisfill = PROTECT(coerceVector(fill, INTSXP)); protecti++;
+      for (j=0; j<nk; j++) {
+        thisk = (INTEGER(k)[j] >= 0) ? INTEGER(k)[j] : -INTEGER(k)[j];
+        thisk = (xrows >= thisk) ? thisk : xrows;
+        SET_VECTOR_ELT(ans, i*nk+j, tmp=allocVector(INTSXP, xrows) );
+        // LAG when type = 'lag' and n >= 0 _or_ type = 'lead' and n < 0
+        if ((stype == LAG && INTEGER(k)[j] >= 0) || (stype == LEAD && INTEGER(k)[j] < 0)) {
+          if (xrows - thisk > 0)
+            memmove((char *)DATAPTR(tmp)+(thisk*size),
+                    (char *)DATAPTR(elem),
+                    (xrows-thisk)*size);
           for (m=0; m<thisk; m++)
             INTEGER(tmp)[m] = INTEGER(thisfill)[0];
-          copyMostAttrib(elem, tmp);
-          if (isFactor(elem))
-            setAttrib(tmp, R_LevelsSymbol, getAttrib(elem, R_LevelsSymbol));
-        }
-        break;
-
-      case REALSXP :
-        klass = getAttrib(elem, R_ClassSymbol);
-        if (isString(klass) && STRING_ELT(klass, 0) == char_integer64) {
-          thisfill = PROTECT(allocVector(REALSXP, 1)); protecti++;
-          dthisfill = (unsigned long long *)REAL(thisfill);
-          if (INTEGER(fill)[0] == NA_INTEGER)
-            dthisfill[0] = NA_INT64_LL;
-          else dthisfill[0] = (unsigned long long)INTEGER(fill)[0];
+          // only two possibilities left: type = 'lead', n>=0 _or_ type = 'lag', n<0
         } else {
-          thisfill = PROTECT(coerceVector(fill, REALSXP)); protecti++;
-        }
-        for (j=0; j<nk; j++) {
-          thisk = (xrows >= INTEGER(k)[j]) ? INTEGER(k)[j] : xrows;
-          SET_VECTOR_ELT(ans, i*nk+j, tmp=allocVector(REALSXP, xrows) );
-          if (xrows - INTEGER(k)[j] > 0) {
-            memmove((char *)DATAPTR(tmp)+(INTEGER(k)[j]*size),
-                 (char *)DATAPTR(elem),
-                 (xrows-INTEGER(k)[j])*size);
-          }
-          for (m=0; m<thisk; m++) {
-            REAL(tmp)[m] = REAL(thisfill)[0];
-          }
-          copyMostAttrib(elem, tmp);
-        }
-        break;
-
-      case LGLSXP :
-        thisfill = PROTECT(coerceVector(fill, LGLSXP)); protecti++;
-        for (j=0; j<nk; j++) {
-          thisk = (xrows >= INTEGER(k)[j]) ? INTEGER(k)[j] : xrows;
-          SET_VECTOR_ELT(ans, i*nk+j, tmp=allocVector(LGLSXP, xrows) );
-          if (xrows - INTEGER(k)[j] > 0)
-            memmove((char *)DATAPTR(tmp)+(INTEGER(k)[j]*size),
-                 (char *)DATAPTR(elem),
-                 (xrows-INTEGER(k)[j])*size);
-          for (m=0; m<thisk; m++)
-            LOGICAL(tmp)[m] = LOGICAL(thisfill)[0];
-          copyMostAttrib(elem, tmp);
-        }
-        break;
-
-      case STRSXP :
-        thisfill = PROTECT(coerceVector(fill, STRSXP)); protecti++;
-        for (j=0; j<nk; j++) {
-          SET_VECTOR_ELT(ans, i*nk+j, tmp=allocVector(STRSXP, xrows) );
-          for (m=0; m<xrows; m++)
-            SET_STRING_ELT(tmp, m, (m < INTEGER(k)[j]) ? STRING_ELT(thisfill, 0) : STRING_ELT(elem, m - INTEGER(k)[j]));
-          copyMostAttrib(elem, tmp);
-        }
-        break;
-
-      case VECSXP :
-        thisfill = PROTECT(coerceVector(fill, VECSXP)); protecti++;
-        for (j=0; j<nk; j++) {
-          SET_VECTOR_ELT(ans, i*nk+j, tmp=allocVector(VECSXP, xrows) );
-          for (m=0; m<xrows; m++)
-            SET_VECTOR_ELT(tmp, m, (m < INTEGER(k)[j]) ? VECTOR_ELT(thisfill, 0) : VECTOR_ELT(elem, m - INTEGER(k)[j]));
-          copyMostAttrib(elem, tmp);
-        }
-        break;
-
-      default :
-        error("Unsupported type '%s'", type2char(TYPEOF(elem)));
-      }
-      copyMostAttrib(elem, tmp);
-      if (isFactor(elem))
-        setAttrib(tmp, R_LevelsSymbol, getAttrib(elem, R_LevelsSymbol));
-    }
-  } else if (stype == LEAD) {
-    for (i=0; i<nx; i++) {
-      elem  = VECTOR_ELT(x, i);
-      size  = SIZEOF(elem);
-      xrows = length(elem);
-      switch (TYPEOF(elem)) {
-      case INTSXP :
-        thisfill = PROTECT(coerceVector(fill, INTSXP)); protecti++;
-        for (j=0; j<nk; j++) {
-          thisk = (xrows >= INTEGER(k)[j]) ? INTEGER(k)[j] : xrows;
-          SET_VECTOR_ELT(ans, i*nk+j, tmp=allocVector(INTSXP, xrows) );
-          if (xrows - INTEGER(k)[j] > 0)
+          if (xrows - thisk > 0)
             memmove((char *)DATAPTR(tmp),
-                 (char *)DATAPTR(elem)+(INTEGER(k)[j]*size),
-                 (xrows-INTEGER(k)[j])*size);
+                    (char *)DATAPTR(elem)+(thisk*size),
+                    (xrows-thisk)*size);
           for (m=xrows-thisk; m<xrows; m++)
             INTEGER(tmp)[m] = INTEGER(thisfill)[0];
-          copyMostAttrib(elem, tmp);
-          if (isFactor(elem))
-            setAttrib(tmp, R_LevelsSymbol, getAttrib(elem, R_LevelsSymbol));
         }
-        break;
+        copyMostAttrib(elem, tmp);
+        if (isFactor(elem))
+          setAttrib(tmp, R_LevelsSymbol, getAttrib(elem, R_LevelsSymbol));
+      }
+      break;
 
-      case REALSXP :
-        klass = getAttrib(elem, R_ClassSymbol);
-        if (isString(klass) && STRING_ELT(klass, 0) == char_integer64) {
-          thisfill = PROTECT(allocVector(REALSXP, 1)); protecti++;
-          dthisfill = (unsigned long long *)REAL(thisfill);
-          if (INTEGER(fill)[0] == NA_INTEGER)
-            dthisfill[0] = NA_INT64_LL;
-          else dthisfill[0] = (unsigned long long)INTEGER(fill)[0];
+    case REALSXP :
+      klass = getAttrib(elem, R_ClassSymbol);
+      if (isString(klass) && STRING_ELT(klass, 0) == char_integer64) {
+        thisfill = PROTECT(allocVector(REALSXP, 1)); protecti++;
+        dthisfill = (unsigned long long *)REAL(thisfill);
+        if (INTEGER(fill)[0] == NA_INTEGER)
+          dthisfill[0] = NA_INT64_LL;
+        else dthisfill[0] = (unsigned long long)INTEGER(fill)[0];
+      } else {
+        thisfill = PROTECT(coerceVector(fill, REALSXP)); protecti++;
+      }
+      for (j=0; j<nk; j++) {
+        thisk = (INTEGER(k)[j] >= 0) ? INTEGER(k)[j] : -INTEGER(k)[j];
+        thisk = (xrows >= INTEGER(k)[j]) ? INTEGER(k)[j] : xrows;
+        SET_VECTOR_ELT(ans, i*nk+j, tmp=allocVector(REALSXP, xrows) );
+        if ((stype == LAG && INTEGER(k)[j] >= 0) || (stype == LEAD && INTEGER(k)[j] < 0)) {
+          if (xrows - thisk > 0) {
+            memmove((char *)DATAPTR(tmp)+(thisk*size),
+                    (char *)DATAPTR(elem),
+                    (xrows-thisk)*size);
+          }
+          for (m=0; m<thisk; m++) {
+            REAL(tmp)[m] = REAL(thisfill)[0];
+          }
         } else {
-          thisfill = PROTECT(coerceVector(fill, REALSXP)); protecti++;
-        }
-        for (j=0; j<nk; j++) {
-          thisk = (xrows >= INTEGER(k)[j]) ? INTEGER(k)[j] : xrows;
-          SET_VECTOR_ELT(ans, i*nk+j, tmp=allocVector(REALSXP, xrows));
-          if (xrows - INTEGER(k)[j] > 0)
+          if (xrows - thisk > 0)
             memmove((char *)DATAPTR(tmp),
-                 (char *)DATAPTR(elem)+(INTEGER(k)[j]*size),
-                 (xrows-INTEGER(k)[j])*size);
+                    (char *)DATAPTR(elem)+(thisk*size),
+                    (xrows-thisk)*size);
           for (m=xrows-thisk; m<xrows; m++)
             REAL(tmp)[m] = REAL(thisfill)[0];
-          copyMostAttrib(elem, tmp);
         }
-        break;
+        copyMostAttrib(elem, tmp);
+      }
+      break;
 
-      case LGLSXP :
-        thisfill = PROTECT(coerceVector(fill, LGLSXP)); protecti++;
-        for (j=0; j<nk; j++) {
-          thisk = (xrows >= INTEGER(k)[j]) ? INTEGER(k)[j] : xrows;
-          SET_VECTOR_ELT(ans, i*nk+j, tmp=allocVector(LGLSXP, xrows) );
-          if (xrows - INTEGER(k)[j] > 0)
+    case LGLSXP :
+      thisfill = PROTECT(coerceVector(fill, LGLSXP)); protecti++;
+      for (j=0; j<nk; j++) {
+        thisk = (INTEGER(k)[j] >= 0) ? INTEGER(k)[j] : -INTEGER(k)[j];
+        thisk = (xrows >= thisk) ? thisk : xrows;
+        SET_VECTOR_ELT(ans, i*nk+j, tmp=allocVector(LGLSXP, xrows) );
+        if ((stype == LAG && INTEGER(k)[j] >= 0) || (stype == LEAD && INTEGER(k)[j] < 0)) {
+          if (xrows - thisk > 0)
+            memmove((char *)DATAPTR(tmp)+(thisk*size),
+                    (char *)DATAPTR(elem),
+                    (xrows-thisk)*size);
+          for (m=0; m<thisk; m++)
+            LOGICAL(tmp)[m] = LOGICAL(thisfill)[0];
+        } else {
+          if (xrows - thisk > 0)
             memmove((char *)DATAPTR(tmp),
-                 (char *)DATAPTR(elem)+(INTEGER(k)[j]*size),
-                 (xrows-INTEGER(k)[j])*size);
+                    (char *)DATAPTR(elem)+(thisk*size),
+                    (xrows-thisk)*size);
           for (m=xrows-thisk; m<xrows; m++)
             LOGICAL(tmp)[m] = LOGICAL(thisfill)[0];
-          copyMostAttrib(elem, tmp);
         }
-        break;
+        copyMostAttrib(elem, tmp);
+      }
+      break;
 
-      case STRSXP :
-        thisfill = PROTECT(coerceVector(fill, STRSXP)); protecti++;
-        for (j=0; j<nk; j++) {
-          SET_VECTOR_ELT(ans, i*nk+j, tmp=allocVector(STRSXP, xrows) );
+    case STRSXP :
+      thisfill = PROTECT(coerceVector(fill, STRSXP)); protecti++;
+      for (j=0; j<nk; j++) {
+        SET_VECTOR_ELT(ans, i*nk+j, tmp=allocVector(STRSXP, xrows) );
+        thisk = (INTEGER(k)[j] >= 0) ? INTEGER(k)[j] : -INTEGER(k)[j];
+        if ((stype == LAG && INTEGER(k)[j] >= 0) || (stype == LEAD && INTEGER(k)[j] < 0)) {
+          for (m=0; m<xrows; m++)
+            SET_STRING_ELT(tmp, m, (m < thisk) ? STRING_ELT(thisfill, 0) : STRING_ELT(elem, m - thisk));
+        } else {
           for (m=0; m<xrows; m++)
-            SET_STRING_ELT(tmp, m, (xrows-m <= INTEGER(k)[j]) ? STRING_ELT(thisfill, 0) : STRING_ELT(elem, m + INTEGER(k)[j]));
-          copyMostAttrib(elem, tmp);
+            SET_STRING_ELT(tmp, m, (xrows-m <= thisk) ? STRING_ELT(thisfill, 0) : STRING_ELT(elem, m + thisk));
         }
-        break;
+        copyMostAttrib(elem, tmp);
+      }
+      break;
+
 
-      case VECSXP :
-        thisfill = PROTECT(coerceVector(fill, VECSXP)); protecti++;
-        for (j=0; j<nk; j++) {
-          SET_VECTOR_ELT(ans, i*nk+j, tmp=allocVector(VECSXP, xrows) );
+    case VECSXP :
+      thisfill = PROTECT(coerceVector(fill, VECSXP)); protecti++;
+      for (j=0; j<nk; j++) {
+        SET_VECTOR_ELT(ans, i*nk+j, tmp=allocVector(VECSXP, xrows) );
+        thisk = (INTEGER(k)[j] >= 0) ? INTEGER(k)[j] : -INTEGER(k)[j];
+        if ((stype == LAG && INTEGER(k)[j] >= 0) || (stype == LEAD && INTEGER(k)[j] < 0)) {
           for (m=0; m<xrows; m++)
-            SET_VECTOR_ELT(tmp, m, (xrows-m <= INTEGER(k)[j]) ? VECTOR_ELT(thisfill, 0) : VECTOR_ELT(elem, m + INTEGER(k)[j]));
-            copyMostAttrib(elem, tmp);
+            SET_VECTOR_ELT(tmp, m, (m < thisk) ? VECTOR_ELT(thisfill, 0) : VECTOR_ELT(elem, m - thisk));
+        } else {
+          for (m=0; m<xrows; m++)
+            SET_VECTOR_ELT(tmp, m, (xrows-m <= thisk) ? VECTOR_ELT(thisfill, 0) : VECTOR_ELT(elem, m + thisk));
         }
-        break;
-
-      default :
-        error("Unsupported type '%s'", type2char(TYPEOF(elem)));
+        copyMostAttrib(elem, tmp);
       }
+      break;
+
+    default :
+      error("Unsupported type '%s'", type2char(TYPEOF(elem)));
     }
   }
+
   UNPROTECT(protecti);
   return isVectorAtomic(obj) && length(ans) == 1 ? VECTOR_ELT(ans, 0) : ans;
 }