-
Notifications
You must be signed in to change notification settings - Fork 1k
Support negative values of n in shift #3166
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -14,9 +14,9 @@ shift(x, n=1L, fill=NA, type=c("lag", "lead"), give.names=FALSE) | |
| } | ||
| \arguments{ | ||
| \item{x}{ A vector, list, data.frame or data.table. } | ||
| \item{n}{ Non-negative integer vector denoting the offset to lead or lag the input by. To create multiple lead/lag vectors, provide multiple values to \code{n}. } | ||
| \item{fill}{ Value to pad by. } | ||
| \item{type}{ default is \code{"lag"}. The other possible value is \code{"lead"}. } | ||
| \item{n}{ integer vector denoting the offset by which to lead or lag the input. To create multiple lead/lag vectors, provide multiple values to \code{n}; negative values of \code{n} will "flip" the value of \code{type}, i.e., \code{n=-1} and \code{type='lead'} is the same as \code{n=1} and \code{type='lag'}. } | ||
| \item{fill}{ Value to use for padding when the window goes beyond the input length. } | ||
| \item{type}{ default is \code{"lag"} (look "backwards"). The other possible value is \code{"lead"} (look "forwards"). } | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I would more strongly suggest to use negative |
||
| \item{give.names}{default is \code{FALSE} which returns an unnamed list. When \code{TRUE}, names are automatically generated corresponding to \code{type} and \code{n}. } | ||
| } | ||
| \details{ | ||
|
|
@@ -37,6 +37,8 @@ x = 1:5 | |
| shift(x, n=1, fill=NA, type="lag") | ||
| # lag with n=1 and 2, and pad with 0 (returns list) | ||
| shift(x, n=1:2, fill=0, type="lag") | ||
| # getting a window by using positive and negative n: | ||
| shift(x, n = -1:1) | ||
|
|
||
| # on data.tables | ||
| DT = data.table(year=2010:2014, v1=runif(5), v2=1:5, v3=letters[1:5]) | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -20,197 +20,150 @@ SEXP shift(SEXP obj, SEXP k, SEXP fill, SEXP type) { | |
| error("Internal error: n must be integer"); // # nocov | ||
| if (length(fill) != 1) | ||
| error("fill must be a vector of length 1"); | ||
| // the following two errors should be caught by match.arg() at the R level | ||
| if (!isString(type) || length(type) != 1) | ||
| error("type must be a character vector of length 1"); | ||
| error("Internal error: invalid type for shift(), should have been caught before. please report to data.table issue tracker"); // # nocov | ||
|
|
||
| if (!strcmp(CHAR(STRING_ELT(type, 0)), "lag")) stype = LAG; | ||
| if (!strcmp(CHAR(STRING_ELT(type, 0)), "lag")) stype = LAG; | ||
| else if (!strcmp(CHAR(STRING_ELT(type, 0)), "lead")) stype = LEAD; | ||
| else error("Internal error: invalid type for shift(), should have been caught before. please report to data.table issue tracker"); // # nocov | ||
|
|
||
| nx = length(x); nk = length(k); | ||
| i=0; | ||
| while(i < nk && INTEGER(k)[i] >= 0) i++; | ||
| if (i != nk) | ||
| error("n must be non-negative integer values (>= 0)"); | ||
|
|
||
| ans = PROTECT(allocVector(VECSXP, nk * nx)); protecti++; | ||
| if (stype == LAG) { | ||
| for (i=0; i<nx; i++) { | ||
| elem = VECTOR_ELT(x, i); | ||
| size = SIZEOF(elem); | ||
| xrows = length(elem); | ||
| switch (TYPEOF(elem)) { | ||
| case INTSXP : | ||
| thisfill = PROTECT(coerceVector(fill, INTSXP)); protecti++; | ||
| for (j=0; j<nk; j++) { | ||
| thisk = (xrows >= INTEGER(k)[j]) ? INTEGER(k)[j] : xrows; | ||
| SET_VECTOR_ELT(ans, i*nk+j, tmp=allocVector(INTSXP, xrows) ); | ||
| if (xrows - INTEGER(k)[j] > 0) | ||
| memmove((char *)DATAPTR(tmp)+(INTEGER(k)[j]*size), | ||
| (char *)DATAPTR(elem), | ||
| (xrows-INTEGER(k)[j])*size); | ||
| for (i=0; i<nx; i++) { | ||
| elem = VECTOR_ELT(x, i); | ||
| size = SIZEOF(elem); | ||
| xrows = length(elem); | ||
| switch (TYPEOF(elem)) { | ||
| case INTSXP : | ||
| thisfill = PROTECT(coerceVector(fill, INTSXP)); protecti++; | ||
| for (j=0; j<nk; j++) { | ||
| thisk = (INTEGER(k)[j] >= 0) ? INTEGER(k)[j] : -INTEGER(k)[j]; | ||
| thisk = (xrows >= thisk) ? thisk : xrows; | ||
| SET_VECTOR_ELT(ans, i*nk+j, tmp=allocVector(INTSXP, xrows) ); | ||
| // LAG when type = 'lag' and n >= 0 _or_ type = 'lead' and n < 0 | ||
| if ((stype == LAG && INTEGER(k)[j] >= 0) || (stype == LEAD && INTEGER(k)[j] < 0)) { | ||
| if (xrows - thisk > 0) | ||
| memmove((char *)DATAPTR(tmp)+(thisk*size), | ||
| (char *)DATAPTR(elem), | ||
| (xrows-thisk)*size); | ||
| for (m=0; m<thisk; m++) | ||
| INTEGER(tmp)[m] = INTEGER(thisfill)[0]; | ||
| copyMostAttrib(elem, tmp); | ||
| if (isFactor(elem)) | ||
| setAttrib(tmp, R_LevelsSymbol, getAttrib(elem, R_LevelsSymbol)); | ||
| } | ||
| break; | ||
|
|
||
| case REALSXP : | ||
| klass = getAttrib(elem, R_ClassSymbol); | ||
| if (isString(klass) && STRING_ELT(klass, 0) == char_integer64) { | ||
| thisfill = PROTECT(allocVector(REALSXP, 1)); protecti++; | ||
| dthisfill = (unsigned long long *)REAL(thisfill); | ||
| if (INTEGER(fill)[0] == NA_INTEGER) | ||
| dthisfill[0] = NA_INT64_LL; | ||
| else dthisfill[0] = (unsigned long long)INTEGER(fill)[0]; | ||
| // only two possibilities left: type = 'lead', n>=0 _or_ type = 'lag', n<0 | ||
| } else { | ||
| thisfill = PROTECT(coerceVector(fill, REALSXP)); protecti++; | ||
| } | ||
| for (j=0; j<nk; j++) { | ||
| thisk = (xrows >= INTEGER(k)[j]) ? INTEGER(k)[j] : xrows; | ||
| SET_VECTOR_ELT(ans, i*nk+j, tmp=allocVector(REALSXP, xrows) ); | ||
| if (xrows - INTEGER(k)[j] > 0) { | ||
| memmove((char *)DATAPTR(tmp)+(INTEGER(k)[j]*size), | ||
| (char *)DATAPTR(elem), | ||
| (xrows-INTEGER(k)[j])*size); | ||
| } | ||
| for (m=0; m<thisk; m++) { | ||
| REAL(tmp)[m] = REAL(thisfill)[0]; | ||
| } | ||
| copyMostAttrib(elem, tmp); | ||
| } | ||
| break; | ||
|
|
||
| case LGLSXP : | ||
| thisfill = PROTECT(coerceVector(fill, LGLSXP)); protecti++; | ||
| for (j=0; j<nk; j++) { | ||
| thisk = (xrows >= INTEGER(k)[j]) ? INTEGER(k)[j] : xrows; | ||
| SET_VECTOR_ELT(ans, i*nk+j, tmp=allocVector(LGLSXP, xrows) ); | ||
| if (xrows - INTEGER(k)[j] > 0) | ||
| memmove((char *)DATAPTR(tmp)+(INTEGER(k)[j]*size), | ||
| (char *)DATAPTR(elem), | ||
| (xrows-INTEGER(k)[j])*size); | ||
| for (m=0; m<thisk; m++) | ||
| LOGICAL(tmp)[m] = LOGICAL(thisfill)[0]; | ||
| copyMostAttrib(elem, tmp); | ||
| } | ||
| break; | ||
|
|
||
| case STRSXP : | ||
| thisfill = PROTECT(coerceVector(fill, STRSXP)); protecti++; | ||
| for (j=0; j<nk; j++) { | ||
| SET_VECTOR_ELT(ans, i*nk+j, tmp=allocVector(STRSXP, xrows) ); | ||
| for (m=0; m<xrows; m++) | ||
| SET_STRING_ELT(tmp, m, (m < INTEGER(k)[j]) ? STRING_ELT(thisfill, 0) : STRING_ELT(elem, m - INTEGER(k)[j])); | ||
| copyMostAttrib(elem, tmp); | ||
| } | ||
| break; | ||
|
|
||
| case VECSXP : | ||
| thisfill = PROTECT(coerceVector(fill, VECSXP)); protecti++; | ||
| for (j=0; j<nk; j++) { | ||
| SET_VECTOR_ELT(ans, i*nk+j, tmp=allocVector(VECSXP, xrows) ); | ||
| for (m=0; m<xrows; m++) | ||
| SET_VECTOR_ELT(tmp, m, (m < INTEGER(k)[j]) ? VECTOR_ELT(thisfill, 0) : VECTOR_ELT(elem, m - INTEGER(k)[j])); | ||
| copyMostAttrib(elem, tmp); | ||
| } | ||
| break; | ||
|
|
||
| default : | ||
| error("Unsupported type '%s'", type2char(TYPEOF(elem))); | ||
| } | ||
| copyMostAttrib(elem, tmp); | ||
|
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I honestly think these lines are vestigial and I think they may be slowing down
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Which "these" precisely? What is it accomplishing that is not already done in INTSXP? |
||
| if (isFactor(elem)) | ||
| setAttrib(tmp, R_LevelsSymbol, getAttrib(elem, R_LevelsSymbol)); | ||
| } | ||
| } else if (stype == LEAD) { | ||
| for (i=0; i<nx; i++) { | ||
| elem = VECTOR_ELT(x, i); | ||
| size = SIZEOF(elem); | ||
| xrows = length(elem); | ||
| switch (TYPEOF(elem)) { | ||
| case INTSXP : | ||
| thisfill = PROTECT(coerceVector(fill, INTSXP)); protecti++; | ||
| for (j=0; j<nk; j++) { | ||
| thisk = (xrows >= INTEGER(k)[j]) ? INTEGER(k)[j] : xrows; | ||
| SET_VECTOR_ELT(ans, i*nk+j, tmp=allocVector(INTSXP, xrows) ); | ||
| if (xrows - INTEGER(k)[j] > 0) | ||
| if (xrows - thisk > 0) | ||
| memmove((char *)DATAPTR(tmp), | ||
| (char *)DATAPTR(elem)+(INTEGER(k)[j]*size), | ||
| (xrows-INTEGER(k)[j])*size); | ||
| (char *)DATAPTR(elem)+(thisk*size), | ||
| (xrows-thisk)*size); | ||
| for (m=xrows-thisk; m<xrows; m++) | ||
| INTEGER(tmp)[m] = INTEGER(thisfill)[0]; | ||
| copyMostAttrib(elem, tmp); | ||
| if (isFactor(elem)) | ||
| setAttrib(tmp, R_LevelsSymbol, getAttrib(elem, R_LevelsSymbol)); | ||
| } | ||
| break; | ||
| copyMostAttrib(elem, tmp); | ||
| if (isFactor(elem)) | ||
| setAttrib(tmp, R_LevelsSymbol, getAttrib(elem, R_LevelsSymbol)); | ||
| } | ||
| break; | ||
|
|
||
| case REALSXP : | ||
| klass = getAttrib(elem, R_ClassSymbol); | ||
| if (isString(klass) && STRING_ELT(klass, 0) == char_integer64) { | ||
| thisfill = PROTECT(allocVector(REALSXP, 1)); protecti++; | ||
| dthisfill = (unsigned long long *)REAL(thisfill); | ||
| if (INTEGER(fill)[0] == NA_INTEGER) | ||
| dthisfill[0] = NA_INT64_LL; | ||
| else dthisfill[0] = (unsigned long long)INTEGER(fill)[0]; | ||
| case REALSXP : | ||
| klass = getAttrib(elem, R_ClassSymbol); | ||
| if (isString(klass) && STRING_ELT(klass, 0) == char_integer64) { | ||
| thisfill = PROTECT(allocVector(REALSXP, 1)); protecti++; | ||
| dthisfill = (unsigned long long *)REAL(thisfill); | ||
| if (INTEGER(fill)[0] == NA_INTEGER) | ||
| dthisfill[0] = NA_INT64_LL; | ||
| else dthisfill[0] = (unsigned long long)INTEGER(fill)[0]; | ||
| } else { | ||
| thisfill = PROTECT(coerceVector(fill, REALSXP)); protecti++; | ||
| } | ||
| for (j=0; j<nk; j++) { | ||
| thisk = (INTEGER(k)[j] >= 0) ? INTEGER(k)[j] : -INTEGER(k)[j]; | ||
| thisk = (xrows >= INTEGER(k)[j]) ? INTEGER(k)[j] : xrows; | ||
| SET_VECTOR_ELT(ans, i*nk+j, tmp=allocVector(REALSXP, xrows) ); | ||
| if ((stype == LAG && INTEGER(k)[j] >= 0) || (stype == LEAD && INTEGER(k)[j] < 0)) { | ||
| if (xrows - thisk > 0) { | ||
| memmove((char *)DATAPTR(tmp)+(thisk*size), | ||
| (char *)DATAPTR(elem), | ||
| (xrows-thisk)*size); | ||
| } | ||
| for (m=0; m<thisk; m++) { | ||
| REAL(tmp)[m] = REAL(thisfill)[0]; | ||
| } | ||
| } else { | ||
| thisfill = PROTECT(coerceVector(fill, REALSXP)); protecti++; | ||
| } | ||
| for (j=0; j<nk; j++) { | ||
| thisk = (xrows >= INTEGER(k)[j]) ? INTEGER(k)[j] : xrows; | ||
| SET_VECTOR_ELT(ans, i*nk+j, tmp=allocVector(REALSXP, xrows)); | ||
| if (xrows - INTEGER(k)[j] > 0) | ||
| if (xrows - thisk > 0) | ||
| memmove((char *)DATAPTR(tmp), | ||
| (char *)DATAPTR(elem)+(INTEGER(k)[j]*size), | ||
| (xrows-INTEGER(k)[j])*size); | ||
| (char *)DATAPTR(elem)+(thisk*size), | ||
| (xrows-thisk)*size); | ||
| for (m=xrows-thisk; m<xrows; m++) | ||
| REAL(tmp)[m] = REAL(thisfill)[0]; | ||
| copyMostAttrib(elem, tmp); | ||
| } | ||
| break; | ||
| copyMostAttrib(elem, tmp); | ||
| } | ||
| break; | ||
|
|
||
| case LGLSXP : | ||
| thisfill = PROTECT(coerceVector(fill, LGLSXP)); protecti++; | ||
| for (j=0; j<nk; j++) { | ||
| thisk = (xrows >= INTEGER(k)[j]) ? INTEGER(k)[j] : xrows; | ||
| SET_VECTOR_ELT(ans, i*nk+j, tmp=allocVector(LGLSXP, xrows) ); | ||
| if (xrows - INTEGER(k)[j] > 0) | ||
| case LGLSXP : | ||
| thisfill = PROTECT(coerceVector(fill, LGLSXP)); protecti++; | ||
| for (j=0; j<nk; j++) { | ||
| thisk = (INTEGER(k)[j] >= 0) ? INTEGER(k)[j] : -INTEGER(k)[j]; | ||
| thisk = (xrows >= thisk) ? thisk : xrows; | ||
| SET_VECTOR_ELT(ans, i*nk+j, tmp=allocVector(LGLSXP, xrows) ); | ||
| if ((stype == LAG && INTEGER(k)[j] >= 0) || (stype == LEAD && INTEGER(k)[j] < 0)) { | ||
| if (xrows - thisk > 0) | ||
| memmove((char *)DATAPTR(tmp)+(thisk*size), | ||
| (char *)DATAPTR(elem), | ||
| (xrows-thisk)*size); | ||
| for (m=0; m<thisk; m++) | ||
| LOGICAL(tmp)[m] = LOGICAL(thisfill)[0]; | ||
| } else { | ||
| if (xrows - thisk > 0) | ||
| memmove((char *)DATAPTR(tmp), | ||
| (char *)DATAPTR(elem)+(INTEGER(k)[j]*size), | ||
| (xrows-INTEGER(k)[j])*size); | ||
| (char *)DATAPTR(elem)+(thisk*size), | ||
| (xrows-thisk)*size); | ||
| for (m=xrows-thisk; m<xrows; m++) | ||
| LOGICAL(tmp)[m] = LOGICAL(thisfill)[0]; | ||
| copyMostAttrib(elem, tmp); | ||
| } | ||
| break; | ||
| copyMostAttrib(elem, tmp); | ||
| } | ||
| break; | ||
|
|
||
| case STRSXP : | ||
| thisfill = PROTECT(coerceVector(fill, STRSXP)); protecti++; | ||
| for (j=0; j<nk; j++) { | ||
| SET_VECTOR_ELT(ans, i*nk+j, tmp=allocVector(STRSXP, xrows) ); | ||
| case STRSXP : | ||
| thisfill = PROTECT(coerceVector(fill, STRSXP)); protecti++; | ||
| for (j=0; j<nk; j++) { | ||
| SET_VECTOR_ELT(ans, i*nk+j, tmp=allocVector(STRSXP, xrows) ); | ||
| thisk = (INTEGER(k)[j] >= 0) ? INTEGER(k)[j] : -INTEGER(k)[j]; | ||
| if ((stype == LAG && INTEGER(k)[j] >= 0) || (stype == LEAD && INTEGER(k)[j] < 0)) { | ||
| for (m=0; m<xrows; m++) | ||
| SET_STRING_ELT(tmp, m, (m < thisk) ? STRING_ELT(thisfill, 0) : STRING_ELT(elem, m - thisk)); | ||
| } else { | ||
| for (m=0; m<xrows; m++) | ||
| SET_STRING_ELT(tmp, m, (xrows-m <= INTEGER(k)[j]) ? STRING_ELT(thisfill, 0) : STRING_ELT(elem, m + INTEGER(k)[j])); | ||
| copyMostAttrib(elem, tmp); | ||
| SET_STRING_ELT(tmp, m, (xrows-m <= thisk) ? STRING_ELT(thisfill, 0) : STRING_ELT(elem, m + thisk)); | ||
| } | ||
| break; | ||
| copyMostAttrib(elem, tmp); | ||
| } | ||
| break; | ||
|
|
||
|
|
||
| case VECSXP : | ||
| thisfill = PROTECT(coerceVector(fill, VECSXP)); protecti++; | ||
| for (j=0; j<nk; j++) { | ||
| SET_VECTOR_ELT(ans, i*nk+j, tmp=allocVector(VECSXP, xrows) ); | ||
| case VECSXP : | ||
| thisfill = PROTECT(coerceVector(fill, VECSXP)); protecti++; | ||
| for (j=0; j<nk; j++) { | ||
| SET_VECTOR_ELT(ans, i*nk+j, tmp=allocVector(VECSXP, xrows) ); | ||
| thisk = (INTEGER(k)[j] >= 0) ? INTEGER(k)[j] : -INTEGER(k)[j]; | ||
| if ((stype == LAG && INTEGER(k)[j] >= 0) || (stype == LEAD && INTEGER(k)[j] < 0)) { | ||
| for (m=0; m<xrows; m++) | ||
| SET_VECTOR_ELT(tmp, m, (xrows-m <= INTEGER(k)[j]) ? VECTOR_ELT(thisfill, 0) : VECTOR_ELT(elem, m + INTEGER(k)[j])); | ||
| copyMostAttrib(elem, tmp); | ||
| SET_VECTOR_ELT(tmp, m, (m < thisk) ? VECTOR_ELT(thisfill, 0) : VECTOR_ELT(elem, m - thisk)); | ||
| } else { | ||
| for (m=0; m<xrows; m++) | ||
| SET_VECTOR_ELT(tmp, m, (xrows-m <= thisk) ? VECTOR_ELT(thisfill, 0) : VECTOR_ELT(elem, m + thisk)); | ||
| } | ||
| break; | ||
|
|
||
| default : | ||
| error("Unsupported type '%s'", type2char(TYPEOF(elem))); | ||
| copyMostAttrib(elem, tmp); | ||
| } | ||
| break; | ||
|
|
||
| default : | ||
| error("Unsupported type '%s'", type2char(TYPEOF(elem))); | ||
| } | ||
| } | ||
|
|
||
| UNPROTECT(protecti); | ||
| return isVectorAtomic(obj) && length(ans) == 1 ? VECTOR_ELT(ans, 0) : ans; | ||
| } | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is certainly awkwardly named. The naming is done at the R level, so it would be very easy for me to change this to
V1_lead_1. Any thoughts?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
better than lag/lead in this case to use shift_1 or shift_-1
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This test is invalid: we should not set names on a vector result; that only makes sense for list results. Will fix it as part of #3223 - unrelated to shift vs lag/lead naming.