From f4c46cbf379b9d9e93549ad970b7084bc428276d Mon Sep 17 00:00:00 2001 From: shrektan Date: Thu, 12 Sep 2019 01:09:25 +0800 Subject: [PATCH 1/4] setnames()'s `old=` and `new=` allows function --- R/data.table.R | 4 +++- inst/tests/tests.Rraw | 6 ++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/R/data.table.R b/R/data.table.R index 3f97544372..7d714371ee 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -2407,6 +2407,7 @@ setnames = function(x,old,new,skip_absent=FALSE) { stopifnot(isTRUEorFALSE(skip_absent)) if (missing(new)) { # for setnames(DT,new); e.g., setnames(DT,c("A","B")) where ncol(DT)==2 + if (is.function(old)) old = old(names(x)) if (!is.character(old)) stop("Passed a vector of type '",typeof(old),"'. Needs to be type 'character'.") if (length(old) != ncol) stop("Can't assign ",length(old)," names to a ",ncol," column data.table") if (anyNA(names(x))) { @@ -2420,7 +2421,8 @@ setnames = function(x,old,new,skip_absent=FALSE) { i = w } else { if (missing(old)) stop("When 'new' is provided, 'old' must be provided too") - if (!is.character(new)) stop("'new' is not a character vector") + if (is.function(new)) new = new(old) + if (!is.character(new)) stop("'new' is not a character vector or a function") # if (anyDuplicated(new)) warning("Some duplicates exist in 'new': ", brackify(new[duplicated(new)])) # dups allowed without warning; warn if and when the dup causes an ambiguity if (anyNA(new)) stop("NA in 'new' at positions ", brackify(which(is.na(new)))) if (anyDuplicated(old)) stop("Some duplicates exist in 'old': ", brackify(old[duplicated(old)])) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index d705901860..1b1ffa9f5e 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -15936,6 +15936,12 @@ test(2100.14, fifelse(c(T,F,NA),c(1,1,1),c(2,2,2),NA), c(1,2,NA)) DT = data.table(id=1:3, v=4:6, key="id") test(2101, DT[.(logical())], data.table(id=logical(), v=integer(), key="id")) +# setnames() allows function 
arguments #3703 +DT = data.table(a=1:3, b=4:6, c=7:9) +setnames(DT, base::toupper) +test(2102.1, names(DT), c('A','B','C')) +setnames(DT, c('B','C'), function(x) sprintf('W_%s_W', x)) +test(2102.2, names(DT), c('A','W_B_W','W_C_W')) ################################### # Add new tests above this line # From fe3918ace8e32af11e1e28852d8103bf2d4f4158 Mon Sep 17 00:00:00 2001 From: shrektan Date: Thu, 12 Sep 2019 01:09:33 +0800 Subject: [PATCH 2/4] update NEWS --- NEWS.md | 1 + 1 file changed, 1 insertion(+) diff --git a/NEWS.md b/NEWS.md index e3820812ee..dd72cdb4ae 100644 --- a/NEWS.md +++ b/NEWS.md @@ -192,6 +192,7 @@ # frollapply 5.404 1.419 56.475 # froll[fun] 0.003 0.002 NA ``` +28. `setnames()` now allows function for arguments `old=` or `new=`, [#3703](https://github.com/Rdatatable/data.table/issues/3703). Thanks @smingerson for the feature request and @shrektan for the PR. #### BUG FIXES From 5ceed81d15a32940da668f6959506a20b0ef13e4 Mon Sep 17 00:00:00 2001 From: shrektan Date: Thu, 12 Sep 2019 01:17:31 +0800 Subject: [PATCH 3/4] handle numeric old correctly --- R/data.table.R | 7 ++++++- inst/tests/tests.Rraw | 6 ++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/R/data.table.R b/R/data.table.R index 7d714371ee..e01e7f6cd0 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -2421,7 +2421,12 @@ setnames = function(x,old,new,skip_absent=FALSE) { i = w } else { if (missing(old)) stop("When 'new' is provided, 'old' must be provided too") - if (is.function(new)) new = new(old) + if (is.function(new)) { + if (is.numeric(old)) + new = new(names(x)[old]) + else + new = new(old) + } if (!is.character(new)) stop("'new' is not a character vector or a function") # if (anyDuplicated(new)) warning("Some duplicates exist in 'new': ", brackify(new[duplicated(new)])) # dups allowed without warning; warn if and when the dup causes an ambiguity if (anyNA(new)) stop("NA in 'new' at positions ", brackify(which(is.na(new)))) diff --git a/inst/tests/tests.Rraw 
b/inst/tests/tests.Rraw index 1b1ffa9f5e..ec1f27dff0 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -15942,6 +15942,12 @@ setnames(DT, base::toupper) test(2102.1, names(DT), c('A','B','C')) setnames(DT, c('B','C'), function(x) sprintf('W_%s_W', x)) test(2102.2, names(DT), c('A','W_B_W','W_C_W')) +DT = data.table(a=1:3, b=4:6, c=7:9) +# support numeric old as well +setnames(DT, 1, toupper) +test(2102.3, names(DT), c('A','b','c')) +setnames(DT, -1, toupper) +test(2102.4, names(DT), c('A','B','C')) ################################### # Add new tests above this line # From 015c704fa37acd8697b504db681ed4d756ca814a Mon Sep 17 00:00:00 2001 From: shrektan Date: Thu, 12 Sep 2019 01:18:33 +0800 Subject: [PATCH 4/4] update docs --- man/setattr.Rd | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/man/setattr.Rd b/man/setattr.Rd index f6db5d2001..7a3d3c8e20 100644 --- a/man/setattr.Rd +++ b/man/setattr.Rd @@ -13,8 +13,8 @@ setnames(x,old,new,skip_absent=FALSE) \item{x}{ \code{setnames} accepts \code{data.frame} and \code{data.table}. \code{setattr} accepts any input; e.g, list, columns of a \code{data.frame} or \code{data.table}. } \item{name}{ The character attribute name. } \item{value}{ The value to assign to the attribute or \code{NULL} removes the attribute, if present. } - \item{old}{ When \code{new} is provided, character names or numeric positions of column names to change. When \code{new} is not provided, the new column names, which must be the same length as the number of columns. See examples. } - \item{new}{ Optional. New column names, must be the same length as columns provided to \code{old} argument. } + \item{old}{ When \code{new} is provided, character names or numeric positions of column names to change. When \code{new} is not provided, a function or the new column names. If a function, it will be called with the current column names and is supposed to return the new column names. 
The new column names must be the same length as the number of columns. See examples. } + \item{new}{ Optional. It can be a function or the new column names. If a function, it will be called with the column names selected by \code{old} (when \code{old} is numeric, the names at those positions) and is expected to return the new column names. The new column names must be the same length as the columns provided to the \code{old} argument. } \item{skip_absent}{ Skip items in \code{old} that are missing (i.e. absent) in `names(x)`. Default \code{FALSE} halts with error if any are missing. } } @@ -59,6 +59,8 @@ setnames(DT,3,"C") # by position with warning if 3 > ncol(DT) setnames(DT,2:3,c("D","E")) # multiple setnames(DT,c("a","E"),c("A","F")) # multiple by name (warning if either "a" or "E" is missing) setnames(DT,c("X","Y","Z")) # replace all (length of names must be == ncol(DT)) +setnames(DT,tolower) # replace all names with their lower case +setnames(DT,2:3,toupper) # replace the 2nd and 3rd names with their upper case DT <- data.table(x = 1:3, y = 4:6, z = 7:9) setnames(DT, -2, c("a", "b")) # NEW FR #1443, allows -ve indices in 'old' argument