From 1beddf40b25ad0386330cb2ba236a43b5dbbb817 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Thu, 7 May 2020 19:32:29 +0800 Subject: [PATCH] fix frank(.SD) for ties.method="random" / na.last=NA --- NEWS.md | 2 ++ R/frank.R | 7 ++++++- inst/tests/tests.Rraw | 11 +++++++++++ 3 files changed, 19 insertions(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index 026f06c86e..4da89f8c99 100644 --- a/NEWS.md +++ b/NEWS.md @@ -109,6 +109,8 @@ unit = "s") 13. A relatively rare case of segfault when combining non-equi joins with `by=.EACHI` is now fixed, closes [#4388](https://github.com/Rdatatable/data.table/issues/4388). +14. Passing `.SD` to `frankv` with `ties.method='random'` or with `na.last=NA` is now fixed, [#4429](https://github.com/Rdatatable/data.table/issues/4429). Thanks @smarches for the report. + ## NOTES 0. Retrospective license change permission was sought from and granted by 4 contributors who were missed in [PR#2456](https://github.com/Rdatatable/data.table/pull/2456), [#4140](https://github.com/Rdatatable/data.table/pull/4140). We had used [GitHub's contributor page](https://github.com/Rdatatable/data.table/graphs/contributors) which omits 3 of these due to invalid email addresses, unlike GitLab's contributor page which includes the ids. The 4th omission was a PR to a script which should not have been excluded; a script is code too. We are sorry these contributors were not properly credited before. They have now been added to the contributors list as displayed on CRAN. All the contributors of code to data.table hold its copyright jointly; your contributions belong to you. You contributed to data.table when it had a particular license at that time, and you contributed on that basis. This is why in the last license change, all contributors of code were consulted and each had a veto. diff --git a/R/frank.R b/R/frank.R index 763b8267e5..47e701c4cd 100644 --- a/R/frank.R +++ b/R/frank.R @@ -22,10 +22,13 @@ frankv = function(x, cols=seq_along(x), order=1L, na.last=TRUE, ties.method=c("a if (!length(cols)) stop("x is a list, 'cols' can not be 0-length") } - x = .shallow(x, cols) # shallow copy even if list.. + # need to unlock for #4429 + x = .shallow(x, cols, unlock = TRUE) # shallow copy even if list.. setDT(x) cols = seq_along(cols) if (is.na(na.last)) { + if ("..na_prefix.." %chin% names(x)) + stop("Input column '..na_prefix..' conflicts with data.table internal usage; please rename") set(x, j = "..na_prefix..", value = is_na(x, cols)) order = if (length(order) == 1L) c(1L, rep(order, length(cols))) else c(1L, order) cols = c(ncol(x), cols) @@ -39,6 +42,8 @@ frankv = function(x, cols=seq_along(x), order=1L, na.last=TRUE, ties.method=c("a idx = NULL n = nrow(x) } + if ('..stats_runif..' %chin% names(x)) + stop("Input column '..stats_runif..' conflicts with data.table internal usage; please rename") set(x, idx, '..stats_runif..', stats::runif(n)) order = if (length(order) == 1L) c(rep(order, length(cols)), 1L) else c(order, 1L) cols = c(cols, ncol(x)) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index ed17470383..2810a7455c 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -16853,3 +16853,14 @@ A = data.table(A=c(complex(real = 1:3, imaginary=c(0, -1, 1)), NaN)) test(2138.3, rbind(A,B), data.table(A=c(as.character(A$A), B$A))) A = data.table(A=as.complex(rep(NA, 5))) test(2138.4, rbind(A,B), data.table(A=c(as.character(A$A), B$A))) + +# shallow copy of .SD must be unlocked for frank using na.last=NA or ties.method='random', #4429 +DT = data.table(a=1:10) +test(2139.1, DT[ , frankv(.SD, ties.method='average', na.last=NA)], as.double(1:10)) +test(2139.2, DT[ , frankv(.SD, ties.method='random')], 1:10) +# coverage tests for some issues discovered on the way +DT[ , c('..na_prefix..', '..stats_runif..') := 1L] +test(2139.3, DT[ , frankv(.SD, ties.method='average', na.last=NA)], + error="Input column '..na_prefix..' conflicts") +test(2139.4, DT[ , frankv(.SD, ties.method='random')], + error="Input column '..stats_runif..' conflicts")