From cf1cdd73e16763b86f074fcd1cf89851ad289d81 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Fri, 14 Feb 2020 09:49:25 +0800 Subject: [PATCH 1/4] fix tests to work on R 3.1 --- inst/tests/tests.Rraw | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 2a42adf7a2..0c005b99cf 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -5480,8 +5480,8 @@ test_no = 1368.0 for (i in seq_along(dt)) { col = dt[[i]] for (j in list(TRUE, FALSE, "keep")) { - for (k in c("average", "min", "max", "first", "last")) { - if (k == "random") set.seed(45L) + # ensure consistency with base::rank ties.methods as advertised + for (k in setdiff(eval(formals(base::rank)$ties.method), 'random')) { if (class(col) == "integer64") { r1 = rank(as.integer(col), ties.method=k, na.last=j) r2 = rank(-xtfrm(as.integer(col)), ties.method=k, na.last=j) @@ -5490,7 +5490,6 @@ for (i in seq_along(dt)) { r1 = rank(col, ties.method=k, na.last=j) r2 = rank(-xtfrm(col), ties.method=k, na.last=j) } - if (k == "random") set.seed(45L) r3 = frankv(col, ties.method=k, na.last=j) r4 = frankv(col, order=-1L, ties.method=k, na.last=j) @@ -5511,8 +5510,8 @@ if (test_bit64) dt[, DD := as.integer64(DD)] test_no = 1369.0 for (i in seq_along(dt)) { col = dt[[i]] - for (k in c("average", "min", "max", "first", "last")) { - if (k == "random") set.seed(45L) + # ensure consistency with base::rank ties.methods as advertised + for (k in setdiff(eval(formals(base::rank)$ties.method), 'random')) { if (class(col) == "integer64") { r1 = rank(as.integer(col), ties.method=k, na.last=NA) r2 = rank(-xtfrm(as.integer(col)), ties.method=k, na.last=NA) @@ -5521,7 +5520,6 @@ for (i in seq_along(dt)) { r1 = rank(col, ties.method=k, na.last=NA) r2 = rank(-xtfrm(col), ties.method=k, na.last=NA) } - if (k == "random") set.seed(45L) r3 = frankv(col, ties.method=k, na.last=NA) r4 = frankv(col, order=-1L, ties.method=k, na.last=NA) From 85a6b493ad0937975d57fc254f22935e06134a49 Mon Sep 17 00:00:00 2001 From: mattdowle Date: Fri, 14 Feb 2020 17:58:00 -0700 Subject: [PATCH 2/4] turned 'random' on in the loop and added back the two set.seed --- inst/tests/tests.Rraw | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 0c005b99cf..61dc699a77 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -5481,7 +5481,8 @@ for (i in seq_along(dt)) { col = dt[[i]] for (j in list(TRUE, FALSE, "keep")) { # ensure consistency with base::rank ties.methods as advertised - for (k in setdiff(eval(formals(base::rank)$ties.method), 'random')) { + for (k in eval(formals(base::rank)$ties.method)) { + if (k == "random") set.seed(45L) if (class(col) == "integer64") { r1 = rank(as.integer(col), ties.method=k, na.last=j) r2 = rank(-xtfrm(as.integer(col)), ties.method=k, na.last=j) @@ -5490,6 +5491,7 @@ for (i in seq_along(dt)) { r1 = rank(col, ties.method=k, na.last=j) r2 = rank(-xtfrm(col), ties.method=k, na.last=j) } + if (k == "random") set.seed(45L) r3 = frankv(col, ties.method=k, na.last=j) r4 = frankv(col, order=-1L, ties.method=k, na.last=j) From 5d2847e6b071dcf5b2a295d0e6bb702cc1c82b8b Mon Sep 17 00:00:00 2001 From: mattdowle Date: Fri, 14 Feb 2020 18:16:57 -0700 Subject: [PATCH 3/4] comment added to 1369 about 'random' with TODO --- inst/tests/tests.Rraw | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 61dc699a77..f21dfcf253 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -5513,7 +5513,8 @@ test_no = 1369.0 for (i in seq_along(dt)) { col = dt[[i]] # ensure consistency with base::rank ties.methods as advertised - for (k in setdiff(eval(formals(base::rank)$ties.method), 'random')) { + for (k in setdiff(eval(formals(base::rank)$ties.method), 'random')) { # 'random' off with na.last=NA as there's a difference (TODO: fix) + if (k == "random") set.seed(45L) if (class(col) == "integer64") { r1 = rank(as.integer(col), ties.method=k, na.last=NA) r2 = rank(-xtfrm(as.integer(col)), ties.method=k, na.last=NA) @@ -5522,6 +5523,7 @@ for (i in seq_along(dt)) { r1 = rank(col, ties.method=k, na.last=NA) r2 = rank(-xtfrm(col), ties.method=k, na.last=NA) } + if (k == "random") set.seed(45L) r3 = frankv(col, ties.method=k, na.last=NA) r4 = frankv(col, order=-1L, ties.method=k, na.last=NA) From 518f39a6cd7c7589dcf3e795794180b1ceda60e7 Mon Sep 17 00:00:00 2001 From: mattdowle Date: Fri, 14 Feb 2020 19:12:46 -0700 Subject: [PATCH 4/4] ties='random' with na.last=NA now consistent with base and test 1369 put to bed --- NEWS.md | 2 ++ R/frank.R | 9 ++++++--- inst/tests/tests.Rraw | 2 +- 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/NEWS.md b/NEWS.md index 7251bace79..dc66ce2897 100644 --- a/NEWS.md +++ b/NEWS.md @@ -109,6 +109,8 @@ unit = "s") 4. `fifelse` and `fcase` notify users that S4 objects (except `nanotime`) are not supported [#4135](https://github.com/Rdatatable/data.table/issues/4135). Thanks to @torema-ed for bringing it to our attention and Morgan Jacob for the PR. +5. `frank(..., ties.method="random", na.last=NA)` now returns the same random ordering that `base::rank` does, [#4243](https://github.com/Rdatatable/data.table/pull/4243). + # data.table [v1.12.8](https://github.com/Rdatatable/data.table/milestone/15?closed=1) (09 Dec 2019) diff --git a/R/frank.R b/R/frank.R index 05fd7df754..763b8267e5 100644 --- a/R/frank.R +++ b/R/frank.R @@ -32,11 +32,14 @@ frankv = function(x, cols=seq_along(x), order=1L, na.last=TRUE, ties.method=c("a nas = x[[ncol(x)]] } if (ties.method == "random") { - v = stats::runif(nrow(x)) if (is.na(na.last)) { idx = which_(nas, FALSE) - set(x, idx, '..stats_runif..', v[idx]) - } else set(x, NULL, '..stats_runif..', v) + n = length(idx) # changed in #4243 to match base R to pass test 1369 + } else { + idx = NULL + n = nrow(x) + } + set(x, idx, '..stats_runif..', stats::runif(n)) order = if (length(order) == 1L) c(rep(order, length(cols)), 1L) else c(order, 1L) cols = c(cols, ncol(x)) } diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index f21dfcf253..b1c080c393 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -5513,7 +5513,7 @@ test_no = 1369.0 for (i in seq_along(dt)) { col = dt[[i]] # ensure consistency with base::rank ties.methods as advertised - for (k in setdiff(eval(formals(base::rank)$ties.method), 'random')) { # 'random' off with na.last=NA as there's a difference (TODO: fix) + for (k in eval(formals(base::rank)$ties.method)) { # 'random' on now with tweak to match base in #4243 if (k == "random") set.seed(45L) if (class(col) == "integer64") { r1 = rank(as.integer(col), ties.method=k, na.last=NA)