diff --git a/NEWS.md b/NEWS.md index 381282d..5ad856b 100644 --- a/NEWS.md +++ b/NEWS.md @@ -38,6 +38,8 @@ 1. `as.integer64.integer64` returns a plain `integer64` vector stripped of any attributes. This is consistent with R like behavior, e.g. `as.integer.integer`. 1. `%/%` matches base R/Knuth behavior of taking the `floor()` of a result, where before truncation was towards zero. For example, `as.integer64(-10L) %/% as.integer64(7L)` now gives `-2L`, not `-1L`. This is consistent with `-10L %/% 7L` in base R. Consequently, `%%` is also affected, e.g. `as.integer64(-10L) %% as.integer64(7L)` now gives `4L`, not `-3L`, consistent with `-10L %% 7L` in base R. +1. `quantile` and `median` now behave consistently with base R (#247). This is achieved by changing `sortqtl` and `orderqtl`, which are used in `qtile` and thus also in `quantile` and `median`. This means that `median(as.integer64(c(1, 3)))` now returns `2` (previously `3`), consistent with `median(c(1, 3))` in base R. + ## NEW FEATURES 1. `anyNA` gets an `integer64` method. Thanks @hcirellu. @@ -76,6 +78,7 @@ 1. `[.integer64` now runs faster and correctly regarding `NA` and arrays (#176). Thanks @hcirellu. 1. `integer64() %in% 1L` no longer warns (#265). Thanks @hcirellu. 1. `match.integer64(..., method="orderpos")` and `duplicated.integer64(..., method="orderdup")` no longer fail with "object 's' not found" (#58). +1. `quantile` and `median` now behave consistently with base R (#247). This is achieved by changing `sortqtl` and `orderqtl`, which are used in `qtile` and thus also in `quantile` and `median`. 
## NOTES diff --git a/R/highlevel64.R b/R/highlevel64.R index 4cd9fd0..cf982f8 100644 --- a/R/highlevel64.R +++ b/R/highlevel64.R @@ -1189,10 +1189,15 @@ optimizer64 = function(nsmall=2L^16L, ) tim["quantile", "both"] = timefun({ - p = quantile(x, type=1L, na.rm=TRUE) + p = quantile(x, type=7L, na.rm=TRUE) })[3L] p2 = p - p = as.integer64(p2) + roundToBigger = function(x) { + res = round(x) + res[x%%1 == 0.5] = ceiling(x[x%%1 == 0.5]) + res + } + p = as.integer64(roundToBigger(p2)) names(p) = names(p2) x = as.integer64(x) diff --git a/R/sortuse64.R b/R/sortuse64.R index c344720..6df6fe9 100644 --- a/R/sortuse64.R +++ b/R/sortuse64.R @@ -546,7 +546,10 @@ sortqtl = function(sorted, na.count, probs, ...) UseMethod("sortqtl") #' @export sortqtl.integer64 = function(sorted, na.count, probs, ...) { n = length(sorted) - na.count # nvalid - ret = sorted[na.count + round(1L + probs * (n - 1L))] + sel = na.count + (1L + probs * (n - 1L)) + idx = matrix(c(floor(sel), ceiling(sel)), nrow=2L, byrow=TRUE) + neighboring_values = matrix(sorted[idx], nrow=2L) + ret = neighboring_values[1L,] + (neighboring_values[2L,] - neighboring_values[1L,])*(sel%%1) # TODO(#31): Remove this once `[` can return NA for integer64 directly ret[is.na(probs)] = NA ret @@ -560,8 +563,10 @@ orderqtl = function(table, order, na.count, probs, ...) UseMethod("orderqtl") #' @export orderqtl.integer64 = function(table, order, na.count, probs, ...) 
{ n = length(table) - na.count # nvalid - idx = na.count + round(1L + probs * (n - 1L)) - ret = table[order[idx]] + sel = na.count + (1L + probs * (n - 1L)) + idx = matrix(c(floor(sel), ceiling(sel)), nrow=2L, byrow=TRUE) + neighboring_values = matrix(table[order[idx]], nrow=2L) + ret = neighboring_values[1L,] + (neighboring_values[2L,] - neighboring_values[1L,])*(sel%%1) # TODO(#31): Remove this once `[` can return NA for integer64 directly ret[is.na(probs)] = NA ret diff --git a/tests/testthat/test-highlevel64.R b/tests/testthat/test-highlevel64.R index 25c6bec..e0ed1f9 100644 --- a/tests/testthat/test-highlevel64.R +++ b/tests/testthat/test-highlevel64.R @@ -281,7 +281,7 @@ test_that("sorting methods work", { expect_identical(rank(x, method="orderrnk"), x_rank) x = as.integer64(1:100) - q = as.integer64(c(1L, 26L, 50L, 75L, 100L)) + q = as.integer64(c(1L, 26L, 51L, 75L, 100L)) expect_identical(quantile(x, names=FALSE), q) expect_identical(median(x), q[3L]) names(q) = c('0%', '25%', '50%', '75%', '100%') @@ -292,7 +292,7 @@ test_that("sorting methods work", { expect_error(quantile(NA_integer64_), "missing values not allowed") x = as.integer64(1:100) - q = as.integer64(c(1L, 26L, 50L, 75L, 100L)) + q = as.integer64(c(1L, 26L, 51L, 75L, 100L)) names(q) = c('0%', '25%', '50%', '75%', '100%') expect_identical(qtile(x, method="sortqtl"), q) expect_identical(qtile(x, method="orderqtl"), q) diff --git a/tests/testthat/test-sortuse64.R b/tests/testthat/test-sortuse64.R index 91ec931..4426db5 100644 --- a/tests/testthat/test-sortuse64.R +++ b/tests/testthat/test-sortuse64.R @@ -173,3 +173,22 @@ test_that("sortorderkey works", { # Note: NA_integer_ is used for NAs in the key vector expect_identical(sortorderkey(x_na, o_na, na.skip.num=2L), c(NA_integer_, NA_integer_, 1L, 2L)) }) + +with_parameters_test_that("quantile, median", { + x32 = as.integer(x) + x64 = as.integer64(x32) + convert_x32_result_to_integer64 = function(x) { + myRound = function(x) {res = round(x); 
res[x%%1 == 0.5] = ceiling(x[x%%1 == 0.5]); res} + setNames(as.integer64(myRound(x)), names(x)) + } + expect_identical(quantile(x64, probs=probs, na.rm=TRUE), convert_x32_result_to_integer64(quantile(x32, probs=probs, na.rm=TRUE))) + expect_identical(median(x64, na.rm=TRUE), convert_x32_result_to_integer64(median(x32, na.rm=TRUE))) + }, .cases = expand.grid( + x = I(list(c(1, 5, 7, NA, 1), c(1, 5, 7, NA, 1), 1:2, 1:3, c(1, 3), c(-5, -2, 0, 2))), + probs = I(list(c(0, 0.25, 0.5, 0.75, 1), c(0.1, 0.6, 0.9))) + ) +) + +test_that("special median", { + expect_identical(median(as.integer64(c("1152921504606846976", "1152921504606847232"))), as.integer64("1152921504606847104")) +})