From 85adf09e3463838d547977ae9bc75e3b37f9cbaf Mon Sep 17 00:00:00 2001 From: Jan Gorecki Date: Mon, 8 Mar 2021 02:11:19 +0200 Subject: [PATCH 01/16] Internal class aware coerceAs. Already used in nafill and froll (#4491) --- NEWS.md | 11 +++ R/data.table.R | 3 +- R/shift.R | 4 - R/wrappers.R | 3 +- inst/tests/froll.Rraw | 12 +-- inst/tests/nafill.Rraw | 203 ++++++++++++++++++++++++++++++++--------- inst/tests/tests.Rraw | 4 +- man/froll.Rd | 2 +- man/nafill.Rd | 2 +- src/assign.c | 31 ++++--- src/data.table.h | 5 +- src/frollR.c | 67 ++++---------- src/init.c | 8 +- src/nafill.c | 114 +++++++++++++---------- src/types.h | 3 +- src/utils.c | 114 +++++++++++------------ 16 files changed, 353 insertions(+), 233 deletions(-) diff --git a/NEWS.md b/NEWS.md index a51de94eb6..0ec4b3d736 100644 --- a/NEWS.md +++ b/NEWS.md @@ -6,10 +6,21 @@ ## NEW FEATURES +1. `nafill()` now applies `fill=` to the front/back of the vector when `type="locf|nocb"`, [#3594](https://github.com/Rdatatable/data.table/issues/3594). Thanks to @ben519 for the feature request. It also now returns a named object based on the input names. Note that if you are considering joining and then using `nafill(...,type='locf|nocb')` afterwards, please review `roll=`/`rollends=` which should achieve the same result in one step more efficiently. `nafill()` is for when filling-while-joining (i.e. `roll=`/`rollends=`/`nomatch=`) cannot be applied. + ## BUG FIXES ## NOTES +1. New feature 29 in v1.12.4 (Oct 2019) introduced zero-copy coercion. Our thinking is that requiring you to get the type right in the case of `0` (type double) vs `0L` (type integer) is too inconvenient for you the user. So such coercions happen in `data.table` automatically without warning. Thanks to zero-copy coercion there is no speed penalty, even when calling `set()` many times in a loop, so there's no speed penalty to warn you about either. However, we believe that assigning a character value such as `"2"` into an integer column is more likely to be a user mistake that you would like to be warned about. The type difference (character vs integer) may be the only clue that you have selected the wrong column, or typed the wrong variable to be assigned to that column. For this reason we view character to numeric-like coercion differently and will warn about it. If it is correct, then the warning is intended to nudge you to wrap the RHS with `as.()` so that it is clear to readers of your code that a coercion from character to that type is intended. For example : + + ```R + x = c(2L,NA,4L,5L) + nafill(x, fill=3) # no warning; requiring 3L too inconvenient + nafill(x, fill="3") # warns in case either x or "3" was a mistake + nafill(x, fill=3.14) # warns that precision has been lost + nafill(x, fill=as.integer(3.14)) # no warning; the as. conveys intent + ``` # data.table [v1.14.0](https://github.com/Rdatatable/data.table/milestone/23?closed=1) (submitted to CRAN on 20 Feb 2021) diff --git a/R/data.table.R b/R/data.table.R index 2b010db77a..961d9eb857 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -141,7 +141,8 @@ replace_dot_alias = function(e) { return(ans) } if (!missing(verbose)) { - stopifnot(isTRUEorFALSE(verbose)) + if (!is.integer(verbose) && !is.logical(verbose)) stop("verbose must be logical or integer") + if (length(verbose)!=1 || anyNA(verbose)) stop("verbose must be length 1 non-NA") # set the global verbose option because that is fetched from C code without having to pass it through oldverbose = options(datatable.verbose=verbose) on.exit(options(oldverbose)) diff --git a/R/shift.R b/R/shift.R index 63a1cdec42..c73d8b0840 100644 --- a/R/shift.R +++ b/R/shift.R @@ -26,14 +26,10 @@ shift = function(x, n=1L, fill=NA, type=c("lag", "lead", "shift"), give.names=FA nafill = function(x, type=c("const","locf","nocb"), fill=NA, nan=NA) { type = match.arg(type) - if (type!="const" && !missing(fill)) - warning("argument 'fill' ignored, only make sense for type='const'") .Call(CnafillR, x, type, fill, nan_is_na(nan), FALSE, NULL) } setnafill = function(x, type=c("const","locf","nocb"), fill=NA, nan=NA, cols=seq_along(x)) { type = match.arg(type) - if (type!="const" && !missing(fill)) - warning("argument 'fill' ignored, only make sense for type='const'") invisible(.Call(CnafillR, x, type, fill, nan_is_na(nan), TRUE, cols)) } diff --git a/R/wrappers.R b/R/wrappers.R index 5fec33a92f..0c226b9f30 100644 --- a/R/wrappers.R +++ b/R/wrappers.R @@ -9,6 +9,7 @@ fifelse = function(test, yes, no, na=NA) .Call(CfifelseR, test, yes, no, na) fcase = function(..., default=NA) .Call(CfcaseR, default, parent.frame(), as.list(substitute(list(...)))[-1L]) colnamesInt = function(x, cols, check_dups=FALSE) .Call(CcolnamesInt, x, cols, check_dups) -coerceFill = function(x) .Call(CcoerceFillR, x) testMsg = function(status=0L, nx=2L, nk=2L) .Call(CtestMsgR, as.integer(status)[1L], as.integer(nx)[1L], as.integer(nk)[1L]) + +coerceAs = function(x, as, copy=TRUE) .Call(CcoerceAs, x, as, copy) diff --git a/inst/tests/froll.Rraw b/inst/tests/froll.Rraw index 84143e587c..f6a4f96a80 100644 --- a/inst/tests/froll.Rraw +++ b/inst/tests/froll.Rraw @@ -78,15 +78,15 @@ test(6000.011, frollmean(x, n, adaptive=TRUE), list(c(NA, 1, 1.25), c(NA, 1, 1.2 #### error on unsupported type dx = data.table(real=1:10/2, char=letters[1:10]) -test(6000.012, frollmean(dx, 3), error="x must be list, data.frame or data.table of numeric or logical types") +test(6000.012, frollmean(dx, 3), error="x must be of type numeric or logical, or a list, data.frame or data.table of such") dx = data.table(real=1:10/2, fact=factor(letters[1:10])) -test(6000.013, frollmean(dx, 3), error="x must be list, data.frame or data.table of numeric or logical types") +test(6000.013, frollmean(dx, 3), error="x must be of type numeric or logical, or a list, data.frame or data.table of such") #dx = data.table(real=1:10/2, logi=logical(10)) #test(6000.014, frollmean(dx, 3), error="x must be list, data.frame or data.table of numeric types") # commented out as support added in #3749, tested in .009 dx = data.table(real=1:10/2, list=rep(list(NA), 10)) -test(6000.015, frollmean(dx, 3), error="x must be list, data.frame or data.table of numeric or logical types") +test(6000.015, frollmean(dx, 3), error="x must be of type numeric or logical, or a list, data.frame or data.table of such") x = letters[1:10] -test(6000.016, frollmean(x, 3), error="x must be of type numeric or logical") +test(6000.016, frollmean(x, 3), error="x must be of type numeric or logical, or a list, data.frame or data.table of such") x = 1:10/2 test(6000.017, frollmean(x, "a"), error="n must be integer") test(6000.018, frollmean(x, factor("a")), error="n must be integer") @@ -355,8 +355,8 @@ test(6000.074, frollmean(1:3, 2, fill=0L), c(0, 1.5, 2.5)) test(6000.075, frollmean(1:3, 2, fill=NA_integer_), c(NA_real_, 1.5, 2.5)) test(6000.076, frollmean(1:3, 2, fill=1:2), error="fill must be a vector of length 1") test(6000.077, frollmean(1:3, 2, fill=NA), c(NA_real_, 1.5, 2.5)) -test(6000.078, frollmean(1:3, 2, fill=TRUE), error="fill must be numeric") -test(6000.079, frollmean(1:3, 2, fill=FALSE), error="fill must be numeric") +test(6000.078, frollmean(1:3, 2, fill=TRUE), frollmean(1:3, 2, fill=1)) #error="fill must be numeric") # fill already coerced, as 'x' arg +test(6000.079, frollmean(1:3, 2, fill=FALSE), frollmean(1:3, 2, fill=0)) #error="fill must be numeric") test(6000.080, frollmean(1:3, 2, fill="a"), error="fill must be numeric") test(6000.081, frollmean(1:3, 2, fill=factor("a")), error="fill must be numeric") test(6000.082, frollmean(1:3, 2, fill=list(NA)), error="fill must be numeric") diff --git a/inst/tests/nafill.Rraw b/inst/tests/nafill.Rraw index f22a66f702..dcaa0f40d4 100644 --- a/inst/tests/nafill.Rraw +++ b/inst/tests/nafill.Rraw @@ -7,13 +7,7 @@ if (exists("test.data.table", .GlobalEnv, inherits=FALSE)) { test = data.table:::test INT = data.table:::INT colnamesInt = data.table:::colnamesInt - coerceFill = data.table:::coerceFill - # masked by which package? - # ================================= - copy = data.table::copy # bit64; copy is used in this file, so this line is needed - setattr = data.table::setattr # bit ; setattr does not appear in this file, so not needed. Here in case that changes. - # use of copy and setattr within data.table's own code is not masked by other packages - # we only have to do this in test files because, like a user would, these test files run like a user + coerceAs = data.table:::coerceAs } sugg = c( @@ -34,7 +28,7 @@ test(1.04, nafill(x, fill=5), INT(5,5,3,4,5,5,7,8,5,5)) test(1.05, nafill(x, fill=NA_integer_), x) test(1.06, nafill(x, fill=NA), x) test(1.07, nafill(x, fill=NA_real_), x) -test(1.08, nafill(x, fill=Inf), x) +test(1.08, nafill(x, fill=Inf), x, warning="precision lost") test(1.09, nafill(x, fill=NaN), x) y = x/2 test(1.11, nafill(y, "locf"), c(NA,NA,3,4,4,4,7,8,8,8)/2) @@ -53,31 +47,31 @@ z[9L] = -Inf test(1.21, nafill(z, "locf"), c(NA,Inf,3,4,4,4,7,8,-Inf,-Inf)/2) test(1.22, nafill(z, "nocb"), c(Inf,Inf,3,4,7,7,7,8,-Inf,NA)/2) dt = data.table(x, y, z) -test(1.31, nafill(dt, "locf"), unname(lapply(dt, nafill, "locf"))) -test(1.32, nafill(dt, "nocb"), unname(lapply(dt, nafill, "nocb"))) -test(1.33, nafill(dt, fill=0), unname(lapply(dt, nafill, fill=0))) +test(1.31, nafill(dt, "locf"), lapply(dt, nafill, "locf")) +test(1.32, nafill(dt, "nocb"), lapply(dt, nafill, "nocb")) +test(1.33, nafill(dt, fill=0), lapply(dt, nafill, fill=0)) l = list(x, y[1:8], z[1:6]) test(1.41, nafill(l, "locf"), lapply(l, nafill, "locf")) test(1.42, nafill(l, "nocb"), lapply(l, nafill, "nocb")) test(1.43, nafill(l, fill=0), lapply(l, nafill, fill=0)) l = list(a=c(1:2,NA,4:5), b=as.Date(c(1:2,NA,4:5), origin="1970-01-01"), d=c(NA,2L,NA,4L,NA), e=as.Date(c(NA,2L,NA,4L,NA), origin="1970-01-01")) # Date retain class #3617 -test(1.44, nafill(l, "locf"), list(c(1:2,2L,4:5), structure(c(1,2,2,4,5), class="Date"), c(NA,2L,2L,4L,4L), structure(c(NA,2,2,4,4), class="Date"))) -test(1.45, nafill(l, "nocb"), list(c(1:2,4L,4:5), structure(c(1,2,4,4,5), class="Date"), c(2L,2L,4L,4L,NA), structure(c(2,2,4,4,NA), class="Date"))) -test(1.46, nafill(l, fill=0), list(c(1:2,0L,4:5), structure(c(1,2,0,4,5), class="Date"), c(0L,2L,0L,4L,0L), structure(c(0,2,0,4,0), class="Date"))) -test(1.47, nafill(l, fill=as.Date(0, origin="1970-01-01")), list(c(1:2,0L,4:5), structure(c(1,2,0,4,5), class="Date"), c(0L,2L,0L,4L,0L), structure(c(0,2,0,4,0), class="Date"))) -test(1.48, nafill(l, fill=as.Date("2019-06-05")), list(c(1:2,18052L,4:5), structure(c(1,2,18052,4,5), class="Date"), c(18052L,2L,18052L,4L,18052L), structure(c(18052,2,18052,4,18052), class="Date"))) +test(1.44, nafill(l, "locf"), list(a=c(1:2,2L,4:5), b=structure(c(1,2,2,4,5), class="Date"), d=c(NA,2L,2L,4L,4L), e=structure(c(NA,2,2,4,4), class="Date"))) +test(1.45, nafill(l, "nocb"), list(a=c(1:2,4L,4:5), b=structure(c(1,2,4,4,5), class="Date"), d=c(2L,2L,4L,4L,NA), e=structure(c(2,2,4,4,NA), class="Date"))) +test(1.46, nafill(l, fill=0), list(a=c(1:2,0L,4:5), b=structure(c(1,2,0,4,5), class="Date"), d=c(0L,2L,0L,4L,0L), e=structure(c(0,2,0,4,0), class="Date"))) +test(1.47, nafill(l, fill=as.Date(0, origin="1970-01-01")), list(a=c(1:2,0L,4:5), b=structure(c(1,2,0,4,5), class="Date"), d=c(0L,2L,0L,4L,0L), e=structure(c(0,2,0,4,0), class="Date"))) +test(1.48, nafill(l, fill=as.Date("2019-06-05")), list(a=c(1:2,18052L,4:5), b=structure(c(1,2,18052,4,5), class="Date"), d=c(18052L,2L,18052L,4L,18052L), e=structure(c(18052,2,18052,4,18052), class="Date"))) test(1.49, nafill(numeric()), numeric()) if (test_bit64) { l = list(a=as.integer64(c(1:2,NA,4:5)), b=as.integer64(c(NA,2L,NA,4L,NA))) - test(1.61, lapply(nafill(l, "locf"), as.character), lapply(list(c(1:2,2L,4:5), c(NA,2L,2L,4L,4L)), as.character)) - test(1.62, lapply(nafill(l, "nocb"), as.character), lapply(list(c(1:2,4L,4:5), c(2L,2L,4L,4L,NA)), as.character)) - test(1.63, lapply(nafill(l, fill=0), as.character), lapply(list(c(1:2,0L,4:5), c(0L,2L,0L,4L,0L)), as.character)) - test(1.64, lapply(nafill(l, fill=as.integer64(0)), as.character), lapply(list(c(1:2,0L,4:5), c(0L,2L,0L,4L,0L)), as.character)) - test(1.65, lapply(nafill(l, fill=as.integer64("3000000000")), as.character), list(c("1","2","3000000000","4","5"), c("3000000000","2","3000000000","4","3000000000"))) + test(1.61, lapply(nafill(l, "locf"), as.character), lapply(list(a=c(1:2,2L,4:5), b=c(NA,2L,2L,4L,4L)), as.character)) + test(1.62, lapply(nafill(l, "nocb"), as.character), lapply(list(a=c(1:2,4L,4:5), b=c(2L,2L,4L,4L,NA)), as.character)) + test(1.63, lapply(nafill(l, fill=0), as.character), lapply(list(a=c(1:2,0L,4:5), b=c(0L,2L,0L,4L,0L)), as.character)) + test(1.64, lapply(nafill(l, fill=as.integer64(0)), as.character), lapply(list(a=c(1:2,0L,4:5), b=c(0L,2L,0L,4L,0L)), as.character)) + test(1.65, lapply(nafill(l, fill=as.integer64("3000000000")), as.character), list(a=c("1","2","3000000000","4","5"), b=c("3000000000","2","3000000000","4","3000000000"))) l = lapply(l, `+`, as.integer64("3000000000")) - test(1.66, lapply(nafill(l, "locf"), as.character), list(c("3000000001","3000000002","3000000002","3000000004","3000000005"), c(NA_character_,"3000000002","3000000002","3000000004","3000000004"))) - test(1.67, lapply(nafill(l, "nocb"), as.character), list(c("3000000001","3000000002","3000000004","3000000004","3000000005"), c("3000000002","3000000002","3000000004","3000000004",NA_character_))) - test(1.68, lapply(nafill(l, fill=as.integer64("3000000000")), as.character), list(c("3000000001","3000000002","3000000000","3000000004","3000000005"), c("3000000000","3000000002","3000000000","3000000004","3000000000"))) + test(1.66, lapply(nafill(l, "locf"), as.character), list(a=c("3000000001","3000000002","3000000002","3000000004","3000000005"), b=c(NA_character_,"3000000002","3000000002","3000000004","3000000004"))) + test(1.67, lapply(nafill(l, "nocb"), as.character), list(a=c("3000000001","3000000002","3000000004","3000000004","3000000005"), b=c("3000000002","3000000002","3000000004","3000000004",NA_character_))) + test(1.68, lapply(nafill(l, fill=as.integer64("3000000000")), as.character), list(a=c("3000000001","3000000002","3000000000","3000000004","3000000005"), b=c("3000000000","3000000002","3000000000","3000000004","3000000000"))) test(1.69, nafill(c(1L,2L,NA,4L), fill=as.integer64(3L)), 1:4) test(1.70, nafill(c(1L,2L,NA,4L), fill=as.integer64(NA)), c(1:2,NA,4L)) test(1.71, nafill(c(1,2,NA,4), fill=as.integer64(3)), c(1,2,3,4)) @@ -90,10 +84,10 @@ if (test_bit64) { } if (test_nanotime) { l = list(a=nanotime(c(1:2,NA,4:5)), b=nanotime(c(NA,2L,NA,4L,NA))) - test(1.91, lapply(nafill(l, "locf"), as.character), lapply(list(nanotime(c(1:2,2L,4:5)), nanotime(c(NA,2L,2L,4L,4L))), as.character)) - test(1.92, lapply(nafill(l, "nocb"), as.character), lapply(list(nanotime(c(1:2,4L,4:5)), nanotime(c(2L,2L,4L,4L,NA))), as.character)) - test(1.93, lapply(nafill(l, fill=0), as.character), lapply(list(nanotime(c(1:2,0L,4:5)), nanotime(c(0L,2L,0L,4L,0L))), as.character)) - test(1.94, lapply(nafill(l, fill=nanotime(0)), as.character), lapply(list(nanotime(c(1:2,0L,4:5)), nanotime(c(0L,2L,0L,4L,0L))), as.character)) + test(1.91, lapply(nafill(l, "locf"), as.character), lapply(list(a=nanotime(c(1:2,2L,4:5)), b=nanotime(c(NA,2L,2L,4L,4L))), as.character)) + test(1.92, lapply(nafill(l, "nocb"), as.character), lapply(list(a=nanotime(c(1:2,4L,4:5)), b=nanotime(c(2L,2L,4L,4L,NA))), as.character)) + test(1.93, lapply(nafill(l, fill=0), as.character), lapply(list(a=nanotime(c(1:2,0L,4:5)), b=nanotime(c(0L,2L,0L,4L,0L))), as.character)) + test(1.94, lapply(nafill(l, fill=nanotime(0)), as.character), lapply(list(a=nanotime(c(1:2,0L,4:5)), b=nanotime(c(0L,2L,0L,4L,0L))), as.character)) } # setnafill @@ -120,13 +114,13 @@ test(2.08, unname(l), list(c(1:2,18052L,4:5), structure(c(1,2,18052,4,5), class= # exceptions test coverage x = 1:10 -test(3.01, nafill(x, "locf", fill=0L), nafill(x, "locf"), warning="argument 'fill' ignored") -test(3.02, setnafill(list(copy(x)), "locf", fill=0L), setnafill(list(copy(x)), "locf"), warning="argument 'fill' ignored") +test(3.01, nafill(x, "locf", fill=0L), x) +test(3.02, setnafill(list(copy(x)), "locf", fill=0L), list(x)) test(3.03, setnafill(x, "locf"), error="in-place update is supported only for list") test(3.04, nafill(letters[1:5], fill=0), error="must be numeric type, or list/data.table") test(3.05, setnafill(list(letters[1:5]), fill=0), error="must be numeric type, or list/data.table") test(3.06, nafill(x, fill=1:2), error="fill must be a vector of length 1") -test(3.07, nafill(x, fill="asd"), error="fill argument must be numeric") +test(3.07, nafill(x, fill="asd"), x, warning=c("Coercing.*character.*integer","NAs introduced by coercion")) # colnamesInt helper dt = data.table(a=1, b=2, d=3) @@ -166,32 +160,33 @@ if (test_bit64) { } options(old) -# coerceFill +# coerceAs int/numeric/int64 as used in nafill if (test_bit64) { - test(6.01, coerceFill(1:2), error="fill argument must be length 1") - test(6.02, coerceFill("a"), error="fill argument must be numeric") + coerceFill = function(x) lapply(list(1L, 1.0, as.integer64(1)), coerceAs, x=x) # old function used before #4491 + #test(6.01, coerceFill(1:2), error="fill argument must be length 1") + #test(6.02, coerceFill("a"), error="fill argument must be numeric") test(6.11, identical(coerceFill(NA), list(NA_integer_, NA_real_, as.integer64(NA)))) test(6.21, identical(coerceFill(3L), list(3L, 3, as.integer64(3)))) test(6.22, identical(coerceFill(0L), list(0L, 0, as.integer64(0)))) test(6.23, identical(coerceFill(NA_integer_), list(NA_integer_, NA_real_, as.integer64(NA)))) test(6.31, identical(coerceFill(as.integer64(3)), list(3L, 3, as.integer64(3)))) - test(6.32, identical(coerceFill(as.integer64(3000000003)), list(NA_integer_, 3000000003, as.integer64("3000000003")))) + test(6.32, identical(coerceFill(as.integer64(3000000003)), list(NA_integer_, 3000000003, as.integer64("3000000003"))), warning="out-of-range") test(6.33, identical(coerceFill(as.integer64(0)), list(0L, 0, as.integer64(0)))) test(6.34, identical(coerceFill(as.integer64(NA)), list(NA_integer_, NA_real_, as.integer64(NA)))) test(6.41, identical(coerceFill(3), list(3L, 3, as.integer64(3)))) test(6.42, identical(coerceFill(0), list(0L, 0, as.integer64(0)))) test(6.43, identical(coerceFill(NA_real_), list(NA_integer_, NA_real_, as.integer64(NA)))) test(6.44, identical(coerceFill(NaN), list(NA_integer_, NaN, as.integer64(NA)))) - test(6.45, identical(coerceFill(Inf), list(NA_integer_, Inf, as.integer64(NA)))) - test(6.46, identical(coerceFill(-Inf), list(NA_integer_, -Inf, as.integer64(NA)))) - test(6.47, identical(coerceFill(-(2^62)), list(NA_integer_, -(2^62), as.integer64("-4611686018427387904")))) - test(6.48, identical(coerceFill(-(2^64)), list(NA_integer_, -(2^64), as.integer64(NA)))) + test(6.45, identical(coerceFill(Inf), list(NA_integer_, Inf, as.integer64(NA))), warning=c("precision lost","precision lost")) + test(6.46, identical(coerceFill(-Inf), list(NA_integer_, -Inf, as.integer64(NA))), warning=c("precision lost","precision lost")) + test(6.47, identical(coerceFill(-(2^62)), list(NA_integer_, -(2^62), as.integer64("-4611686018427387904"))), warning=c("precision lost","precision lost")) + test(6.48, identical(coerceFill(-(2^64)), list(NA_integer_, -(2^64), as.integer64(NA))), warning=c("precision lost","precision lost")) test(6.49, identical(coerceFill(x<-as.integer64(-2147483647)), list(-2147483647L, -2147483647, x))) - test(6.50, identical(coerceFill(x<-as.integer64(-2147483648)), list(NA_integer_, -2147483648, x))) - test(6.51, identical(coerceFill(x<-as.integer64(-2147483649)), list(NA_integer_, -2147483649, x))) + test(6.50, identical(coerceFill(x<-as.integer64(-2147483648)), list(NA_integer_, -2147483648, x)), warning="out-of-range") + test(6.51, identical(coerceFill(x<-as.integer64(-2147483649)), list(NA_integer_, -2147483649, x)), warning="out-of-range") test(6.52, identical(coerceFill(-2147483647), list(-2147483647L, -2147483647, as.integer64("-2147483647")))) test(6.53, identical(coerceFill(-2147483648), list(NA_integer_, -2147483648, as.integer64("-2147483648")))) - test(6.54, identical(coerceFill(-2147483649), list(NA_integer_, -2147483649, as.integer64("-2147483649")))) + test(6.54, identical(coerceFill(-2147483649), list(NA_integer_, -2147483649, as.integer64("-2147483649"))), warning=c("precision lost","precision lost")) } # nan argument to treat NaN as NA in nafill, #4020 @@ -209,3 +204,127 @@ test(7.07, setnafill(DT, fill=0, cols=1L), copy(DT)[ , a := ans1]) test(7.08, setnafill(DT, fill=0, nan=NaN), copy(DT)[ , c('a', 'b') := .(ans1, ans2)]) test(7.09, nafill(x, fill=0, nan=c(NA, NaN)), error="Argument 'nan' must be length 1") test(7.10, nafill(x, fill=0, nan=Inf), error="Argument 'nan' must be NA or NaN") + +# new tests for fill list +d = data.table(x = c(1:2,NA,4L), y = c(1,2,NA,4)) +test(8.01, nafill(d, fill=3), list(x=1:4, y=c(1,2,3,4))) +test(8.02, nafill(d, fill=3L), list(x=1:4, y=c(1,2,3,4))) +test(8.03, nafill(d, fill=list(3L,3)), list(x=1:4, y=c(1,2,3,4))) +test(8.04, nafill(d, fill=list(3,3L)), list(x=1:4, y=c(1,2,3,4))) +test(8.05, nafill(d, fill=list(3,NA)), list(x=1:4, y=c(1,2,NA,4))) +test(8.06, nafill(d, fill=list(1,9L)), list(x=c(1:2,1L,4L), y=c(1,2,9,4))) +d = as.data.table(setNames(as.list(seq_along(letters)), letters)) ## test names and scalar returned +test(8.11, names(nafill(d, fill=3)), letters) +test(8.12, nafill(c(1:2,NA,4L), "locf"), c(1:2,2L,4L)) +test(8.13, nafill(list(x=c(1:2,NA,4L)), "locf"), list(x=c(1:2,2L,4L))) + +# Extend functionality of nafill to use 'fill' argument for all types #3594 +test(9.01, nafill(c(NA,1,NA,NA,5,3,NA,0), type="locf", fill=-1), `[<-`(nafill(c(NA,1,NA,NA,5,3,NA,0), type="locf"), 1L, -1)) +x = xx = c(rep(NA,2),3:4,rep(NA,2)) +test(9.11, nafill(x, "locf", 0), `[<-`(nafill(x, "locf"), 1:2, 0L)) +test(9.12, nafill(x, "nocb", 0), `[<-`(nafill(x, "nocb"), 5:6, 0L)) +test(9.13, nafill(x, "locf", -1), `[<-`(nafill(x, "locf"), 1:2, -1L)) +test(9.14, nafill(x, "nocb", -1), `[<-`(nafill(x, "nocb"), 5:6, -1L)) +x = as.double(xx) +test(9.21, nafill(x, "locf", 0), `[<-`(nafill(x, "locf"), 1:2, 0)) +test(9.22, nafill(x, "nocb", 0), `[<-`(nafill(x, "nocb"), 5:6, 0)) +test(9.23, nafill(x, "locf", -1), `[<-`(nafill(x, "locf"), 1:2, -1)) +test(9.24, nafill(x, "nocb", -1), `[<-`(nafill(x, "nocb"), 5:6, -1)) +if (test_bit64) { + x = as.integer64(xx) + # `[<-.integer64` does not work + seti64 = function(x, i, value) {x[i] = value; x} + test(9.31, nafill(x, "locf", 0), seti64(nafill(x, "locf"), 1:2, as.integer64(0))) + test(9.32, nafill(x, "nocb", 0), seti64(nafill(x, "nocb"), 5:6, as.integer64(0))) + test(9.33, nafill(x, "locf", -1), seti64(nafill(x, "locf"), 1:2, as.integer64(-1))) + test(9.34, nafill(x, "nocb", -1), seti64(nafill(x, "nocb"), 5:6, as.integer64(-1))) +} + +# coerceAs verbose +options(datatable.verbose=2L) +input = 1 +test(10.01, ans<-coerceAs(input, 1), 1, output="double[numeric] into double[numeric]") +test(10.02, address(input)!=address(ans)) +test(10.03, ans<-coerceAs(input, 1, copy=FALSE), 1, output="copy=false and input already of expected type and class double[numeric]") +test(10.04, address(input), address(ans)) +test(10.05, ans<-coerceAs(input, 1L), 1L, output="double[numeric] into integer[integer]") +test(10.06, address(input)!=address(ans)) +test(10.07, ans<-coerceAs(input, 1L, copy=FALSE), 1L, output="double[numeric] into integer[integer]", notOutput="copy=false") +test(10.08, address(input)!=address(ans)) +test(10.09, coerceAs("1", 1L), 1L, output="character[character] into integer[integer]", warning="Coercing.*character.*integer") +test(10.10, coerceAs("1", 1), 1, output="character[character] into double[numeric]", warning="Coercing.*character.*double") +test(10.11, coerceAs("a", factor("x")), factor("a", levels=c("x","a")), output="character[character] into integer[factor]") ## levels of 'as' are retained! +test(10.12, coerceAs("a", factor()), factor("a"), output="character[character] into integer[factor]") +test(10.13, coerceAs(1, factor("x")), factor("x"), output="double[numeric] into integer[factor]") +test(10.14, coerceAs(1, factor("x", levels=c("x","y"))), factor("x", levels=c("x","y")), output="double[numeric] into integer[factor]") +test(10.15, coerceAs(2, factor("x", levels=c("x","y"))), factor("y", levels=c("x","y")), output="double[numeric] into integer[factor]") +test(10.16, coerceAs(1:2, factor(c("x","y"))), factor(c("x","y")), output="integer[integer] into integer[factor]") +test(10.17, coerceAs(1:3, factor(c("x","y"))), output="integer[integer] into integer[factor]", error="factor numbers.*3 is outside the level range") +test(10.18, coerceAs(c(1,2,3), factor(c("x","y"))), output="double[numeric] into integer[factor]", error="factor numbers.*3.000000 is outside the level range") +test(10.19, coerceAs(factor("x"), factor(c("x","y"))), factor("x", levels=c("x","y")), output="integer[factor] into integer[factor]") +test(10.20, coerceAs(factor("x"), factor(c("x","y")), copy=FALSE), factor("x", levels=c("x","y")), output="input already of expected type and class") ## copy=F has copyMostAttrib +a = structure("a", class="a") +b = structure("b", class="b") +test(10.21, coerceAs(a, b), structure("a", class="b"), output="character[a] into character[b]") +a = structure(1L, class="a") +b = structure(2L, class="b") +test(10.22, coerceAs(a, b), structure(1L, class="b"), output="integer[a] into integer[b]") +a = structure(1, class="a") +b = structure(2, class="b") +test(10.23, coerceAs(a, b), structure(1, class="b"), output="double[a] into double[b]") +a = structure(1, class="a") +b = structure(2L, class="b") +test(10.24, coerceAs(a, b), structure(1L, class="b"), output="double[a] into integer[b]") +if (test_bit64) { + x = as.integer64(1L) + test(10.81, coerceAs(x, 1), 1, output="double[integer64] into double[numeric]") + test(10.82, coerceAs(x, 1L), 1L, output="double[integer64] into integer[integer]") + test(10.83, coerceAs(x, "1"), error="please use as.character", output="double[integer64] into character[character]") # not yet implemented + test(10.84, coerceAs(1, x), x, output="double[numeric] into double[integer64]") + test(10.85, coerceAs(1L, x), x, output="integer[integer] into double[integer64]") + test(10.86, coerceAs("1", x), x, output="character[character] into double[integer64]", warning="Coercing.*character") + options(datatable.verbose=3L) + test(10.87, coerceAs(x, 1L), 1L, output=c("double[integer64] into integer[integer]","Zero-copy coerce when assigning 'integer64' to 'integer'")) + test(10.88, coerceAs(1L, x), x, output=c("integer[integer] into double[integer64]","Zero-copy coerce when assigning 'integer' to 'integer64'")) + options(datatable.verbose=2L) +} +if (test_nanotime) { + x = nanotime(1L) + test(10.91, coerceAs(x, 1), 1, output="double[nanotime] into double[numeric]") + test(10.92, coerceAs(x, 1L), 1L, output="double[nanotime] into integer[integer]") + test(10.93, coerceAs(x, "1"), error="please use as.character", output="double[nanotime] into character[character]") # not yet implemented + test(10.94, coerceAs(1, x), x, output="double[numeric] into double[nanotime]") + test(10.95, coerceAs(1L, x), x, output="integer[integer] into double[nanotime]") + test(10.96, coerceAs("1", x), x, output="character[character] into double[nanotime]", warning="Coercing.*character") +} +options(datatable.verbose=FALSE) +test(11.01, coerceAs(list(a=1), 1), error="is not atomic") +test(11.02, coerceAs(1, list(a=1)), error="is not atomic") +test(11.03, coerceAs(sum, 1), error="is not atomic") +test(11.04, coerceAs(quote(1+1), 1), error="is not atomic") +test(11.05, coerceAs(as.name("x"), 1), error="is not atomic") +m = matrix(1:4, 2, 2) +a = array(1:8, c(2,2,2)) +test(11.06, coerceAs(m, 1L), error="must not be matrix or array") +test(11.07, coerceAs(1L, m), error="must not be matrix or array") +test(11.08, coerceAs(a, 1L), error="must not be matrix or array") +test(11.09, coerceAs(1L, a), error="must not be matrix or array") + +# nafill, setnafill for character, factor and other types #3992 +## logical +## character +## factor +## Date +## POSIXct +## IDate +## ITime +## nanotime + +# related to !is.integer(verbose) +test(99.1, data.table(a=1,b=2)[1,1, verbose=1], error="verbose must be logical or integer") +test(99.2, data.table(a=1,b=2)[1,1, verbose=1:2], error="verbose must be length 1 non-NA") +test(99.3, data.table(a=1,b=2)[1,1, verbose=NA], error="verbose must be length 1 non-NA") +options(datatable.verbose=1) +test(99.4, coerceAs(1, 2L), error="verbose option must be length 1 non-NA logical or integer") +options(datatable.verbose=FALSE) + diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index c5910f5c81..95b8ba4492 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -14352,7 +14352,7 @@ test(2005.09, set(DT, 1L, "c", expression(x+2)), error="type 'expression' cannot test(2005.10, set(DT, 1L, "d", expression(x+2)), error="type 'expression' cannot be coerced to 'logical'") test(2005.11, set(DT, 1L, "e", expression(x+2)), error="type 'expression' cannot be coerced to 'double'") test(2005.12, set(DT, 1L, "f", expression(x+2)), error="type 'expression' cannot be coerced to 'complex'") -test(2005.30, DT[2:3,c:=c(TRUE,FALSE), verbose=TRUE]$c, as.raw(INT(7,1,0)), +test(2005.30, DT[2:3,c:=c(TRUE,FALSE), verbose=3L]$c, as.raw(INT(7,1,0)), ## note verbose=3L for more deeper verbose output due to memrecycle messages when it is being re-used internally #4491 output="Zero-copy coerce when assigning 'logical' to 'raw' column 3 named 'c'") test(2005.31, set(DT,1L,"c",NA)$c, as.raw(INT(0,1,0))) test(2005.32, set(DT,1:2,"c",INT(-1,255))$c, as.raw(INT(0,255,0)), @@ -14388,7 +14388,7 @@ if (test_bit64) { warning="-1.*integer64.*position 1 taken as 0 when assigning.*raw.*column 3 named 'c'") test(2005.66, DT[2:3, f:=as.integer64(c(NA,"2147483648"))]$f, as.complex(c(-42,NA,2147483648))) DT[,h:=LETTERS[1:3]] - test(2005.67, DT[2:3, h:=as.integer64(1:2)], error="To assign integer64 to a character column, please use as.character.") + test(2005.67, DT[2:3, h:=as.integer64(1:2)], error="To assign integer64 to.*type character, please use as.character.") } # rbindlist raw type, #2819 diff --git a/man/froll.Rd b/man/froll.Rd index 070d28696d..388c47c485 100644 --- a/man/froll.Rd +++ b/man/froll.Rd @@ -25,7 +25,7 @@ frollapply(x, n, FUN, \dots, fill=NA, align=c("right", "left", "center")) \item{x}{ vector, list, data.frame or data.table of numeric or logical columns. } \item{n}{ integer vector, for adaptive rolling function also list of integer vectors, rolling window size. } - \item{fill}{ numeric, value to pad by. Defaults to \code{NA}. } + \item{fill}{ numeric or logical, value to pad by. Defaults to \code{NA}. } \item{algo}{ character, default \code{"fast"}. When set to \code{"exact"}, then slower algorithm is used. It suffers less from floating point rounding error, performs extra pass to adjust rounding error diff --git a/man/nafill.Rd b/man/nafill.Rd index f8afb1dcfa..480f6ae118 100644 --- a/man/nafill.Rd +++ b/man/nafill.Rd @@ -16,7 +16,7 @@ setnafill(x, type=c("const","locf","nocb"), fill=NA, nan=NA, cols=seq_along(x)) \arguments{ \item{x}{ vector, list, data.frame or data.table of numeric columns. } \item{type}{ character, one of \emph{"const"}, \emph{"locf"} or \emph{"nocb"}. Defaults to \code{"const"}. } - \item{fill}{ numeric or integer, value to be used to fill when \code{type=="const"}. } + \item{fill}{ numeric or integer, value to be used to fill. } \item{nan}{ (numeric \code{x} only) Either \code{NaN} or \code{NA}; if the former, \code{NaN} is treated as distinct from \code{NA}, otherwise, they are treated the same during replacement? } \item{cols}{ numeric or character vector specifying columns to be updated. } } diff --git a/src/assign.c b/src/assign.c index 5c0b808707..27fbccbd0e 100644 --- a/src/assign.c +++ b/src/assign.c @@ -696,6 +696,8 @@ const char *memrecycle(const SEXP target, const SEXP where, const int start, con if (colname==NULL) error(_("Internal error: memrecycle has received NULL colname")); // # nocov *memrecycle_message = '\0'; + static char targetDesc[501]; // from 1.14.1 coerceAs reuses memrecycle for a target vector, PR#4491 + snprintf(targetDesc, 500, colnum==0 ? _("target vector") : _("column %d named '%s'"), colnum, colname); int protecti=0; const bool sourceIsFactor=isFactor(source), targetIsFactor=isFactor(target); const bool sourceIsI64=isReal(source) && Rinherits(source, char_integer64); @@ -717,7 +719,7 @@ const char *memrecycle(const SEXP target, const SEXP where, const int start, con for (int i=0; inlevel) { - error(_("Assigning factor numbers to column %d named '%s'. But %d is outside the level range [1,%d]"), colnum, colname, val, nlevel); + error(_("Assigning factor numbers to %s. But %d is outside the level range [1,%d]"), targetDesc, val, nlevel); } } } else { @@ -725,7 +727,7 @@ const char *memrecycle(const SEXP target, const SEXP where, const int start, con for (int i=0; inlevel)) { - error(_("Assigning factor numbers to column %d named '%s'. But %f is outside the level range [1,%d], or is not a whole number."), colnum, colname, val, nlevel); + error(_("Assigning factor numbers to %s. But %f is outside the level range [1,%d], or is not a whole number."), targetDesc, val, nlevel); } } } @@ -817,27 +819,27 @@ const char *memrecycle(const SEXP target, const SEXP where, const int start, con } } } else if (isString(source) && !isString(target) && !isNewList(target)) { - warning(_("Coercing 'character' RHS to '%s' to match the type of the target column (column %d named '%s')."), - type2char(TYPEOF(target)), colnum, colname); + warning(_("Coercing 'character' RHS to '%s' to match the type of %s."), type2char(TYPEOF(target)), targetDesc); // this "Coercing ..." warning first to give context in case coerceVector warns 'NAs introduced by coercion' + // and also because 'character' to integer/double coercion is often a user mistake (e.g. wrong target column, or wrong + // variable on RHS) which they are more likely to appreciate than find inconvenient source = PROTECT(coerceVector(source, TYPEOF(target))); protecti++; } else if (isNewList(source) && !isNewList(target)) { if (targetIsI64) { - error(_("Cannot coerce 'list' RHS to 'integer64' to match the type of the target column (column %d named '%s')."), colnum, colname); + error(_("Cannot coerce 'list' RHS to 'integer64' to match the type of %s."), targetDesc); // because R's coerceVector doesn't know about integer64 } // as in base R; e.g. let as.double(list(1,2,3)) work but not as.double(list(1,c(2,4),3)) // relied on by NNS, simstudy and table.express; tests 1294.* - warning(_("Coercing 'list' RHS to '%s' to match the type of the target column (column %d named '%s')."), - type2char(TYPEOF(target)), colnum, colname); + warning(_("Coercing 'list' RHS to '%s' to match the type of %s."), type2char(TYPEOF(target)), targetDesc); source = PROTECT(coerceVector(source, TYPEOF(target))); protecti++; } else if ((TYPEOF(target)!=TYPEOF(source) || targetIsI64!=sourceIsI64) && !isNewList(target)) { - if (GetVerbose()) { + if (GetVerbose()>=3) { // only take the (small) cost of GetVerbose() (search of options() list) when types don't match - Rprintf(_("Zero-copy coerce when assigning '%s' to '%s' column %d named '%s'.\n"), + Rprintf(_("Zero-copy coerce when assigning '%s' to '%s' %s.\n"), sourceIsI64 ? "integer64" : type2char(TYPEOF(source)), targetIsI64 ? "integer64" : type2char(TYPEOF(target)), - colnum, colname); + targetDesc); } // The following checks are up front here, otherwise we'd need them twice in the two branches // inside BODY that cater for 'where' or not. Maybe there's a way to merge the two macros in future. @@ -850,10 +852,9 @@ const char *memrecycle(const SEXP target, const SEXP where, const int start, con if (COND) { \ const char *sType = sourceIsI64 ? "integer64" : type2char(TYPEOF(source)); \ const char *tType = targetIsI64 ? "integer64" : type2char(TYPEOF(target)); \ - int n = snprintf(memrecycle_message, MSGSIZE, \ - "%"FMT" (type '%s') at RHS position %d "TO" when assigning to type '%s'", val, sType, i+1, tType); \ - if (colnum>0 && n>0 && ndbl_v[0] = x[0]; if (nan_is_na) { + ans->dbl_v[0] = ISNAN(x[0]) ? fill : x[0]; for (uint_fast64_t i=1; idbl_v[i] = ISNAN(x[i]) ? ans->dbl_v[i-1] : x[i]; } } else { + ans->dbl_v[0] = ISNA(x[0]) ? fill : x[0]; for (uint_fast64_t i=1; idbl_v[i] = ISNA(x[i]) ? ans->dbl_v[i-1] : x[i]; } } } else if (type==2) { // nocb - ans->dbl_v[nx-1] = x[nx-1]; if (nan_is_na) { + ans->dbl_v[nx-1] = ISNAN(x[nx-1]) ? fill : x[nx-1]; for (int_fast64_t i=nx-2; i>=0; i--) { ans->dbl_v[i] = ISNAN(x[i]) ? ans->dbl_v[i+1] : x[i]; } } else { + ans->dbl_v[nx-1] = ISNA(x[nx-1]) ? fill : x[nx-1]; for (int_fast64_t i=nx-2; i>=0; i--) { ans->dbl_v[i] = ISNA(x[i]) ? ans->dbl_v[i+1] : x[i]; } @@ -49,12 +51,12 @@ void nafillInteger(int32_t *x, uint_fast64_t nx, unsigned int type, int32_t fill ans->int_v[i] = x[i]==NA_INTEGER ? fill : x[i]; } } else if (type==1) { // locf - ans->int_v[0] = x[0]; + ans->int_v[0] = x[0]==NA_INTEGER ? fill : x[0]; for (uint_fast64_t i=1; iint_v[i] = x[i]==NA_INTEGER ? ans->int_v[i-1] : x[i]; } } else if (type==2) { // nocb - ans->int_v[nx-1] = x[nx-1]; + ans->int_v[nx-1] = x[nx-1]==NA_INTEGER ? fill : x[nx-1]; for (int_fast64_t i=nx-2; i>=0; i--) { ans->int_v[i] = x[i]==NA_INTEGER ? ans->int_v[i+1] : x[i]; } @@ -71,12 +73,12 @@ void nafillInteger64(int64_t *x, uint_fast64_t nx, unsigned int type, int64_t fi ans->int64_v[i] = x[i]==NA_INTEGER64 ? fill : x[i]; } } else if (type==1) { // locf - ans->int64_v[0] = x[0]; + ans->int64_v[0] = x[0]==NA_INTEGER64 ? fill : x[0]; for (uint_fast64_t i=1; iint64_v[i] = x[i]==NA_INTEGER64 ? ans->int64_v[i-1] : x[i]; } } else if (type==2) { // nocb - ans->int64_v[nx-1] = x[nx-1]; + ans->int64_v[nx-1] = x[nx-1]==NA_INTEGER64 ? fill : x[nx-1]; for (int_fast64_t i=nx-2; i>=0; i--) { ans->int64_v[i] = x[i]==NA_INTEGER64 ? ans->int64_v[i+1] : x[i]; } @@ -92,25 +94,34 @@ SEXP nafillR(SEXP obj, SEXP type, SEXP fill, SEXP nan_is_na_arg, SEXP inplace, S if (!xlength(obj)) return(obj); + double tic=0.0; + if (verbose) + tic = omp_get_wtime(); + bool binplace = LOGICAL(inplace)[0]; + if (!IS_TRUE_OR_FALSE(nan_is_na_arg)) + error("nan_is_na must be TRUE or FALSE"); // # nocov + bool nan_is_na = LOGICAL(nan_is_na_arg)[0]; + SEXP x = R_NilValue; - if (isVectorAtomic(obj)) { + bool obj_scalar = isVectorAtomic(obj); + if (obj_scalar) { if (binplace) error(_("'x' argument is atomic vector, in-place update is supported only for list/data.table")); else if (!isReal(obj) && !isInteger(obj)) error(_("'x' argument must be numeric type, or list/data.table of numeric types")); - x = PROTECT(allocVector(VECSXP, 1)); protecti++; // wrap into list - SET_VECTOR_ELT(x, 0, obj); - } else { - SEXP ricols = PROTECT(colnamesInt(obj, cols, ScalarLogical(TRUE))); protecti++; // nafill cols=NULL which turns into seq_along(obj) - x = PROTECT(allocVector(VECSXP, length(ricols))); protecti++; - int *icols = INTEGER(ricols); - for (int i=0; i1) num_threads(getDTthreads(nx, true)) for (R_len_t i=0; iINT32_MAX || rfill<=INT32_MIN) ? NA_INTEGER : (int32_t)rfill; - dfill[0] = (double)rfill; - i64fill[0] = rfill; - } - } else { - double rfill = REAL(fill)[0]; - if (ISNAN(rfill)) { - // NA -> NA, NaN -> NaN - ifill[0] = NA_INTEGER; dfill[0] = rfill; i64fill[0] = NA_INTEGER64; - } else { - ifill[0] = (!R_FINITE(rfill) || rfill>INT32_MAX || rfill<=INT32_MIN) ? NA_INTEGER : (int32_t)rfill; - dfill[0] = rfill; - i64fill[0] = (!R_FINITE(rfill) || rfill>(double)INT64_MAX || rfill<=(double)INT64_MIN) ? NA_INTEGER64 : (int64_t)rfill; - } - } - } else if (isLogical(fill) && LOGICAL(fill)[0]==NA_LOGICAL) { - ifill[0] = NA_INTEGER; dfill[0] = NA_REAL; i64fill[0] = NA_INTEGER64; - } else { - error(_("%s: fill argument must be numeric"), __func__); - } -} -SEXP coerceFillR(SEXP fill) { - int protecti=0; - double dfill=NA_REAL; - int32_t ifill=NA_INTEGER; - int64_t i64fill=NA_INTEGER64; - coerceFill(fill, &dfill, &ifill, &i64fill); - SEXP ans = PROTECT(allocVector(VECSXP, 3)); protecti++; - SET_VECTOR_ELT(ans, 0, allocVector(INTSXP, 1)); - SET_VECTOR_ELT(ans, 1, allocVector(REALSXP, 1)); - SET_VECTOR_ELT(ans, 2, allocVector(REALSXP, 1)); - INTEGER(VECTOR_ELT(ans, 0))[0] = ifill; - REAL(VECTOR_ELT(ans, 1))[0] = dfill; - ((int64_t *)REAL(VECTOR_ELT(ans, 2)))[0] = i64fill; - setAttrib(VECTOR_ELT(ans, 2), R_ClassSymbol, ScalarString(char_integer64)); - UNPROTECT(protecti); - return ans; -} - inline bool INHERITS(SEXP x, SEXP char_) { // Thread safe inherits() by pre-calling install() in init.c and then // passing those char_* in here for simple and fast non-API pointer compare. @@ -374,6 +319,64 @@ SEXP coerceUtf8IfNeeded(SEXP x) { return(ans); } +// class1 is used by coerseAs only, which is used by frollR.c and nafill.c only +const char *class1(SEXP x) { + SEXP cl = getAttrib(x, R_ClassSymbol); + if (length(cl)) + return(CHAR(STRING_ELT(cl, 0))); + SEXP d = getAttrib(x, R_DimSymbol); + int nd = length(d); + if (nd) { + if (nd==2) + return "matrix"; + else + return "array"; + } + SEXPTYPE t = TYPEOF(x); + // see TypeTable in src/main/utils.c to compare to the differences here vs type2char + switch(t) { + case CLOSXP: case SPECIALSXP: case BUILTINSXP: + return "function"; + case REALSXP: + return "numeric"; + case SYMSXP: + return "name"; + case LANGSXP: + return "call"; + default: + return type2char(t); + } +} + +// main motivation for this function is to have coercion helper that is aware of int64 NAs, unline base R coerce #3913 +SEXP coerceAs(SEXP x, SEXP as, SEXP copyArg) { + // copyArg does not update in place, but only IF an object is of the same type-class as class to be coerced, it will return with no copy + if (!isVectorAtomic(x)) + error("'x' is not atomic"); + if (!isVectorAtomic(as)) + error("'as' is not atomic"); + if (!isNull(getAttrib(x, R_DimSymbol))) + error("'x' must not be matrix or array"); + if (!isNull(getAttrib(as, R_DimSymbol))) + error("'as' must not be matrix or array"); + bool verbose = GetVerbose()>=2; // verbose level 2 required + if (!LOGICAL(copyArg)[0] && TYPEOF(x)==TYPEOF(as) && class1(x)==class1(as)) { + if (verbose) + Rprintf("copy=false and input already of expected type and class %s[%s]\n", type2char(TYPEOF(x)), class1(x)); + copyMostAttrib(as, x); // so attrs like factor levels are same for copy=T|F + return(x); + } + int len = LENGTH(x); + SEXP ans = PROTECT(allocNAVectorLike(as, len)); + if (verbose) + Rprintf("Coercing %s[%s] into %s[%s]\n", type2char(TYPEOF(x)), class1(x), type2char(TYPEOF(as)), class1(as)); + const char *ret = memrecycle(/*target=*/ans, /*where=*/R_NilValue, /*start=*/0, /*len=*/LENGTH(x), /*source=*/x, /*sourceStart=*/0, /*sourceLen=*/-1, /*colnum=*/0, /*colname=*/""); + if (ret) + warning(_("%s"), ret); + UNPROTECT(1); + return ans; +} + #ifndef NOZLIB #include #endif @@ -386,4 +389,3 @@ SEXP dt_zlib_version() { #endif return ScalarString(mkChar(out)); } - From 9084a15e72369f8d188edff4a36b813734adcfac Mon Sep 17 00:00:00 2001 From: Ani Date: Wed, 13 Mar 2024 23:32:26 -0700 Subject: [PATCH 02/16] Added the tests --- inst/atime/tests.R | 70 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) create mode 100644 inst/atime/tests.R diff --git a/inst/atime/tests.R b/inst/atime/tests.R new file mode 100644 index 0000000000..2a4c4dc7c8 --- /dev/null +++ b/inst/atime/tests.R @@ -0,0 +1,70 @@ +pkg.edit.fun = quote(function(old.Package, new.Package, sha, new.pkg.path) { + pkg_find_replace <- function(glob, FIND, REPLACE) { + atime::glob_find_replace(file.path(new.pkg.path, glob), FIND, REPLACE) + } + Package_regex <- gsub(".", "_?", old.Package, fixed = TRUE) + Package_ <- gsub(".", "_", old.Package, fixed = TRUE) + new.Package_ <- paste0(Package_, "_", sha) + pkg_find_replace( + "DESCRIPTION", + paste0("Package:\\s+", old.Package), + paste("Package:", new.Package)) + pkg_find_replace( + file.path("src", "Makevars.*in"), + Package_regex, + new.Package_) + pkg_find_replace( + file.path("R", "onLoad.R"), + Package_regex, + new.Package_) + pkg_find_replace( + file.path("R", "onLoad.R"), + sprintf('packageVersion\\("%s"\\)', old.Package), + sprintf('packageVersion\\("%s"\\)', new.Package)) + pkg_find_replace( + file.path("src", "init.c"), + paste0("R_init_", Package_regex), + paste0("R_init_", gsub("[.]", "_", new.Package_))) + pkg_find_replace( + "NAMESPACE", + sprintf('useDynLib\\("?%s"?', Package_regex), + paste0('useDynLib(', new.Package_)) + }) + +test.list <- list( + # Test based on https://github.com/Rdatatable/data.table/issues/5424 + # Performance regression introduced in: https://github.com/Rdatatable/data.table/pull/4491 + "Test regression introduced from #4491" = list( + pkg.edit.fun = pkg.edit.fun, + N = 10^seq(3, 8), + expr = quote(data.table:::`[.data.table`(dt_mod, , N := .N, by = g)), + setup = quote({ + n <- N/100 + set.seed(1L) + dt <- data.table( + g = sample(seq_len(n), N, TRUE), + x = runif(N), + key = "g") + dt_mod <- copy(dt) + }), + "Before"="be2f72e6f5c90622fe72e1c315ca05769a9dc854", + "Regression"="e793f53466d99f86e70fc2611b708ae8c601a451", + "Fixed"="58409197426ced4714af842650b0cc3b9e2cb842"), + + # Test based on https://github.com/Rdatatable/data.table/issues/4200 + # Performance regression fixed in: https://github.com/Rdatatable/data.table/pull/4558 + "Test regression fixed in #4558" = list( + pkg.edit.fun = pkg.edit.fun, + N = 10^seq(1, 20), + expr = quote(data.table:::`[.data.table`(d, , (max(v1) - min(v2)), by = id3)), + setup = quote({ + set.seed(108) + d <- data.table( + id3 = sample(c(seq.int(N * 0.9), sample(N * 0.9, N * 0.1, TRUE))), + v1 = sample(5L, N, TRUE), + v2 = sample(5L, N, TRUE)) + }), + "Before" = "15f0598b9828d3af2eb8ddc9b38e0356f42afe4f", + "Regression" = "6f360be0b2a6cf425f6df751ca9a99ec5d35ed93", + "Fixed" = "ba32f3cba38ec270587e395f6e6c26a80be36be6") +) From 7815afc1cb0c06f3987115d585d92f034c861519 Mon Sep 17 00:00:00 2001 From: Ani Date: Thu, 14 Mar 2024 16:55:48 -0700 Subject: [PATCH 03/16] Making the titles of test cases consistent (" ... fixed in ... ") --- inst/atime/tests.R | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/inst/atime/tests.R b/inst/atime/tests.R index 2a4c4dc7c8..4aa62bfb3e 100644 --- a/inst/atime/tests.R +++ b/inst/atime/tests.R @@ -33,8 +33,9 @@ pkg.edit.fun = quote(function(old.Package, new.Package, sha, new.pkg.path) { test.list <- list( # Test based on https://github.com/Rdatatable/data.table/issues/5424 - # Performance regression introduced in: https://github.com/Rdatatable/data.table/pull/4491 - "Test regression introduced from #4491" = list( + # Performance regression introduced in https://github.com/Rdatatable/data.table/pull/4491 + # Fixed in https://github.com/Rdatatable/data.table/pull/5463 + "Test regression fixed in #5463" = list( pkg.edit.fun = pkg.edit.fun, N = 10^seq(3, 8), expr = quote(data.table:::`[.data.table`(dt_mod, , N := .N, by = g)), @@ -52,7 +53,7 @@ test.list <- list( "Fixed"="58409197426ced4714af842650b0cc3b9e2cb842"), # Test based on https://github.com/Rdatatable/data.table/issues/4200 - # Performance regression fixed in: https://github.com/Rdatatable/data.table/pull/4558 + # Performance regression fixed in https://github.com/Rdatatable/data.table/pull/4558 "Test regression fixed in #4558" = list( pkg.edit.fun = pkg.edit.fun, N = 10^seq(1, 20), From ef2db64d7b4d61bb1868134a30e8d782a78b2990 Mon Sep 17 00:00:00 2001 From: Ani Date: Tue, 19 Mar 2024 03:36:32 -0700 Subject: [PATCH 04/16] Added a test case which is not relevant or related to #4491/#5463 --- inst/atime/tests.R | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/inst/atime/tests.R b/inst/atime/tests.R index 4aa62bfb3e..4718756f5a 100644 --- a/inst/atime/tests.R +++ b/inst/atime/tests.R @@ -32,6 +32,20 @@ pkg.edit.fun = quote(function(old.Package, new.Package, sha, new.pkg.path) { }) test.list <- list( + # Performance regression fixed in: https://github.com/Rdatatable/data.table/pull/4440 + "Test regression fixed in #4440" = list( + pkg.edit.fun = pkg.edit.fun, + N = 10^seq(3,8), + setup = quote({ + set.seed(1L) + dt <- data.table(a = sample(N, N)) + setindex(dt, a) + }), + expr = quote(data.table:::shallow(dt)), + "Before"="ad7b67c80a551b7a1e2ef8b73d6162ed7737c934", + "Regression"="752012f577f8e268bb6d0084ca39a09fa7fbc1c4", + "Fixed"="9d3b9202fddb980345025a4f6ac451ed26a423be"), + # Test based on https://github.com/Rdatatable/data.table/issues/5424 # Performance regression introduced in https://github.com/Rdatatable/data.table/pull/4491 # Fixed in https://github.com/Rdatatable/data.table/pull/5463 From f49bee39767936260f6693bf8b7223ec8a28b925 Mon Sep 17 00:00:00 2001 From: Ani Date: Fri, 29 Mar 2024 18:15:04 -0700 Subject: [PATCH 05/16] Trigger my updated workflow and check/test output (dividing the atime step runtime into installation and test time) --- inst/atime/tests.R | 1 + 1 file changed, 1 insertion(+) diff --git a/inst/atime/tests.R b/inst/atime/tests.R index 4718756f5a..63efcff90d 100644 --- a/inst/atime/tests.R +++ b/inst/atime/tests.R @@ -83,3 +83,4 @@ test.list <- list( "Regression" = "6f360be0b2a6cf425f6df751ca9a99ec5d35ed93", "Fixed" = "ba32f3cba38ec270587e395f6e6c26a80be36be6") ) +# Just a change to trigger my updated workflow and check/test output From fa4353ee82aa7338a1e4d990f7dc0ccb8ddabcf9 Mon Sep 17 00:00:00 2001 From: Ani Date: Mon, 1 Apr 2024 01:47:47 -0700 Subject: [PATCH 06/16] Assigning the 1.12.6 commit SHA to potentially replicate the state of data.table before the regression that got reported in #4311 was introduced --- inst/atime/tests.R | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/inst/atime/tests.R b/inst/atime/tests.R index 63efcff90d..1bf9fb9e66 100644 --- a/inst/atime/tests.R +++ b/inst/atime/tests.R @@ -42,7 +42,7 @@ test.list <- list( setindex(dt, a) }), expr = quote(data.table:::shallow(dt)), - "Before"="ad7b67c80a551b7a1e2ef8b73d6162ed7737c934", + "Before"="8c5042ca9aa5c6217b460fa5e8bf01003c7be358", "Regression"="752012f577f8e268bb6d0084ca39a09fa7fbc1c4", "Fixed"="9d3b9202fddb980345025a4f6ac451ed26a423be"), @@ -83,4 +83,3 @@ test.list <- list( "Regression" = "6f360be0b2a6cf425f6df751ca9a99ec5d35ed93", "Fixed" = "ba32f3cba38ec270587e395f6e6c26a80be36be6") ) -# Just a change to trigger my updated workflow and check/test output From 74ed5e9b05e8f66681a11c703c0d7384566c77f2 Mon Sep 17 00:00:00 2001 From: Ani Date: Mon, 1 Apr 2024 12:44:53 -0700 Subject: [PATCH 07/16] Assigning the 1.12.4 commit SHA to try the same --- inst/atime/tests.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/atime/tests.R b/inst/atime/tests.R index 1bf9fb9e66..e4d9e805dc 100644 --- a/inst/atime/tests.R +++ b/inst/atime/tests.R @@ -62,7 +62,7 @@ test.list <- list( key = "g") dt_mod <- copy(dt) }), - "Before"="be2f72e6f5c90622fe72e1c315ca05769a9dc854", + "Before"="73c221f51c8b545bd5dd06719647aed384a2c4b2", "Regression"="e793f53466d99f86e70fc2611b708ae8c601a451", "Fixed"="58409197426ced4714af842650b0cc3b9e2cb842"), From 988f918a9aac15fba5ddae698c19e21dbb4a6b5c Mon Sep 17 00:00:00 2001 From: Ani Date: Tue, 2 Apr 2024 12:41:06 -0700 Subject: [PATCH 08/16] Removed the 'Before' label since the commit SHAs associated with the state of data.table before that regression was introduced are not working. --- inst/atime/tests.R | 1 - 1 file changed, 1 deletion(-) diff --git a/inst/atime/tests.R b/inst/atime/tests.R index e4d9e805dc..5a3ca3fc40 100644 --- a/inst/atime/tests.R +++ b/inst/atime/tests.R @@ -42,7 +42,6 @@ test.list <- list( setindex(dt, a) }), expr = quote(data.table:::shallow(dt)), - "Before"="8c5042ca9aa5c6217b460fa5e8bf01003c7be358", "Regression"="752012f577f8e268bb6d0084ca39a09fa7fbc1c4", "Fixed"="9d3b9202fddb980345025a4f6ac451ed26a423be"), From b955d0934472dfdee737c8534e1aed2bce1e4cd0 Mon Sep 17 00:00:00 2001 From: Ani Date: Wed, 3 Apr 2024 12:00:55 -0700 Subject: [PATCH 09/16] Finalized the commits (correct and working) for the current set of tests. --- inst/atime/tests.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/inst/atime/tests.R b/inst/atime/tests.R index 5a3ca3fc40..a3cb1213a5 100644 --- a/inst/atime/tests.R +++ b/inst/atime/tests.R @@ -42,6 +42,7 @@ test.list <- list( setindex(dt, a) }), expr = quote(data.table:::shallow(dt)), + #"Before"="", # Unknown source, and all the commit SHAs (each dating before March 20 '20, when the regression was noticed via issue #4311) I tried failed. "Regression"="752012f577f8e268bb6d0084ca39a09fa7fbc1c4", "Fixed"="9d3b9202fddb980345025a4f6ac451ed26a423be"), @@ -61,7 +62,7 @@ test.list <- list( key = "g") dt_mod <- copy(dt) }), - "Before"="73c221f51c8b545bd5dd06719647aed384a2c4b2", + #"Before"="73c221f51c8b545bd5dd06719647aed384a2c4b2", # Previously working, currently fails. "Regression"="e793f53466d99f86e70fc2611b708ae8c601a451", "Fixed"="58409197426ced4714af842650b0cc3b9e2cb842"), From f4f5e6ca00f3b5dfa038b20e874c5f21b3175fbd Mon Sep 17 00:00:00 2001 From: Ani Date: Wed, 3 Apr 2024 13:45:46 -0700 Subject: [PATCH 10/16] Changed to Slow/Fast from Regression/Fixed labels (also triggering my updated workflow) --- inst/atime/tests.R | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/inst/atime/tests.R b/inst/atime/tests.R index a3cb1213a5..bba7399f0c 100644 --- a/inst/atime/tests.R +++ b/inst/atime/tests.R @@ -43,8 +43,8 @@ test.list <- list( }), expr = quote(data.table:::shallow(dt)), #"Before"="", # Unknown source, and all the commit SHAs (each dating before March 20 '20, when the regression was noticed via issue #4311) I tried failed. - "Regression"="752012f577f8e268bb6d0084ca39a09fa7fbc1c4", - "Fixed"="9d3b9202fddb980345025a4f6ac451ed26a423be"), + "Slow"="752012f577f8e268bb6d0084ca39a09fa7fbc1c4", + "Fast"="9d3b9202fddb980345025a4f6ac451ed26a423be"), # Test based on https://github.com/Rdatatable/data.table/issues/5424 # Performance regression introduced in https://github.com/Rdatatable/data.table/pull/4491 @@ -63,8 +63,8 @@ test.list <- list( dt_mod <- copy(dt) }), #"Before"="73c221f51c8b545bd5dd06719647aed384a2c4b2", # Previously working, currently fails. - "Regression"="e793f53466d99f86e70fc2611b708ae8c601a451", - "Fixed"="58409197426ced4714af842650b0cc3b9e2cb842"), + "Slow"="e793f53466d99f86e70fc2611b708ae8c601a451", + "Fast"="58409197426ced4714af842650b0cc3b9e2cb842"), # Test based on https://github.com/Rdatatable/data.table/issues/4200 # Performance regression fixed in https://github.com/Rdatatable/data.table/pull/4558 @@ -79,7 +79,7 @@ test.list <- list( v1 = sample(5L, N, TRUE), v2 = sample(5L, N, TRUE)) }), - "Before" = "15f0598b9828d3af2eb8ddc9b38e0356f42afe4f", - "Regression" = "6f360be0b2a6cf425f6df751ca9a99ec5d35ed93", - "Fixed" = "ba32f3cba38ec270587e395f6e6c26a80be36be6") + #"Before" = "15f0598b9828d3af2eb8ddc9b38e0356f42afe4f", + "Slow" = "6f360be0b2a6cf425f6df751ca9a99ec5d35ed93", + "Fast" = "ba32f3cba38ec270587e395f6e6c26a80be36be6") ) From 57e888d823e77ac475b7b47cbe2fd5ece088c757 Mon Sep 17 00:00:00 2001 From: Ani Date: Wed, 3 Apr 2024 14:31:00 -0700 Subject: [PATCH 11/16] Test to see how the ideal case looks (since for the first test case removing Before made the Regression/Fixed lines change positions) --- inst/atime/tests.R | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/inst/atime/tests.R b/inst/atime/tests.R index bba7399f0c..b380d65374 100644 --- a/inst/atime/tests.R +++ b/inst/atime/tests.R @@ -42,9 +42,10 @@ test.list <- list( setindex(dt, a) }), expr = quote(data.table:::shallow(dt)), - #"Before"="", # Unknown source, and all the commit SHAs (each dating before March 20 '20, when the regression was noticed via issue #4311) I tried failed. - "Slow"="752012f577f8e268bb6d0084ca39a09fa7fbc1c4", - "Fast"="9d3b9202fddb980345025a4f6ac451ed26a423be"), + #"Before"="", # Unknown source, and all the commit SHAs (each dating before March 20 '20, when the regression was noticed via issue #4311) I tried failed. + "Before"="9d3b9202fddb980345025a4f6ac451ed26a423be", + "Regression"="752012f577f8e268bb6d0084ca39a09fa7fbc1c4", + "Fixed"="9d3b9202fddb980345025a4f6ac451ed26a423be"), # Test based on https://github.com/Rdatatable/data.table/issues/5424 # Performance regression introduced in https://github.com/Rdatatable/data.table/pull/4491 @@ -63,8 +64,9 @@ test.list <- list( dt_mod <- copy(dt) }), #"Before"="73c221f51c8b545bd5dd06719647aed384a2c4b2", # Previously working, currently fails. - "Slow"="e793f53466d99f86e70fc2611b708ae8c601a451", - "Fast"="58409197426ced4714af842650b0cc3b9e2cb842"), + "Before"="58409197426ced4714af842650b0cc3b9e2cb842", + "Regression"="e793f53466d99f86e70fc2611b708ae8c601a451", + "Fixed"="58409197426ced4714af842650b0cc3b9e2cb842"), # Test based on https://github.com/Rdatatable/data.table/issues/4200 # Performance regression fixed in https://github.com/Rdatatable/data.table/pull/4558 @@ -79,7 +81,7 @@ test.list <- list( v1 = sample(5L, N, TRUE), v2 = sample(5L, N, TRUE)) }), - #"Before" = "15f0598b9828d3af2eb8ddc9b38e0356f42afe4f", - "Slow" = "6f360be0b2a6cf425f6df751ca9a99ec5d35ed93", - "Fast" = "ba32f3cba38ec270587e395f6e6c26a80be36be6") + "Before" = "15f0598b9828d3af2eb8ddc9b38e0356f42afe4f", + "Regression" = "6f360be0b2a6cf425f6df751ca9a99ec5d35ed93", + "Fixed" = "ba32f3cba38ec270587e395f6e6c26a80be36be6") ) From bddf3c9fa27c4bec91a2e842d7b516f9edc2bc75 Mon Sep 17 00:00:00 2001 From: Ani Date: Wed, 3 Apr 2024 23:49:59 -0700 Subject: [PATCH 12/16] Update tests.R --- inst/atime/tests.R | 1 + 1 file changed, 1 insertion(+) diff --git a/inst/atime/tests.R b/inst/atime/tests.R index b380d65374..132f81d0da 100644 --- a/inst/atime/tests.R +++ b/inst/atime/tests.R @@ -85,3 +85,4 @@ test.list <- list( "Regression" = "6f360be0b2a6cf425f6df751ca9a99ec5d35ed93", "Fixed" = "ba32f3cba38ec270587e395f6e6c26a80be36be6") ) +# Test to see if R is running with --vanilla From e64f68ebcb311e7c478e936326d0575b3ace7c32 Mon Sep 17 00:00:00 2001 From: Ani Date: Thu, 4 Apr 2024 11:20:28 -0700 Subject: [PATCH 13/16] Test to see if R is reading .Rprofile (setting an environment variable and accessing it) --- inst/atime/tests.R | 1 + 1 file changed, 1 insertion(+) diff --git a/inst/atime/tests.R b/inst/atime/tests.R index 132f81d0da..3db5c638ae 100644 --- a/inst/atime/tests.R +++ b/inst/atime/tests.R @@ -86,3 +86,4 @@ test.list <- list( "Fixed" = "ba32f3cba38ec270587e395f6e6c26a80be36be6") ) # Test to see if R is running with --vanilla +# Test to see if R is reading .Rprofile (setting an environment variable and accessing it) From eac907bbfc2bade1306c5e3574c0dd55536f28fb Mon Sep 17 00:00:00 2001 From: Ani Date: Thu, 4 Apr 2024 14:13:24 -0700 Subject: [PATCH 14/16] Debugging more to make the switch to using .Rprofile --- inst/atime/tests.R | 1 + 1 file changed, 1 insertion(+) diff --git a/inst/atime/tests.R b/inst/atime/tests.R index 3db5c638ae..35f1b8c408 100644 --- a/inst/atime/tests.R +++ b/inst/atime/tests.R @@ -87,3 +87,4 @@ test.list <- list( ) # Test to see if R is running with --vanilla # Test to see if R is reading .Rprofile (setting an environment variable and accessing it) +# Debugging more to make the switch to using .Rprofile From f2ba635c1380ad22a74fe3e8918241f8a5c267fb Mon Sep 17 00:00:00 2001 From: Ani Date: Thu, 4 Apr 2024 14:42:03 -0700 Subject: [PATCH 15/16] Since the environment variable is being read with the location change of .Rprofile, it is time to test with the CRAN mirror being only set therein [Reverted the last change of removing the git switch step; see https://github.com/Anirban166/Autocomment-atime-results/issues/33#issuecomment-2038431272 for more details] --- inst/atime/tests.R | 1 + 1 file changed, 1 insertion(+) diff --git a/inst/atime/tests.R b/inst/atime/tests.R index 35f1b8c408..c69cd33e88 100644 --- a/inst/atime/tests.R +++ b/inst/atime/tests.R @@ -88,3 +88,4 @@ test.list <- list( # Test to see if R is running with --vanilla # Test to see if R is reading .Rprofile (setting an environment variable and accessing it) # Debugging more to make the switch to using .Rprofile +# Since the environment variable is being read with the location change of .Rprofile, it is time to test with the CRAN mirror being only set therein From 9b08ceed739f67d61d51f12cb8f4fd12a26345a9 Mon Sep 17 00:00:00 2001 From: Ani Date: Fri, 5 Apr 2024 12:28:43 -0700 Subject: [PATCH 16/16] Omit one test case for a 1:1 historical regression comparison between the remaining two cases --- inst/atime/tests.R | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/inst/atime/tests.R b/inst/atime/tests.R index c69cd33e88..a1dc9ce3b3 100644 --- a/inst/atime/tests.R +++ b/inst/atime/tests.R @@ -66,24 +66,24 @@ test.list <- list( #"Before"="73c221f51c8b545bd5dd06719647aed384a2c4b2", # Previously working, currently fails. "Before"="58409197426ced4714af842650b0cc3b9e2cb842", "Regression"="e793f53466d99f86e70fc2611b708ae8c601a451", - "Fixed"="58409197426ced4714af842650b0cc3b9e2cb842"), + "Fixed"="58409197426ced4714af842650b0cc3b9e2cb842") # Test based on https://github.com/Rdatatable/data.table/issues/4200 # Performance regression fixed in https://github.com/Rdatatable/data.table/pull/4558 - "Test regression fixed in #4558" = list( - pkg.edit.fun = pkg.edit.fun, - N = 10^seq(1, 20), - expr = quote(data.table:::`[.data.table`(d, , (max(v1) - min(v2)), by = id3)), - setup = quote({ - set.seed(108) - d <- data.table( - id3 = sample(c(seq.int(N * 0.9), sample(N * 0.9, N * 0.1, TRUE))), - v1 = sample(5L, N, TRUE), - v2 = sample(5L, N, TRUE)) - }), - "Before" = "15f0598b9828d3af2eb8ddc9b38e0356f42afe4f", - "Regression" = "6f360be0b2a6cf425f6df751ca9a99ec5d35ed93", - "Fixed" = "ba32f3cba38ec270587e395f6e6c26a80be36be6") + #"Test regression fixed in #4558" = list( + #pkg.edit.fun = pkg.edit.fun, + #N = 10^seq(1, 20), + #expr = quote(data.table:::`[.data.table`(d, , (max(v1) - min(v2)), by = id3)), + #setup = quote({ + # set.seed(108) + # d <- data.table( + # id3 = sample(c(seq.int(N * 0.9), sample(N * 0.9, N * 0.1, TRUE))), + # v1 = sample(5L, N, TRUE), + # v2 = sample(5L, N, TRUE)) + # }), + # "Before" = "15f0598b9828d3af2eb8ddc9b38e0356f42afe4f", + # "Regression" = "6f360be0b2a6cf425f6df751ca9a99ec5d35ed93", + # "Fixed" = "ba32f3cba38ec270587e395f6e6c26a80be36be6") ) # Test to see if R is running with --vanilla # Test to see if R is reading .Rprofile (setting an environment variable and accessing it)