diff --git a/NAMESPACE b/NAMESPACE index 81c0fce689..ae54e95d11 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -187,6 +187,8 @@ S3method(seq, ITime) S3method(unique, IDate) S3method(unique, ITime) S3method('[<-', IDate) +S3method('min', IDate) +S3method('max', IDate) S3method(edit, data.table) # generics to support custom column formatters diff --git a/NEWS.md b/NEWS.md index 8824204aa7..a6fbba8fbf 100644 --- a/NEWS.md +++ b/NEWS.md @@ -253,6 +253,34 @@ # no inconvenient warning ``` + On the same basis, `min` and `max` methods for empty `IDate` input now return `NA_integer_` of class `IDate`, rather than `NA_double_` of class `IDate` together with base R's warning `no non-missing arguments to min; returning Inf`, [#2256](https://github.com/Rdatatable/data.table/issues/2256]. The type change and warning would cause an error in grouping, see example below. Since `NA` was returned before it seems clear that still returning `NA` but of the correct type and with no warning is appropriate, backwards compatible, and a bug fix. Thanks to Frank Narf for reporting, and Matt Dowle for fixing. + + ```R + DT + # d g + # + # 1: 2020-01-01 a + # 2: 2020-01-02 a + # 3: 2019-12-31 b + + DT[, min(d[d>"2020-01-01"]), by=g] + + # was: + + # Error in `[.data.table`(DT, , min(d[d > "2020-01-01"]), by = g) : + # Column 1 of result for group 2 is type 'double' but expecting type 'integer'. Column types must be consistent for each group. + # In addition: Warning message: + # In min.default(integer(0), na.rm = FALSE) : + # no non-missing arguments to min; returning Inf + + # now : + + # g V1 + # + # 1: a 2020-01-02 + # 2: b + ``` + 36. `DT[, min(int64Col), by=grp]` (and `max`) would return incorrect results for `bit64::integer64` columns, [#4444](https://github.com/Rdatatable/data.table/issues/4444). Thanks to @go-see for reporting, and Michael Chirico for the PR. 37. `fread(dec=',')` was able to guess `sep=','` and return an incorrect result, [#4483](https://github.com/Rdatatable/data.table/issues/4483). Thanks to Michael Chirico for reporting and fixing. It was already an error to provide both `sep=','` and `dec=','` manually. diff --git a/R/IDateTime.R b/R/IDateTime.R index 42a6b289a6..33d04b87c4 100644 --- a/R/IDateTime.R +++ b/R/IDateTime.R @@ -71,6 +71,11 @@ unique.IDate = x } +# define min and max to avoid base R's Inf with warning on empty, #2256 +min.IDate = max.IDate = function(x, ...) { + as.IDate(if (!length(x)) NA else NextMethod()) +} + # fix for #1315 as.list.IDate = function(x, ...) NextMethod() @@ -304,7 +309,7 @@ clip_msec = function(secs, action) { stopf("Valid options for ms are 'truncate', 'nearest', and 'ceil'.") ) } - + ################################################################### # Date - time extraction functions # Adapted from Hadley Wickham's routines cited below to ensure diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 1d39b2d817..e5a1fb04fc 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -3422,10 +3422,22 @@ test(1083, setkeyv(ans[, list(r = .N), by=key(DT1)], key(ans)), check) # if the # Tests for #2531. `:=` loses POSIXct or ITime attribute: # first test from this SO post: http://stackoverflow.com/questions/15996692/cannot-assign-columns-as-date-by-reference-in-data-table +set.seed(1) dt <- data.table(date = as.IDate(sample(10000:11000, 10), origin = "1970-01-01")) dt[, group := rep(1:2, 5)] dt[, min.group.date := as.IDate(min(date)), by = group] -test(1084, class(dt$min.group.date), c("IDate", "Date")) +test(1084.1, class(dt$min.group.date), c("IDate", "Date")) + +# min.IDate on empty input NA, #2256 +# non-optimized grouping first: +test(1084.2, dt[, min(date[date>"1999-12-01"]), by=group], data.table(group=1:2, V1=as.IDate(c("1999-12-14",NA)))) +test(1084.3, dt[, max(date[date<"1997-08-01"]), by=group], data.table(group=1:2, V1=as.IDate(c(NA,"1997-07-19")))) +dt[group==2, date:=NA] # make group 2 an all-NA group +# GForce grouping with na.rm=FALSE|TRUE on the all-NA group +test(1084.4, dt[, min(date, na.rm=TRUE), by=group], data.table(group=1:2, V1=as.IDate(c("1997-12-06",NA)))) +test(1084.5, dt[, min(date), by=group], data.table(group=1:2, V1=as.IDate(c("1997-12-06",NA)))) +test(1084.6, dt[, max(date, na.rm=TRUE), by=group], data.table(group=1:2, V1=as.IDate(c("1999-12-14",NA)))) +test(1084.7, dt[, max(date), by=group], data.table(group=1:2, V1=as.IDate(c("1999-12-14",NA)))) dt <- data.table(date = as.IDate(sample(10000:11000, 10), origin = "1970-01-01")) dt[, group := rep(1:2, 5)]