From c0b32a60466bed0e63420ec105bc75c34590865e Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Tue, 8 Jul 2025 19:25:06 +0000 Subject: [PATCH 1/2] use faster base implementation for isoweek --- NEWS.md | 2 ++ R/IDateTime.R | 13 +++++++------ 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/NEWS.md b/NEWS.md index 139faf62e3..f55cb5277c 100644 --- a/NEWS.md +++ b/NEWS.md @@ -42,6 +42,8 @@ 8. `groupingsets()` gets a new argument `enclos` for use together with the `jj` argument in functions wrapping `groupingsets()`, including the existing wrappers `rollup()` and `cube()`, [#5560](https://github.com/Rdatatable/data.table/issues/5560). When forwarding a `j`-expression as `groupingsets(jj = substitute(j))`, make sure to pass `enclos = parent.frame()` as well, so that the `j`-expression will be evaluated in the right context. This makes it possible for `j` to refer to variables outside the `data.table`. Thanks @sindribaldur for the report and @aitap for the fix. +9. `isoweek()` is much faster (e.g. 20x) by re-using an implementation from {base}, [#5111](https://github.com/Rdatatable/data.table/issues/5111). Thanks @MichaelChirico for the report and PR. + ### BUG FIXES 1. Custom binary operators from the `lubridate` package now work with objects of class `IDate` as with a `Date` subclass, [#6839](https://github.com/Rdatatable/data.table/issues/6839). Thanks @emallickhossain for the report and @aitap for the fix. diff --git a/R/IDateTime.R b/R/IDateTime.R index 856dbeaa20..383ceb54e7 100644 --- a/R/IDateTime.R +++ b/R/IDateTime.R @@ -342,7 +342,8 @@ yday = function(x) convertDate(as.IDate(x), "yday") wday = function(x) convertDate(as.IDate(x), "wday") mday = function(x) convertDate(as.IDate(x), "mday") week = function(x) convertDate(as.IDate(x), "week") -isoweek = function(x) { +# TODO(#3279): Investigate if improved as.IDate() makes our below implementation faster than this +isoweek = function(x) as.integer(format(as.IDate(x), "%V")) # ISO 8601-conformant week, as described at # https://en.wikipedia.org/wiki/ISO_week_date # Approach: @@ -350,11 +351,11 @@ isoweek = function(x) { # * Find the number of weeks having passed between # January 1st of the year of the nearest Thursdays and x - x = as.IDate(x) # number of days since 1 Jan 1970 (a Thurs) - nearest_thurs = as.IDate(7L * (as.integer(x + 3L) %/% 7L)) - year_start = as.IDate(format(nearest_thurs, '%Y-01-01')) - 1L + (nearest_thurs - year_start) %/% 7L -} +# x = as.IDate(x) # number of days since 1 Jan 1970 (a Thurs) +# nearest_thurs = as.IDate(7L * (as.integer(x + 3L) %/% 7L)) +# year_start = as.IDate(format(nearest_thurs, '%Y-01-01')) +# 1L + (nearest_thurs - year_start) %/% 7L + month = function(x) convertDate(as.IDate(x), "month") quarter = function(x) convertDate(as.IDate(x), "quarter") From 1afb35970e5d1a8737d25662df0812e52012e628 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Tue, 8 Jul 2025 12:47:39 -0700 Subject: [PATCH 2/2] Add an atime regression test --- .ci/atime/tests.R | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/.ci/atime/tests.R b/.ci/atime/tests.R index e8ccd26290..761ba3c2bb 100644 --- a/.ci/atime/tests.R +++ b/.ci/atime/tests.R @@ -277,5 +277,14 @@ test.list <- atime::atime_test_list( Slow = "73d79edf8ff8c55163e90631072192301056e336", # Parent of the first commit in the PR (https://github.com/Rdatatable/data.table/commit/8397dc3c993b61a07a81c786ca68c22bc589befc) Fast = "8397dc3c993b61a07a81c786ca68c22bc589befc"), # Commit in the PR (https://github.com/Rdatatable/data.table/pull/7019/commits) that removes inefficiency + "isoweek improved in #7144" = atime::atime_test( + setup = { + set.seed(349) + x = sample(Sys.Date() - 0:5000, N, replace=TRUE) + }, + expr = data.table::isoweek(x), + Slow = "548410d23dd74b625e8ea9aeb1a5d2e9dddd2927", # Parent of the first commit in the PR (https://github.com/Rdatatable/data.table/commit/548410d23dd74b625e8ea9aeb1a5d2e9dddd2927) + Fast = "c0b32a60466bed0e63420ec105bc75c34590865e"), # Commit in the PR (https://github.com/Rdatatable/data.table/pull/7144/commits) that uses a much faster implementation + tests=extra.test.list) # nolint end: undesirable_operator_linter.