diff --git a/NEWS.md b/NEWS.md index 60cb1dd0ce..3face7519b 100644 --- a/NEWS.md +++ b/NEWS.md @@ -38,6 +38,11 @@ 1. `data.table(x=1, <expr>)`, where `<expr>` is an expression resulting in a 1-column matrix without column names, will eventually have names `x` and `V2`, not `x` and `V1`, consistent with `data.table(x=1, <expr>)` where `<expr>` results in an atomic vector, for example `data.table(x=1, cbind(1))` and `data.table(x=1, 1)` will both have columns named `x` and `V2`. In this release, the matrix case continues to be named `V1`, but the new behavior can be activated by setting `options(datatable.old.matrix.autoname)` to `FALSE`. See point 5 under Bug Fixes for more context; this change will provide more internal consistency as well as more consistency with `data.frame()`. +2. The behavior of `week()` will be changed in a future release to calculate weeks sequentially (days 1-7 as week 1), which is a potential breaking change. For now, the current "legacy" behavior, where week numbers advance every 7th day of the year (e.g., day 7 starts week 2), remains the default, and a deprecation warning will be issued when the old and new behaviors differ. Users can control this behavior with the temporary option `options(datatable.week = "...")`: + * `"sequential"`: Opt in to the new, sequential behavior (no warning). + * `"legacy"`: Continue using the legacy behavior but suppress the deprecation warning. +See [#2611](https://github.com/Rdatatable/data.table/issues/2611) for details. Thanks @MichaelChirico for the report and @venom1204 for the implementation. + ### NEW FEATURES 1. New `sort_by()` method for data.tables, [#6662](https://github.com/Rdatatable/data.table/issues/6662). It uses `forder()` to improve upon the data.frame method and also matches `DT[order(...)]` behavior with respect to locale. Thanks @rikivillalba for the suggestion and PR. 
diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 2e1284aea6..609977b991 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -18384,7 +18384,7 @@ x = c("1111-11-11", "2019-01-01", "2019-02-28", "2019-03-01", "2019-12-31", "202 test(2236.1, yday(x), c(315L, 1L, 59L, 60L, 365L, 60L, 61L, 366L, 1L, 366L, 60L, NA)) test(2236.2, mday(x), c(11L, 1L, 28L, 1L, 31L, 29L, 1L, 31L, 1L, 31L, 1L, NA)) test(2236.3, wday(x), c(7L, 3L, 5L, 6L, 3L, 7L, 1L, 5L, 1L, 2L, 2L, NA)) -test(2236.4, week(x), c(46L, 1L, 9L, 9L, 53L, 9L, 9L, 53L, 1L, 53L, 9L, NA)) +test(2236.4, options = c(datatable.week = "legacy"), week(x), c(46L, 1L, 9L, 9L, 53L, 9L, 9L, 53L, 1L, 53L, 9L, NA)) test(2236.5, month(x), c(11L, 1L, 2L, 3L, 12L, 2L, 3L, 12L, 1L, 12L, 3L, NA)) test(2236.6, quarter(x), c(4L, 1L, 1L, 1L, 4L, 1L, 1L, 4L, 1L, 4L, 1L, NA)) test(2236.7, year(x), c(1111L, 2019L, 2019L, 2019L, 2019L, 2020L, 2020L, 2020L, 2040L, 2040L, 2100L, NA)) @@ -21815,3 +21815,12 @@ test(2341.24, fread('a # leading cmnt b ', comment.char = '#', strip.white = FALSE, sep = ","), data.table(a=c(" ", "b"))) + +# week() sequential numbering fix tests #2611 +test(2342.1, options = c(datatable.week = "sequential"), week(as.IDate("1970-01-01") + 0:7), c(1L,1L,1L,1L,1L,1L,1L,2L)) +test(2342.2, options = c(datatable.week = "sequential"), week(as.IDate(c("2012-02-28","2012-02-29","2012-03-01"))), c(9L,9L,9L)) +test(2342.3, options = c(datatable.week = "sequential"), week(as.IDate(c("2019-12-31","2020-01-01"))), c(53L,1L)) +test(2342.4, options = c(datatable.week = "sequential"), week(as.IDate(c("2020-12-31","2021-01-01"))), c(53L,1L)) +test(2342.5, options = c(datatable.week = "sequential"), week(as.IDate("2021-01-06") + 0:6), c(1L,1L,2L,2L,2L,2L,2L)) +test(2342.6, options = c(datatable.week = "sequential"), week(as.IDate(c("2016-02-27","2016-02-28","2016-02-29","2016-03-01","2016-03-02"))), c(9L,9L,9L,9L,9L)) +test(2342.7, options = c(datatable.week = "default"), week(as.IDate("1970-01-07")), 2L, 
warning = "The default behavior of week() is changing") \ No newline at end of file diff --git a/src/idatetime.c b/src/idatetime.c index eaeb35a96f..ec3ce3ed3a 100644 --- a/src/idatetime.c +++ b/src/idatetime.c @@ -58,13 +58,11 @@ void convertSingleDate(int x, datetype type, void *out) int leap = !years1 && (years4 || !years100); - if (type == YDAY || type == WEEK) { + if (type == YDAY) { int yday = days + 31 + 28 + leap; if (yday >= YEARS1 + leap) yday -= YEARS1 + leap; *(int *)out = ++yday; - if (type == WEEK) - *(int *)out = (*(int *)out / 7) + 1; return; } @@ -143,6 +141,41 @@ SEXP convertDate(SEXP x, SEXP type) else if (!strcmp(ctype_str, "yearqtr")) { ctype = YEARQTR; ansint = false; } else internal_error(__func__, "invalid type, should have been caught before"); // # nocov + if (ctype == WEEK) { + SEXP ans = PROTECT(allocVector(INTSXP, n)); + int *ansp = INTEGER(ans); + + SEXP opt = GetOption(install("datatable.week"), R_NilValue); + const char *mode = isString(opt) && length(opt) == 1 ? CHAR(STRING_ELT(opt, 0)) : "default"; + + bool use_sequential = !strcmp(mode, "sequential"); + bool use_legacy = !strcmp(mode, "legacy"); + bool can_warn = !use_sequential && !use_legacy; + + for (int i = 0; i < n; i++) { + if (ix[i] == NA_INTEGER) { + ansp[i] = NA_INTEGER; + continue; + } + int yday; + convertSingleDate(ix[i], YDAY, &yday); + int new_week = ((yday - 1) / 7) + 1; + + if (use_sequential) { + ansp[i] = new_week; + } else { + int old_week = (yday / 7) + 1; + ansp[i] = old_week; + if (can_warn && new_week != old_week) { + warning(_("The default behavior of week() is changing. Previously ('legacy' mode), week numbers advanced every 7th day of the year. The new 'sequential' mode ensures the first week always has 7 days. For example, as.IDate('2023-01-07') returns week 2 in legacy mode but week 1 in sequential mode (week 2 starts on '2023-01-08'). To adopt the new behavior now, set options(datatable.week = 'sequential'). 
To keep the old results and silence this warning, set options(datatable.week = 'legacy'). See https://github.com/Rdatatable/data.table/issues/2611")); + can_warn = false; + } + } + } + UNPROTECT(1); + return ans; + } + if (ansint) { SEXP ans = PROTECT(allocVector(INTSXP, n)); int *ansp = INTEGER(ans);