From 4ca513153619449a2b5c14bd9f6ecb9f502aaf1b Mon Sep 17 00:00:00 2001 From: jangorecki Date: Mon, 3 Jun 2019 17:05:13 +0530 Subject: [PATCH 1/2] attempt for #1738 --- src/forder.c | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/src/forder.c b/src/forder.c index c507aacc2a..6d4dd3f711 100644 --- a/src/forder.c +++ b/src/forder.c @@ -467,6 +467,43 @@ SEXP forder(SEXP DT, SEXP by, SEXP retGrpArg, SEXP sortGroupsArg, SEXP ascArg, S // if n==1, the code is left to proceed below in case one or more of the 1-row by= columns are NA and na.last=NA. Otherwise it would be easy to return now. notFirst = false; + // first simple way, fail before test 1848 during bmerge typeof INT != DATE + for (int i=0; i 0) { + Rprintf("shallow wrapper\n"); + DT = shallowwrapper(dt, R_NilValue); + Rprintf("second loop\n"); + for (int i=0; i Date: Fri, 19 Jul 2019 17:45:26 -0700 Subject: [PATCH 2/2] change to a message in verbose mode for now, news item asking for user feedback, and added comments to forder.c on how to do the automatic coerce in future --- NEWS.md | 2 ++ inst/tests/tests.Rraw | 6 ++++++ src/forder.c | 49 +++++++++---------------------------------- 3 files changed, 18 insertions(+), 39 deletions(-) diff --git a/NEWS.md b/NEWS.md index 35f51f66ea..411a589857 100644 --- a/NEWS.md +++ b/NEWS.md @@ -133,6 +133,8 @@ 19. Type `complex` is now supported by `setkey`, `setorder`, `:=`, `by=`, `keyby=`, `shift`, `dcast`, `frank`, `rowid`, `rleid`, `CJ`, `coalesce`, `unique`, and `uniqueN`, [#3690](https://github.com/Rdatatable/data.table/issues/3690). Thanks to Gareth Ward and Elio Campitelli for their reports and input. Sorting `complex` is achieved the same way as base R; i.e., first by the real part then by the imaginary part (as if the `complex` column were two separate columns of `double`). There is no plan to support joining/merging on `complex` columns until a user demonstrates a need for that. +20. `setkey`, `[key]by=` and `on=` in verbose mode (`options(datatable.verbose=TRUE)`) now detect any columns inheriting from `Date` which are stored as 8 byte double, test if any fractions are present, and if not suggest using a 4 byte integer instead (such as `data.table::IDate`) to save space and time, [#1738](https://github.com/Rdatatable/data.table/issues/1738). In future this could be upgraded to `message` or `warning` depending on feedback. + #### BUG FIXES 1. `first`, `last`, `head` and `tail` by group no longer error in some cases, [#2030](https://github.com/Rdatatable/data.table/issues/2030) [#3462](https://github.com/Rdatatable/data.table/issues/3462). Thanks to @franknarf1 for reporting. diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 5f149ab845..03fecf3da6 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -15398,6 +15398,12 @@ test(2069.32, DT1[DT2, .(y = sum(b, na.rm=TRUE)), by=.EACHI, on=c(a = 'a', b="b" DT = data.table(z = 1i) test(2069.33, DT[DT, on = 'z'], error = "Type 'complex' not supported for joining/merging") +# forder verbose message when !isReallyReal Date, #1738 +DT = data.table(d=sample(seq(as.Date("2015-01-01"), as.Date("2015-01-05"), by="days"), 20, replace=TRUE)) +test(2070.01, typeof(DT$d), "double") +test(2070.02, DT[, .N, keyby=d, verbose=TRUE], output="Column 1.*date.*8 byte double.*no fractions are present.*4 byte integer.*to save space and time") + + ################################### # Add new tests above this line # ################################### diff --git a/src/forder.c b/src/forder.c index a9cf954535..e798a9eea9 100644 --- a/src/forder.c +++ b/src/forder.c @@ -424,6 +424,7 @@ SEXP forder(SEXP DT, SEXP by, SEXP retGrpArg, SEXP sortGroupsArg, SEXP ascArg, S #endif int n_protect = 0; + const bool verbose = GetVerbose(); if (!isNewList(DT)) { if (!isVectorAtomic(DT)) error("Input is not either a list of columns, or an atomic vector."); @@ -469,43 +470,6 @@ SEXP forder(SEXP DT, SEXP by, SEXP retGrpArg, SEXP sortGroupsArg, SEXP ascArg, S // if n==1, the code is left to proceed below in case one or more of the 1-row by= columns are NA and na.last=NA. Otherwise it would be easy to return now. notFirst = false; - // first simple way, fail before test 1848 during bmerge typeof INT != DATE - for (int i=0; i 0) { - Rprintf("shallow wrapper\n"); - DT = shallowwrapper(dt, R_NilValue); - Rprintf("second loop\n"); - for (int i=0; i