diff --git a/DESCRIPTION b/DESCRIPTION index 9e5302f2bb..8ab2deaa0d 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -63,7 +63,8 @@ Authors@R: c( person("Dirk","Eddelbuettel", role="ctb"), person("Ben","Schwen", role="ctb"), person("Tony","Fischetti", role="ctb"), - person("Ofek","Shilon", role="ctb")) + person("Ofek","Shilon", role="ctb"), + person("Vadim","Khotilovich", role="ctb")) Depends: R (>= 3.1.0) Imports: methods Suggests: bit64 (>= 4.0.0), bit (>= 4.0.4), curl, R.utils, xts, nanotime, zoo (>= 1.8-1), yaml, knitr, rmarkdown, markdown diff --git a/NEWS.md b/NEWS.md index b80eb35c63..fd48fdcade 100644 --- a/NEWS.md +++ b/NEWS.md @@ -125,6 +125,8 @@ 15. `print(x, col.names='none')` now removes the column names as intended for wide `data.table`s whose column names don't fit on a single line, [#4270](https://github.com/Rdatatable/data.table/issues/4270). Thanks to @tdhock for the report, and Michael Chirico for fixing. +16. `DT[, min(colB), by=colA]` when `colB` is type `character` would miss blank strings (`""`) at the beginning of a group and return the smallest non-blank instead of blank, [#4848](https://github.com/Rdatatable/data.table/issues/4848). Thanks to Vadim Khotilovich for reporting and for the PR fixing it. + ## NOTES 1. New feature 29 in v1.12.4 (Oct 2019) introduced zero-copy coercion. Our thinking is that requiring you to get the type right in the case of `0` (type double) vs `0L` (type integer) is too inconvenient for you the user. So such coercions happen in `data.table` automatically without warning. Thanks to zero-copy coercion there is no speed penalty, even when calling `set()` many times in a loop, so there's no speed penalty to warn you about either. However, we believe that assigning a character value such as `"2"` into an integer column is more likely to be a user mistake that you would like to be warned about. The type difference (character vs integer) may be the only clue that you have selected the wrong column, or typed the wrong variable to be assigned to that column. For this reason we view character to numeric-like coercion differently and will warn about it. If it is correct, then the warning is intended to nudge you to wrap the RHS with `as.()` so that it is clear to readers of your code that a coercion from character to that type is intended. For example : diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 48c502581e..df6b7bf6a4 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -5164,7 +5164,8 @@ test(1313.22, DT[, list(y=max(y, na.rm=TRUE)), by=x], DT[c(5,10)]) # for character set.seed(1L) -DT <- data.table(x=rep(1:6, each=3), y=sample(c("", letters[1:3], NA), 18, TRUE)) +DT <- data.table(x=rep(1:7, each=3), y=sample(c("", letters[1:3], NA), 21, TRUE)) +DT[x==7, y := c("","b","c")] test(1313.23, DT[, min(y), by=x], DT[, base::min(y), by=x]) test(1313.24, DT[, max(y), by=x], DT[, base::max(y), by=x]) test(1313.25, DT[, min(y, na.rm=TRUE), by=x], DT[, base::min(y, na.rm=TRUE), by=x]) @@ -5172,8 +5173,8 @@ test(1313.26, DT[, max(y, na.rm=TRUE), by=x], DT[, base::max(y, na.rm=TRUE), by= DT[x==6, y := NA_character_] test(1313.27, DT[, min(y), by=x], DT[, base::min(y), by=x]) test(1313.28, DT[, max(y), by=x], DT[, base::max(y), by=x]) -test(1313.29, DT[, min(y, na.rm=TRUE), by=x], data.table(x=1:6, V1=c("a","a","c","","a",NA)), warning="No non-missing") -test(1313.30, DT[, max(y, na.rm=TRUE), by=x], data.table(x=1:6, V1=c("b","a","c","a","c",NA)), warning="No non-missing") +test(1313.29, DT[, min(y, na.rm=TRUE), by=x], data.table(x=1:7, V1=c("a","a","c","","a",NA,"")), warning="No non-missing") +test(1313.30, DT[, max(y, na.rm=TRUE), by=x], data.table(x=1:7, V1=c("b","a","c","a","c",NA,"c")), warning="No non-missing") # bug 700 - bmerge, roll=TRUE and nomatch=0L when i's key group occurs more than once dt1 <- data.table(structure(list(x = c(7L, 33L), y = structure(c(15912, 15912), class = "Date"), z = c(626550.35284, 7766.385)), .Names = diff --git a/src/data.table.h b/src/data.table.h index 9fb386567d..50d43a34a5 100644 --- a/src/data.table.h +++ b/src/data.table.h @@ -89,6 +89,7 @@ extern SEXP char_ordered; extern SEXP char_datatable; extern SEXP char_dataframe; extern SEXP char_NULL; +extern SEXP char_maxString; extern SEXP sym_sorted; extern SEXP sym_index; extern SEXP sym_BY; diff --git a/src/gsumm.c b/src/gsumm.c index 9c31f4a761..064131f3cd 100644 --- a/src/gsumm.c +++ b/src/gsumm.c @@ -760,15 +760,15 @@ SEXP gmin(SEXP x, SEXP narm) case STRSXP: ans = PROTECT(allocVector(STRSXP, ngrp)); protecti++; if (!LOGICAL(narm)[0]) { - for (i=0; i