Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,30 @@

1. New option `options(datatable.quiet = TRUE)` turns off the package startup message, [#3489](https://github.com/Rdatatable/data.table/issues/3489). `suppressPackageStartupMessages()` continues to work too. Thanks to @leobarlach for the suggestion inspired by `options(tidyverse.quiet = TRUE)`. We don't know of a way to make a package respect the `quietly=` argument of `library()` and `require()` because the `quietly=` argument isn't passed through for use by the package's own `.onAttach`. If you can see how to do that, please submit a patch to R.

2. `rleid` functions now support long vectors (length > 2 billion).

3. Assigning to a list column no longer requires the RHS to be wrapped with `list` or `.()`, [#950](https://github.com/Rdatatable/data.table/issues/950).
```R
> DT = data.table(A=1:3, b=list(1:2,"foo",3:5))
> DT
A b
<int> <list>
1: 1 1,2
2: 2 foo
3: 3 3,4,5

# The following all accomplish the same assignment:
> DT[2, b:=letters[9:13]] # was error, now works
> DT[2, b:=.(letters[9:13])] # was error, now works
> DT[2, b:=.(list(letters[9:13]))] # .(list()) was needed, still works
> DT
A b
<int> <list>
1: 1 1,2
2: 2 i,j,k,l,m
3: 3 3,4,5
```

#### BUG FIXES

1. `first`, `last`, `head` and `tail` by group no longer error in some cases, [#2030](https://github.com/Rdatatable/data.table/issues/2030) [#3462](https://github.com/Rdatatable/data.table/issues/3462). Thanks to @franknarf1 for reporting.
Expand Down
16 changes: 10 additions & 6 deletions R/data.table.R
Original file line number Diff line number Diff line change
Expand Up @@ -589,12 +589,14 @@ replace_dot_alias <- function(e) {
if (!byjoin || nqbyjoin) {
# Really, `anyDuplicated` in base is AWESOME!
# allow.cartesian shouldn't error if a) not-join, b) 'i' has no duplicates
if (verbose) {last.started.at=proc.time();cat("Constructing irows for '!byjoin || nqbyjoin' ... ");flush.console()}
irows = if (allLen1) f__ else vecseq(f__,len__,
if( allow.cartesian ||
notjoin || # #698. When notjoin=TRUE, ignore allow.cartesian. Rows in answer will never be > nrow(x).
!anyDuplicated(f__, incomparables = c(0L, NA_integer_))) # #742. If 'i' has no duplicates, ignore
NULL
else as.double(nrow(x)+nrow(i))) # rows in i might not match to x so old max(nrow(x),nrow(i)) wasn't enough. But this limit now only applies when there are duplicates present so the reason now for nrow(x)+nrow(i) is just to nail it down and be bigger than max(nrow(x),nrow(i)).
if (allow.cartesian ||
notjoin || # #698. When notjoin=TRUE, ignore allow.cartesian. Rows in answer will never be > nrow(x).
!anyDuplicated(f__, incomparables = c(0L, NA_integer_))) {
NULL # #742. If 'i' has no duplicates, ignore
} else as.double(nrow(x)+nrow(i))) # rows in i might not match to x so old max(nrow(x),nrow(i)) wasn't enough. But this limit now only applies when there are duplicates present so the reason now for nrow(x)+nrow(i) is just to nail it down and be bigger than max(nrow(x),nrow(i)).
if (verbose) {cat(timetaken(last.started.at),"\n"); flush.console()}
# Fix for #1092 and #1074
# TODO: implement better version of "any"/"all"/"which" to avoid
# unnecessary construction of logical vectors
Expand Down Expand Up @@ -629,10 +631,12 @@ replace_dot_alias <- function(e) {
if (length(xo) && length(irows)) {
irows = xo[irows] # TO DO: fsort here?
if (mult=="all" && !allGrp1) { # following #1991 fix, !allGrp1 will always be TRUE. TODO: revisit.
if (verbose) {last.started.at=proc.time();cat("Reorder irows for 'mult==\"all\" && !allGrp1' ... ");flush.console()}
irows = setorder(setDT(list(indices=rep.int(indices__, len__), irows=irows)))[["irows"]]
if (verbose) {cat(timetaken(last.started.at),"\n"); flush.console()}
}
}
if(optimizedSubset){
if (optimizedSubset){
## special treatment for calls like DT[x == 3] that are transformed into DT[J(x=3), on = "x==x"]

if(!.Call(CisOrderedSubset, irows, nrow(x))){
Expand Down
Binary file added inst/tests/issue_3400_fread.txt
Binary file not shown.
43 changes: 30 additions & 13 deletions inst/tests/tests.Rraw
Original file line number Diff line number Diff line change
Expand Up @@ -1016,8 +1016,10 @@ test(350.6, DT[c(0,0,0), .N], 0L)

# Test recycling list() on RHS of :=
DT = data.table(a=1:3,b=4:6,c=7:9,d=10:12)
test(351, DT[,c("a","b"):=list(13:15)], data.table(a=13:15,b=13:15,c=7:9,d=10:12))
test(352, DT[,letters[1:4]:=list(1L,NULL)], error="Supplied 4 columns to be assigned 2 items. Please see NEWS for v1.12.2")
test(351.1, DT[,c("a","b"):=list(13:15)], ans<-data.table(a=13:15,b=13:15,c=7:9,d=10:12))
test(351.2, DT[,c("a","b"):=13:15], ans)
test(352.1, DT[,letters[1:4]:=list(1L,NULL)], error="Supplied 4 columns to be assigned 2 items. Please see NEWS for v1.12.2")
test(352.2, DT[,letters[1:4]:=list(1L,NULL,2L,NULL)], data.table(a=c(1L,1L,1L),c=c(2L,2L,2L)))

# Test assigning new levels into factor columns
DT = data.table(f=factor(c("a","b")),x=1:4)
Expand Down Expand Up @@ -2110,6 +2112,7 @@ test(753.1, DT[,c("x1","x2"):=4:6, verbose = TRUE], data.table(a=letters[1:3],x=
test(753.2, DT[2,x2:=7L], data.table(a=letters[1:3],x=3:1,x1=4:6,x2=c(4L,7L,6L),key="a"))
DT = data.table(a=letters[3:1],x=1:3,y=4:6)
test(754, DT[,c("x1","y1","x2"):=list(x,y)], error="Supplied 3 columns to be assigned 2 items. Please see NEWS for v1.12.2")
test(754.0, DT[,c("x1","y1","x2"):=list(x,y,x)], data.table(a=letters[3:1],x=1:3,y=4:6,x1=1:3,y1=4:6,x2=1:3))
# And non-recycling i.e. that a single column copy does copy the column
DT = data.table(a=1:3)
test(754.1, DT[,b:=a][1,a:=4L][2,b:=5L], data.table(a=INT(4,2,3),b=INT(1,5,3)))
Expand Down Expand Up @@ -4869,10 +4872,10 @@ test(1294.17, dt[, c := NA]$c, rep(NA, 3))
test(1294.18, dt[, c := list(1)]$c, rep(TRUE, 3), warning="Coerced double RHS to logical")
test(1294.19, dt[, c := list(list(1))]$c, rep(TRUE, 3), warning="Coerced list RHS to logical")
test(1294.20, dt[, c := "bla"]$c, rep(NA, 3), warning="Coerced character RHS to logical")
test(1294.21, dt[, d := 1]$d, rep(list(1), 3), warning="Coerced double RHS to list")
test(1294.22, dt[, d := 1L]$d, rep(list(1L), 3), warning="Coerced integer RHS to list")
test(1294.23, dt[, d := TRUE]$d, rep(list(TRUE), 3), warning="Coerced logical RHS to list")
test(1294.24, dt[, d := "bla"]$d, rep(list("bla"), 3), warning="Coerced character RHS to list")
test(1294.21, dt[, d := 1]$d, rep(list(1), 3))
test(1294.22, dt[, d := 1L]$d, rep(list(1L), 3))
test(1294.23, dt[, d := TRUE]$d, rep(list(TRUE), 3))
test(1294.24, dt[, d := "bla"]$d, rep(list("bla"), 3))
test(1294.25, dt[, d := list(list(1))]$d, rep(list(1), 3))
test(1294.26, dt[, e := 1]$e, rep("1", 3), warning="Coerced double RHS to character")
test(1294.27, dt[, e := 1L]$e, rep("1", 3), warning="Coerced integer RHS to character")
Expand Down Expand Up @@ -6657,9 +6660,13 @@ test(1478.2, sapply(ll, length), INT(1,2,0,0))
test(1479, rbindlist(replicate(4,rbindlist(replicate(47, NULL),
use.names=TRUE, fill=TRUE)), use.names=TRUE, fill=TRUE), null.data.table())

# #936, assigning list column to a factor column by reference
DT <- data.table(x = factor(c("a", "b c", "d e f")))
test(1480, DT[, x := strsplit(as.character(x), " ")], data.table(x=list("a", letters[2:3], letters[4:6])))
# #936, plonking list column over a factor column by reference
DT = data.table(x = factor(c("a", "b c", "d e f")))
test(1480.1, DT[, x := strsplit(as.character(x), " ")], ans<-data.table(x=list("a", letters[2:3], letters[4:6])))
DT = data.table(x = factor(c("a", "b c", "d e f")))
test(1480.2, DT[, x := .(strsplit(as.character(x), " "))], ans)
DT = data.table(x = factor(c("a", "b c", "d e f")))
test(1480.3, DT[, x := list(strsplit(as.character(x), " "))], ans)

# #970, over-allocation issue
a=data.frame(matrix(1,ncol=101L))
Expand Down Expand Up @@ -13783,7 +13790,7 @@ test(2005.1, truelength(NULL), 0L)
DT = data.table(a=1:3, b=4:6)
test(2005.2, set(DT, 4L, "b", NA), error="i[1] is 4 which is out of range [1,nrow=3]")
test(2005.3, set(DT, 3L, 8i, NA), error="j is type 'complex'. Must be integer, character, or numeric is coerced with warning.")
test(2005.4, set(DT, 1L, 2L, expression(x+2)), error="RHS of assignment is not NULL, not an atomic vector (see ?is.atomic) and not a list column.")
test(2005.4, set(DT, 1L, 2L, expression(x+2)), error="(list) object cannot be coerced to type 'integer'") # R's error message same as returned by as.integer(expression(x+2))
DT[,foo:=factor(c("a","b","c"))]
test(2005.5, DT[2, foo:=8i], error="Can't assign to column 'foo' (type 'factor') a value of type 'complex' (not character, factor, integer or numeric)")
test(2005.6, DT[2, a:=9, verbose=TRUE], output="Coerced length-1 RHS from double to integer to match column's type. No precision was lost. If this")
Expand Down Expand Up @@ -13986,10 +13993,12 @@ setkey(DT, a, b)
setorder(DT, b)
test(2021.3, key(DT), NULL)

# assign RHS list better error msg, #950
# assign to list column works now when RHS is not list, #950
d = data.table(id=c("a","b"), f=list(function(x) x*2, function(x) x^2), key="id")
test(2022.1, d[.("a"), f:=list(function(x) x^3)], error="RHS of assignment is not NULL.*try wrapping it in an extra list.*")
test(2022.2, d[.("a"), f:=list(list(function(x) x^3))], data.table(id=c("a","b"), f=list(function(x) x^3, function(x) x^2), key="id"))
test(2022.1, d[.("a"), f:=function(x)x^3], data.table(id=c("a","b"), f=list(function(x) x^3, function(x) x^2), key="id"))
test(2022.2, d[.("a"), f:=list(function(x) x^4)], data.table(id=c("a","b"), f=list(function(x) x^4, function(x) x^2), key="id"))
test(2022.3, d[2, f:=6:8], data.table(id=c("a","b"), f=list(function(x) x^4, 6:8), key="id"))
test(2022.4, d[.("b"), f:=list(list(function(x) x^3))], data.table(id=c("a","b"), f=list(function(x) x^4, function(x) x^3), key="id"))

# keyby= used the wrong index when "CLASS" is a leading substring of "CLASS_L3" and an index exists on CLASS_L3, #3498
DT = data.table(
Expand All @@ -14003,6 +14012,14 @@ test(2023.4, indices(DT), "CLASS_L3")
test(2023.5, DT[, .N, keyby = CLASS], ans) # just this test failed in v1.12.2 and before due to using the CLASS_L3 index incorrectly
test(2023.6, DT[, .N, by = CLASS], data.table(CLASS=c("aaaa","dddd","gggg","eeee","ffff"), N=INT(7,1,3,2,1)))

# More verbose timings, #1265.
# Build a small table, put an index on y, then run a subset-assign on y==6 with
# verbose=TRUE. The output= patterns look for the two progress messages
# "Constructing irows for ..." and "Reorder irows for ...".
DT = data.table(x=c("a","b","c","b","a","c"), y=c(1,3,6,1,6,3), v=1:6)
setindex(DT, y)
test(2024, DT[y==6, v:=10L, verbose=TRUE], output=c("Constructing irows for.*", "Reorder irows for.*"))

# fread embedded NULL, #3400
# Reads the fixture issue_3400_fread.txt from the test directory with skip=1 and
# header=TRUE; the expected result is a 2-row table with columns A, B and C.
# NOTE(review): "embedded NULL" presumably means NUL bytes in the file -- confirm against #3400.
test(2025, fread(testDir("issue_3400_fread.txt"), skip=1, header=TRUE), data.table(A=INT(1,3), B=INT(2,2), C=INT(3,1)))


###################################
# Add new tests above this line #
Expand Down
Loading