Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,8 @@

18. Integer values that are too large to fit in `int64` will now be read as strings [#2250](https://github.com/Rdatatable/data.table/issues/2250).


19. Internal-only `.shallow` now retains keys correctly, [#2336](https://github.com/Rdatatable/data.table/issues/2336). Thanks to @MarkusBonsch for reporting, fixing ([PR #2337](https://github.com/Rdatatable/data.table/pull/2337)) and adding 37 tests. This much advances the journey towards exporting `shallow()`, [#2323](https://github.com/Rdatatable/data.table/issues/2323).

#### NOTES

1. `?data.table` makes explicit the option of using a `logical` vector in `j` to select columns, [#1978](https://github.com/Rdatatable/data.table/issues/1978). Thanks @Henrik-P for the note and @MichaelChirico for filing.
Expand Down
39 changes: 33 additions & 6 deletions R/data.table.R
Original file line number Diff line number Diff line change
Expand Up @@ -2355,14 +2355,41 @@ point <- function(to, to_idx, from, from_idx) {
isnull = is.null(cols)
if (!isnull) cols = validate(cols, x) # NULL is default = all columns
ans = .Call(Cshallowwrapper, x, cols) # copies VECSXP only
if (retain.key && isnull) return(ans) # handle most frequent case first
# rest of the cases
cols = names(x)[cols]
retain.key = retain.key && identical(cols, head(key(x), length(cols)))
setattr(ans, 'sorted', if (haskey(x) && retain.key) cols else NULL)

if(retain.key){
if(isnull) return(ans) # handle most frequent case first
## get correct key if cols are present
cols = names(x)[cols]
keylength <- which.first(!key(ans) %chin% cols) - 1L
if(is.na(keylength)) keylength <- length(key(ans))
if(!keylength){
setattr(ans, "sorted", NULL) ## no key remaining
} else {
setattr(ans, "sorted", head(key(ans), keylength)) ## keep what can be kept
}
## take care of attributes.
indices <- names(attributes(attr(ans, "index")))
for(index in indices){
indexcols <- strsplit(index, split = "__")[[1]][-1L]
indexlength <- which.first(!indexcols %chin% cols) - 1L
if(is.na(indexlength)) next ## all columns are present, nothing to be done
reducedindex <- paste0(c("", indexcols[seq_len(indexlength)]), collapse = "__") ## the columns until the first missing form the new index
if(reducedindex %chin% indices || !indexlength){
## Either reduced index already present or no columns of the original index remain.
## Drop the original index completely
setattr(attr(ans, "index", exact = TRUE), index, NULL)
} else {
## rename index to reducedindex
names(attributes(attr(ans, "index")))[names(attributes(attr(ans, "index"))) == index] <- reducedindex
}
}
} else { # retain.key == FALSE
setattr(ans, "sorted", NULL)
setattr(ans, "index", NULL)
}
if (unlock) setattr(ans, '.data.table.locked', NULL)
ans
# TODO: check/remove attributes for secondary keys?

}

shallow <- function(x, cols=NULL) {
Expand Down
5 changes: 1 addition & 4 deletions R/foverlaps.R
Original file line number Diff line number Diff line change
Expand Up @@ -75,9 +75,6 @@ foverlaps <- function(x, y, by.x = if (!is.null(key(x))) key(x) else key(y), by.
## hopefully all checks are over. Now onto the actual task at hand.
origx = x; x = shallow(x, by.x)
origy = y; y = shallow(y, by.y)
if (identical(by.x, key(origx)[seq_along(by.x)]))
setattr(x, 'sorted', by.x)
setattr(y, 'sorted', by.y) ## is definitely sorted on by.y
roll = switch(type, start=, end=, equal= 0.0, any=, within= +Inf)
make_call <- function(names, fun=NULL) {
if (is.character(names))
Expand Down Expand Up @@ -113,7 +110,7 @@ foverlaps <- function(x, y, by.x = if (!is.null(key(x))) key(x) else key(y), by.
if (verbose) {cat(round(proc.time()[3]-last.started.at,3),"secs\n");flush.console}
matches <- function(ii, xx, del, ...) {
cols = setdiff(names(xx), del)
xx = shallow(xx, cols)
xx = .shallow(xx, cols, retain.key = FALSE)
ans = bmerge(xx, ii, seq_along(xx), seq_along(xx), haskey(xx), integer(0), mult=mult, ops=rep(1L, length(xx)), integer(0), 1L, verbose=verbose, ...)
# vecseq part should never run here, but still...
if (ans$allLen1) ans$starts else vecseq(ans$starts, ans$lens, NULL)
Expand Down
50 changes: 49 additions & 1 deletion inst/tests/tests.Rraw
Original file line number Diff line number Diff line change
Expand Up @@ -6843,11 +6843,59 @@ setkey(x1, a1, a2)
test(1544.1, setDF(merge(x1, y)), merge(as.data.frame(x1), as.data.frame(y)))
test(1544.2, setDF(merge(x1, y, by="a2")), merge(as.data.frame(x1), as.data.frame(y), by="a2"))
# also test shallow here so as to catch future regressions
x1 <- data.table(a1 = c('a', 'b', 'c'), a2 = c(1L, 3L, 2L), key="a1,a2")
x1 <- data.table(a1 = c('a', 'b', 'c'), a2 = c(1L, 3L, 2L), a3 = c(TRUE, FALSE, TRUE), key="a1,a2")
test(1545.1, key(.shallow(x1, cols="a2")), NULL)
test(1545.2, key(.shallow(x1, retain.key=FALSE)), NULL)
test(1545.2, key(.shallow(x1, cols = "a1", retain.key=FALSE)), NULL)
test(1545.3, key(.shallow(x1, retain.key=TRUE)), key(x1))
test(1545.4, key(.shallow(x1, cols="a1", retain.key=TRUE)), "a1")
# tests for #2336. .shallow drops keys unnecessarily
test(1545.5, key(.shallow(x1, cols=c("a1", "a3"), retain.key=TRUE)), "a1")
test(1545.6, .shallow(x1, cols=c("a3", "a1"), retain.key=TRUE), .shallow(x1, cols=c("a3", "a1"), retain.key=TRUE))
test(1545.7, key(.shallow(x1, cols=c("a1", "a2", "a3"), retain.key=TRUE)), c("a1", "a2"))
test(1545.8, key(.shallow(x1, cols=c("a2", "a3"), retain.key=TRUE)), NULL)
test(1545.9, key(.shallow(x1, cols=c("a2"), retain.key=TRUE)), NULL)
test(1545.10, key(.shallow(x1, cols=c("a3"), retain.key=TRUE)), NULL)
setkey(x1, NULL)
test(1545.11, key(.shallow(x1, retain.key=TRUE)), NULL)
test(1545.111, key(.shallow(x1, retain.key=FALSE)), NULL)
test(1545.12, key(.shallow(x1, cols=c("a1", "a2"), retain.key=TRUE)), NULL)
test(1545.121, key(.shallow(x1, cols=c("a1", "a2"), retain.key=FALSE)), NULL)
x1 <- x1[0]
test(1545.13, key(.shallow(x1, retain.key=TRUE)), NULL)
test(1545.131, key(.shallow(x1, retain.key=FALSE)), NULL)
test(1545.132, key(.shallow(x1, cols = c("a1"), retain.key=FALSE)), NULL)
test(1545.133, key(.shallow(x1, cols=c("a1", "a2"), retain.key=TRUE)), NULL)
setkey(x1, a1)
test(1545.134, key(.shallow(x1, retain.key=FALSE)), NULL)
test(1545.135, key(.shallow(x1, cols = "a2", retain.key=FALSE)), NULL)
test(1545.136, key(.shallow(x1, retain.key=TRUE)), "a1")
test(1545.137, key(.shallow(x1, cols=c("a1", "a2"), retain.key=TRUE)), "a1")
test(1545.138, key(.shallow(x1, cols=c("a3"), retain.key=TRUE)), NULL)

# tests for #2336. .shallow now retains indices as well
x1 <- data.table(a1 = c('a', 'a', 'a', 'a', 'b', 'c'), a2 = c(2L, 2L, 1L, 2L, 3L, 2L), a3 = c(FALSE, TRUE, TRUE, FALSE, FALSE, TRUE), key="a1,a2")
setindex(x1, a1, a2, a3)
setindex(x1, a1, a3)
test(1545.15, indices(.shallow(x1, retain.key=FALSE)), NULL)
test(1545.16, indices(.shallow(x1, cols = "a1", retain.key=FALSE)), NULL)
test(1545.17, indices(.shallow(x1, retain.key=TRUE)), indices(x1))
test(1545.18, forderv(.shallow(x1, retain.key=TRUE)[attr(attr(.shallow(x1, retain.key=TRUE), "index"), "__a1__a2__a3")], c("a1", "a2", "a3")), integer(0))
test(1545.19, forderv(.shallow(x1, retain.key=TRUE)[attr(attr(.shallow(x1, retain.key=TRUE), "index"), "__a1__a3")], c("a1", "a3")), integer(0))
test(1545.20, forderv(.shallow(x1, retain.key=TRUE)[attr(attr(.shallow(x1, retain.key=TRUE), "index"), "__a1")], c("a1")), integer(0))
test(1545.21, indices(.shallow(x1, cols = "a1", retain.key=TRUE)), c("a1"))
test(1545.22, forderv(.shallow(x1, cols = "a1", retain.key=TRUE)[attr(attr(.shallow(x1, cols = "a1", retain.key=TRUE), "index"), "__a1")], c("a1")), integer(0))
test(1545.23, attributes(attr(.shallow(x1, cols = c("a1", "a2"), retain.key = TRUE), "index", exact = TRUE)), attributes(attr(.shallow(x1, cols = c("a2", "a1"), retain.key = TRUE), "index", exact = TRUE)))
test(1545.24, indices(.shallow(x1, cols = c("a1", "a2"), retain.key=TRUE)), c("a1__a2", "a1"))
test(1545.25, forderv(.shallow(x1, cols = c("a1", "a2"), retain.key=TRUE)[attr(attr(.shallow(x1, cols = c("a1", "a2"), retain.key=TRUE), "index"), "__a1")], c("a1")), integer(0))
test(1545.26, forderv(.shallow(x1, cols = c("a1", "a2"), retain.key=TRUE)[attr(attr(.shallow(x1, cols = c("a1", "a2"), retain.key=TRUE), "index"), "__a1__a2")], c("a1", "a2")), integer(0))
test(1545.27, indices(.shallow(x1, cols = c("a1", "a3"), retain.key=TRUE)), c("a1", "a1__a3"))
test(1545.28, forderv(.shallow(x1, cols = c("a1", "a3"), retain.key=TRUE)[attr(attr(.shallow(x1, cols = c("a1", "a3"), retain.key=TRUE), "index"), "__a1")], c("a1")), integer(0))
test(1545.29, forderv(.shallow(x1, cols = c("a1", "a3"), retain.key=TRUE)[attr(attr(.shallow(x1, cols = c("a1", "a3"), retain.key=TRUE), "index"), "__a1__a3")], c("a1", "a3")), integer(0))
test(1545.30, indices(.shallow(x1, cols = c("a2", "a3"), retain.key=TRUE)), NULL)
test(1545.31, indices(.shallow(x1, cols = c("a3"), retain.key=TRUE)), NULL)
test(1545.32, .shallow(x1, cols = c("a1", "a2", "a3"), retain.key=TRUE), .shallow(x1, retain.key=TRUE))


# test for #1234
df1 = df2 = data.frame(cats = rep(c('', ' ', 'meow'), 5))
Expand Down