Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,8 @@

10. It is now possible to join two tables on their common columns, so called _natural join_, [#629](https://github.com/Rdatatable/data.table/issues/629). Use `on=.NATURAL` or `options("datatable.naturaljoin"=TRUE)`. Latter one works only when `x` has no key, if key is present then key columns are being used to join as before. Thanks to David Kulp for request.

11. `as.data.table` gains `key` argument mirroring its use in `setDT` and `data.table`, [#890](https://github.com/Rdatatable/data.table/issues/890). As a byproduct, the arguments of `as.data.table.array` have changed order, which could affect code relying on positional arguments to this method. Thanks @cooldome for the suggestion and @MichaelChirico for implementation.

#### BUG FIXES

1. `first`, `last`, `head` and `tail` by group no longer error in some cases, [#2030](https://github.com/Rdatatable/data.table/issues/2030) [#3462](https://github.com/Rdatatable/data.table/issues/3462). Thanks to @franknarf1 for reporting.
Expand Down
57 changes: 30 additions & 27 deletions R/as.data.table.R
Original file line number Diff line number Diff line change
@@ -1,18 +1,18 @@
as.data.table <-function(x, keep.rownames=FALSE, ...)
as.data.table <-function(x, keep.rownames=FALSE, key=NULL, ...)
{
if (is.null(x))
return(null.data.table())
UseMethod("as.data.table")
}

as.data.table.default <- function(x, ...){
as.data.table(as.data.frame(x, ...)) # we cannot assume as.data.frame will do copy, thus setDT changed to as.data.table #3230
as.data.table(as.data.frame(x, ...), ...) # we cannot assume as.data.frame will do copy, thus setDT changed to as.data.table #3230
}

as.data.table.factor <- as.data.table.ordered <-
as.data.table.integer <- as.data.table.numeric <-
as.data.table.logical <- as.data.table.character <-
as.data.table.Date <- as.data.table.ITime <- function(x, keep.rownames=FALSE, ...) {
as.data.table.Date <- as.data.table.ITime <- function(x, keep.rownames=FALSE, key=NULL, ...) {
if (is.matrix(x)) {
return(as.data.table.matrix(x, ...))
}
Expand All @@ -27,21 +27,21 @@ as.data.table.Date <- as.data.table.ITime <- function(x, keep.rownames=FALSE, ..
nm = if (length(x) == 2L) if (is.character(keep.rownames)) keep.rownames[1L] else "rn"
setattr(x, 'names', c(nm, tt))
}
as.data.table.list(x, FALSE)
as.data.table.list(x, FALSE, key)
}

# as.data.table.table - FR #4848
as.data.table.table <- function(x, keep.rownames=FALSE, ...) {
as.data.table.table <- function(x, keep.rownames=FALSE, key=NULL, ...) {
# Fix for bug #5408 - order of columns are different when doing as.data.table(with(DT, table(x, y)))
val = rev(dimnames(provideDimnames(x)))
if (is.null(names(val)) || !any(nzchar(names(val))))
setattr(val, 'names', paste0("V", rev(seq_along(val))))
ans <- data.table(do.call(CJ, c(val, sorted=FALSE)), N = as.vector(x))
ans <- data.table(do.call(CJ, c(val, sorted=FALSE)), N = as.vector(x), key=key)
setcolorder(ans, c(rev(head(names(ans), -1L)), "N"))
ans
}

as.data.table.matrix <- function(x, keep.rownames=FALSE, ...) {
as.data.table.matrix <- function(x, keep.rownames=FALSE, key=NULL, ...) {
if (!identical(keep.rownames, FALSE)) {
# can specify col name to keep.rownames, #575
ans = data.table(rn=rownames(x), x, keep.rownames=FALSE)
Expand All @@ -50,7 +50,6 @@ as.data.table.matrix <- function(x, keep.rownames=FALSE, ...) {
return(ans)
}
d <- dim(x)
nrows <- d[1L]
ncols <- d[2L]
ic <- seq_len(ncols)
if (!ncols) return(null.data.table())
Expand All @@ -64,33 +63,37 @@ as.data.table.matrix <- function(x, keep.rownames=FALSE, ...) {
for (i in ic) value[[i]] <- as.vector(x[, i]) # to drop any row.names that would otherwise be retained inside every column of the data.table
}
col_labels <- dimnames(x)[[2L]]
setDT(value)
if (length(col_labels) == ncols) {
if (any(empty <- !nzchar(col_labels)))
col_labels[empty] <- paste0("V", ic[empty])
setattr(value, "names", col_labels)
setnames(value, col_labels)
} else {
setattr(value, "names", paste0("V", ic))
setnames(value, paste0("V", ic))
}
setattr(value,"row.names",.set_row_names(nrows))
setattr(value,"class",c("data.table","data.frame"))
alloc.col(value)
# setkey now to allow matrix column names as key
setkeyv(value, key)
value
}

# as.data.table.array - #1418
as.data.table.array <- function(x, keep.rownames=FALSE, sorted=TRUE, value.name="value", na.rm=TRUE, ...) {
as.data.table.array <- function(x, keep.rownames=FALSE, key=NULL, sorted=TRUE, value.name="value", na.rm=TRUE, ...) {
Comment thread
jangorecki marked this conversation as resolved.
dx = dim(x)
if (length(dx) <= 2L)
stop("as.data.table.array method should be only called for arrays with 3+ dimensions, for 2 dimensions matrix method should be used")
stop("as.data.table.array method should only be called for arrays with 3+ dimensions; use the matrix method for 2-dimensional arrays")
if (!is.character(value.name) || length(value.name)!=1L || is.na(value.name) || !nzchar(value.name))
stop("Argument 'value.name' must be scalar character, non-NA and non zero char")
stop("Argument 'value.name' must be scalar character, non-NA and at least one character")
if (!is.logical(sorted) || length(sorted)!=1L || is.na(sorted))
stop("Argument 'sorted' must be scalar logical and non-NA")
if (!is.logical(na.rm) || length(na.rm)!=1L || is.na(na.rm))
stop("Argument 'na.rm' must be scalar logical and non-NA")
if (!missing(sorted) && !is.null(key))
stop("Please provide either 'key' or 'sorted', but not both.")


dnx = dimnames(x)
# NULL dimnames will create integer keys, not character as in table method
val = rev(if (is.null(dnx)) lapply(dim(x), seq.int) else dnx)
val = rev(if (is.null(dnx)) lapply(dx, seq.int) else dnx)
if (is.null(names(val)) || all(!nzchar(names(val))))
setattr(val, 'names', paste0("V", rev(seq_along(val))))
if (value.name %chin% names(val))
Expand All @@ -102,12 +105,12 @@ as.data.table.array <- function(x, keep.rownames=FALSE, sorted=TRUE, value.name=
setnames(ans, "N", value.name)
dims = rev(head(names(ans), -1L))
setcolorder(ans, c(dims, value.name))
if (isTRUE(sorted))
setkeyv(ans, dims)
if (isTRUE(sorted) && is.null(key)) key = dims
setkeyv(ans, key)
ans[]
}

as.data.table.list <- function(x, keep.rownames=FALSE, ...) {
as.data.table.list <- function(x, keep.rownames=FALSE, key=NULL, ...) {
wn = sapply(x,is.null)
if (any(wn)) x = x[!wn]
if (!length(x)) return( null.data.table() )
Expand Down Expand Up @@ -148,10 +151,8 @@ as.data.table.list <- function(x, keep.rownames=FALSE, ...) {
setattr(xx, 'names', names(x)[nz])
x = xx
}
if (is.null(names(x))) setattr(x,"names",paste0("V",seq_len(length(x))))
setattr(x,"row.names",.set_row_names(max(n)))
setattr(x,"class",c("data.table","data.frame"))
alloc.col(x)
setDT(x, key=key) # copy ensured above; also, setDT handles naming
x
}

# don't retain classes before "data.frame" while converting
Expand All @@ -166,16 +167,16 @@ as.data.table.list <- function(x, keep.rownames=FALSE, ...) {
unique( c("data.table", "data.frame", tail(cx, length(cx)-n)) )
}

as.data.table.data.frame <- function(x, keep.rownames=FALSE, ...) {
as.data.table.data.frame <- function(x, keep.rownames=FALSE, key=NULL, ...) {
if (!identical(keep.rownames, FALSE)) {
# can specify col name to keep.rownames, #575
ans = data.table(rn=rownames(x), x, keep.rownames=FALSE)
ans = data.table(rn=rownames(x), x, keep.rownames=FALSE, key=key)
if (is.character(keep.rownames))
setnames(ans, 'rn', keep.rownames[1L])
return(ans)
}
ans = copy(x) # TO DO: change this deep copy to be shallow.
setattr(ans,"row.names",.set_row_names(nrow(x)))
setattr(ans, "row.names", .set_row_names(nrow(x)))

## NOTE: This test (#527) is no longer in effect ##
# for nlme::groupedData which has class c("nfnGroupedData","nfGroupedData","groupedData","data.frame")
Expand All @@ -185,6 +186,8 @@ as.data.table.data.frame <- function(x, keep.rownames=FALSE, ...) {
# fix for #1078 and #1128, see .resetclass() for explanation.
setattr(ans, "class", .resetclass(x, "data.frame"))
alloc.col(ans)
setkeyv(ans, key)
ans
}

as.data.table.data.table <- function(x, ...) {
Expand Down
9 changes: 7 additions & 2 deletions R/xts.R
Original file line number Diff line number Diff line change
@@ -1,10 +1,15 @@
as.data.table.xts <- function(x, keep.rownames = TRUE, ...) {
as.data.table.xts <- function(x, keep.rownames = TRUE, key=NULL, ...) {
stopifnot(requireNamespace("xts"), !missing(x), xts::is.xts(x))
# as.data.frame.xts will handle copying, and
# the error check above ensures as.data.frame.xts is applied
r = setDT(as.data.frame(x, row.names=NULL))
if (!keep.rownames) return(r[])
if ("index" %chin% names(x)) stop("Input xts object should not have 'index' column because it would result in duplicate column names. Rename 'index' column in xts or use `keep.rownames=FALSE` and add index manually as another column.")
r[, "index" := zoo::index(x)]
setcolorder(r, c("index", setdiff(names(r), "index")))[]
setcolorder(r, c("index", setdiff(names(r), "index")))
# save to end to allow for key='index'
setkeyv(r, key)
r[]
}

as.xts.data.table <- function(x, ...) {
Expand Down
81 changes: 72 additions & 9 deletions inst/tests/tests.Rraw
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ if (exists("test.data.table", .GlobalEnv, inherits=FALSE)) {

all.equal.data.table = data.table:::all.equal.data.table
any_na = data.table:::any_na
as.data.table.array = data.table:::as.data.table.array
as.IDate.default = data.table:::as.IDate.default
as.ITime.default = data.table:::as.ITime.default
binary = data.table:::binary
Expand Down Expand Up @@ -6516,6 +6517,14 @@ if (test_xts) {
setcolorder(dt, c(2, 3, 1))
dt[ , char_col := 'a']
test(1465.17, as.xts(dt), xt, warning = 'columns are not numeric')

# 890 -- key argument for as.data.table.xts
x = xts(1:10, as.Date(1:10, origin = "1970-01-01"))
test(1465.18, capture.output(as.data.table(x, key="index")),
c(" index V1", " 1: 1970-01-02 1", " 2: 1970-01-03 2",
" 3: 1970-01-04 3", " 4: 1970-01-05 4", " 5: 1970-01-06 5",
" 6: 1970-01-07 6", " 7: 1970-01-08 7", " 8: 1970-01-09 8",
" 9: 1970-01-10 9", "10: 1970-01-11 10"))

Sys.setenv("_R_CHECK_LENGTH_1_LOGIC2_" = TRUE)
}
Expand Down Expand Up @@ -11064,39 +11073,39 @@ test(1774.02, TRUE, all(
# 4D unnamed
x = array(1:81, dim=rep(3L,4))
dt = as.data.table(x, na.rm=FALSE)
test(1774.03, TRUE, all(
test(1774.03, all(
identical(dim(dt), c(81L,5L)),
identical(names(dt), c(paste0("V",1:4),"value")),
all(dt[J(1L)][1L, value] == 1L, dt[J(2L)][1L, value] == 2L, dt[J(3L)][.N, value] == 81L) # this also tests if dt is keyed
))
# 4D named dim values but not dims
x = array(1:81, dim=rep(3L, 4L), dimnames=rep(list(letters[1:3]), 4L))
dt = as.data.table(x, na.rm=FALSE)
test(1774.04, TRUE, all(
test(1774.04, all(
identical(dim(dt), c(81L,5L)),
identical(names(dt), c(paste0("V",1:4),"value")),
all(dt[J("a")][1L, value] == 1L, dt[J("b")][1L, value] == 2L, dt[J("c")][.N, value] == 81L)
))
# 4D named dim values and dims
x = array(1:81, dim=rep(3L, 4L), dimnames=setNames(rep(list(letters[1:3]), 4L), letters[1:4]))
dt = as.data.table(x, na.rm=FALSE)
test(1774.05, TRUE, all(
test(1774.05, all(
identical(dim(dt), c(81L,5L)),
identical(names(dt), c(letters[1:4],"value")),
all(dt[J("a")][1L, value] == 1L, dt[J("b")][1L, value] == 2L, dt[J("c")][.N, value] == 81L)
))
# third dim of length 1L so really 2D
x = array(1:4, dim=c(2L,2L,1L), dimnames=list(a=letters[1:2], b=letters[1:2], c="a"))
dt = as.data.table(x, na.rm=FALSE)
test(1774.06, TRUE, all(
test(1774.06, all(
identical(dim(dt), c(4L,4L)),
identical(names(dt), c("a","b","c","value")),
all(dt[J("a")][, value] == c(1L,3L), dt[J("b")][, value] == c(2L,4L))
))
# second and third dim of length 1L so really 1D
x = array(1:2, dim=c(2L,1L,1L), dimnames=list(a=letters[1:2], b="a", c="a"))
dt = as.data.table(x, na.rm=FALSE)
test(1774.07, TRUE, all(
test(1774.07, all(
identical(dim(dt), c(2L,4L)),
identical(names(dt), c("a","b","c","value")),
all(dt[J("a")][, value] == 1L, dt[J("b")][, value] == 2L)
Expand All @@ -11107,23 +11116,23 @@ x = rnorm(27)
x[sample(length(x), length(x)/2)] = NA
dim(x) = c(3L,3L,3L)
dt = as.data.table(x, na.rm=FALSE)
test(1774.08, TRUE, all(
test(1774.08, all(
identical(dim(dt), c(27L,4L)),
identical(names(dt), c(paste0("V",1:3),"value")),
dt[is.na(value), .N] > 0L,
length(key(dt)) > 0L
))
# na.rm=TRUE / sorted=TRUE
dt = as.data.table(x)
test(1774.09, TRUE, all(
test(1774.09, all(
identical(dim(dt), c(14L,4L)),
identical(names(dt), c(paste0("V",1:3),"value")),
dt[is.na(value), .N] == 0L,
length(key(dt)) > 0L
))
# na.rm=TRUE / sorted=FALSE
dt = as.data.table(x, sorted=FALSE)
test(1774.10, TRUE, all(
test(1774.10, all(
identical(dim(dt), c(14L,4L)),
identical(names(dt), c(paste0("V",1:3),"value")),
dt[is.na(value), .N] == 0L,
Expand All @@ -11132,7 +11141,7 @@ test(1774.10, TRUE, all(
))
# na.rm=FALSE / sorted=FALSE
dt = as.data.table(x, na.rm=FALSE, sorted=FALSE)
test(1774.11, TRUE, all(
test(1774.11, all(
identical(dim(dt), c(27L,4L)),
identical(names(dt), c(paste0("V",1:3),"value")),
is.unsorted(dt[[1]]),
Expand All @@ -11144,6 +11153,12 @@ test(1774.12, as.data.table(x, value.name="a"), error = "Argument 'value.name' s
x = array(1:2, dim=c(2L,1L,1L), dimnames=list(a=letters[1:2], b="a", value="a"))
test(1774.13, as.data.table(x), error = "Argument 'value.name' should not overlap with column names in result")

## unsupported usage of as.data.table.array
test(1774.14, as.data.table.array(as.matrix(x)), error="method should only be called for arrays with 3+")
test(1774.15, as.data.table(x, value.name=NA), error="'value.name' must be scalar")
test(1774.16, as.data.table(x, sorted='a'), error="'sorted' must be scalar")
test(1774.17, as.data.table(x, na.rm='a'), error="'na.rm' must be scalar")

# verify print.keys works
DT1 <- data.table(a = 1:3, key = "a")
test(1775.1, capture.output(print(DT1, print.keys = TRUE)),
Expand Down Expand Up @@ -14769,6 +14784,54 @@ setnames(d2, 1L, 'X1')
test(2045.15, d1[d2, verbose = TRUE], cbind(d1, X1 = d2$X1), output="natural join using: \\[.*[.]{3}\\]")
options(datatable.naturaljoin=FALSE)

#tests for adding key to as.data.table, #890
## as.data.table.numeric (should cover as.data.table.factor,
## *.ordered, *.integer, *.logical, *.character, and *.Date since
## all are the same function in as.data.table.R)
nn = c(a=0.1, c=0.2, b=0.3, d=0.4)
ans = data.table(nn, key='nn')
ans_rn = data.table(rn = names(nn), nn, key='rn')
test(2046.01, as.data.table(nn, key="nn"), ans)
test(2046.02, as.data.table(nn, keep.rownames=TRUE, key="rn"), ans_rn)

## as.data.table.data.frame
DF = as.data.frame(ans)
test(2046.03, as.data.table(DF, key="nn"), ans)

## as.data.table.data.table
DT = copy(ans)
test(2046.04, as.data.table(DT, key="nn"), ans)

## as.data.table.default
rr <- as.raw(3:1)
test(2046.05, as.data.table(rr, keep.rownames=TRUE, key="rn"),
data.table(rn = paste0(1:3), x = rr, key='rn'))

## as.data.table.list
l = as.list(ans)
test(2046.06, as.data.table(l, key='nn'), ans)

## as.data.table.matrix
mm <- as.matrix(ans)
test(2046.07, as.data.table(mm, key='nn'), ans)

## as.data.table.array
aa = array(nn, c(1L, 2L, 2L))
test(2046.08, as.data.table(aa, key='V3'),
data.table(V1 = 1L, V2 = rep(1:2, 2L), V3 = rep(1:2, each = 2L), value = as.vector(nn), key='V3'))
### conflict between sorted&key arguments
test(2046.09, as.data.table(aa, key='V3', sorted = TRUE), error="Please provide either 'key' or 'sorted'")

## as.data.table.table
tt <- as.table(nn)
test(2046.10, as.data.table(tt, key="N"),
data.table(V1 = names(tt), N = as.vector(tt), key='N'))

# some coverage tests uncovered by #890
test(2047.1, as.data.table(list(character(0L))), data.table(V1 = character(0L)))
test(2047.2, as.data.table(list()), data.table(NULL))
test(2047.3, as.data.table(rbind(1L)), data.table(V1 = 1L))


###################################
# Add new tests above this line #
Expand Down
Loading