diff --git a/NEWS.md b/NEWS.md index f882aca806..7316e6af74 100644 --- a/NEWS.md +++ b/NEWS.md @@ -117,6 +117,8 @@ Thus, `with=` should no longer be needed in any cases. Please change to using th 19. `setindexv` can now assign multiple (separate) indices by accepting a `list` in the `cols` argument. +20. `as.matrix.data.table` method now has an additional `rownames` argument allowing for a single column to be used as the `rownames` after conversion to a `matrix`. Thanks to @sritchie73 for the suggestion, use cases, [#2692](https://github.com/Rdatatable/data.table/issues/2692) and implementation [PR#2702](https://github.com/Rdatatable/data.table/pull/2702) and @MichaelChirico for additional use cases. + #### BUG FIXES 1. The new quote rules handles this single field `"Our Stock Screen Delivers an Israeli Software Company (MNDO, CTCH)<\/a> SmallCapInvestor.com - Thu, May 19, 2011 10:02 AM EDT<\/cite><\/div>Yesterday in \""Google, But for Finding diff --git a/R/data.table.R b/R/data.table.R index b5e46e5b62..9c2d1c61fb 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -1881,17 +1881,59 @@ chmatch2 <- function(x, table, nomatch=NA_integer_) { # x #} - -as.matrix.data.table <- function(x,...) -{ - dm <- dim(x) - cn <- names(x) +as.matrix.data.table <- function(x, rownames, ...) { + rn <- NULL + rnc <- NULL + if (!missing(rownames)) { # Convert rownames to a column index if possible + if (length(rownames) == nrow(x)) { + # rownames argument is a vector of row names, no column in x to drop. + rn <- rownames + rnc <- NULL + } else if (!is.null(rownames) && length(rownames) != 1L) { # vector(0) will throw an error, but NULL will pass through + stop(sprintf("rownames must be a single column in x or a vector of row names of length nrow(x)=%d", nrow(x))) + } else if (!(is.null(rownames) || is.logical(rownames) || is.character(rownames) || is.numeric(rownames))) { + # E.g. because rownames is some sort of object that can't be converted to a column index + stop("rownames must be TRUE, a column index, a column name in x, or a vector of row names") + } else if (!is.null(rownames) && !is.na(rownames) && !identical(rownames, FALSE)) { # Handles cases where rownames is a column name, or key(x) from TRUE + if (identical(rownames, TRUE)) { + if (haskey(x)) { + rownames <- key(x) + if (length(rownames) > 1L) { + warning(sprintf("rownames is TRUE but multiple keys [%s] found for x; defaulting to first column x[,1]", + paste(rownames, collapse = ','), rownames[1L])) + rownames <- 1L + } + } else { + rownames <- 1L + } + } + if (is.character(rownames)) { + rnc <- chmatch(rownames, names(x)) + if (is.na(rnc)) stop(rownames, " is not a column of x") + } else { # rownames is an index already + if (rownames < 1L || rownames > ncol(x)) + stop(sprintf("rownames is %d which is outside the column number range [1,ncol=%d]", rownames, ncol(x))) + rnc <- rownames + } + } + } + # If the rownames argument has been used, and is a single column, + # extract that column's index (rnc) and drop it from x + if (!is.null(rnc)) { + rn <- x[[rnc]] + dm <- dim(x) - c(0, 1) + cn <- names(x)[-rnc] + X <- x[, .SD, .SDcols = cn] + } else { + dm <- dim(x) + cn <- names(x) + X <- x + } if (any(dm == 0L)) - return(array(NA, dim = dm, dimnames = list(NULL, cn))) + return(array(NA, dim = dm, dimnames = list(rn, cn))) p <- dm[2L] n <- dm[1L] collabs <- as.list(cn) - X <- x class(X) <- NULL non.numeric <- non.atomic <- FALSE all.logical <- TRUE @@ -1936,7 +1978,7 @@ as.matrix.data.table <- function(x,...) } X <- unlist(X, recursive = FALSE, use.names = FALSE) dim(X) <- c(n, length(X)/n) - dimnames(X) <- list(NULL, unlist(collabs, use.names = FALSE)) + dimnames(X) <- list(rn, unlist(collabs, use.names = FALSE)) X } diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index ef81215f19..ea3f1bffcb 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -11572,6 +11572,30 @@ test(1898.1, set2key(DT, a), error="deprecated. Please use setindex() instead.") test(1898.2, set2keyv(DT, "a"), error="deprecated. Please use setindexv() instead.") test(1898.3, key2(DT), error="deprecated. Please use indices() instead.") +# Allow column to be used as rownames when converting to matrix #2702 +DT = data.table(id = letters[1:4], X = 1:4, Y = 5:8) +mat <- matrix(1:8, ncol = 2, dimnames=list(letters[1:4], c("X", "Y"))) +mat2 <- matrix(c(letters[1:4], 1:8), ncol=3, dimnames=list(NULL, c("id", "X", "Y"))) +mat3 <- matrix(c(letters[1:4], 1:8), ncol=3, dimnames=list(1:4, c("id", "X", "Y"))) +test(1899.01, as.matrix(DT, 1), mat) +test(1899.02, as.matrix(DT, "id"), mat) +test(1899.03, as.matrix(DT, TRUE), mat) +setkey(DT, id) +test(1899.04, as.matrix(DT, TRUE), mat) +test(1899.05, as.matrix(DT, 1:4), mat3) +# errors +test(1899.06, as.matrix(DT, -1), error="rownames is -1 which is outside the column number range") +test(1899.07, as.matrix(DT, "Z"), error="Z is not a column of x") +test(1899.08, as.matrix(DT, c(1,2)), error="rownames must be a single column in x or a vector of row names of length nrow(x)") +test(1899.09, as.matrix(DT, complex(1)), error="rownames must be TRUE, a column index, a column name in x, or a vector of row names") +# values that pass through (rownames ignored) +test(1899.10, as.matrix(DT, NA), mat2) +test(1899.11, as.matrix(DT, NULL), mat2) +test(1899.12, as.matrix(DT, FALSE), mat2) +# Warnings: +setkey(DT, id, X) +test(1899.13, as.matrix(DT, TRUE), mat, warning="rownames is TRUE but multiple keys") + ################################### # Add new tests above this line # diff --git a/man/as.matrix.Rd b/man/as.matrix.Rd new file mode 100644 index 0000000000..f93f3ba89b --- /dev/null +++ b/man/as.matrix.Rd @@ -0,0 +1,63 @@ +\name{as.matrix} +\alias{as.matrix} +\alias{as.matrix.data.table} +\title{Convert a data.table to a matrix} +\description{ +Converts a \code{data.table} into a \code{matrix}, optionally using one +of the columns in the \code{data.table} as the \code{matrix} \code{rownames}. +} +\usage{ +\method{as.matrix}{data.table}(x, rownames, ...)} + +\arguments{ +\item{x}{a \code{data.table}} +\item{rownames}{optional, a single column name or column index to use as +the \code{rownames} in the returned \code{matrix}. If \code{TRUE} the +\code{\link{key}} of the \code{data.table} will be used if it is a +single column, otherwise the first column in the \code{data.table} will +be used. Alternative a vector of length \code{nrow(x)} to assign as the +row names of the returned \code{matrix}.} +\item{\dots}{additional arguments to be passed to or from methods.} +} + +\details{ +\code{\link{as.matrix}} is a generic function in base R. It dispatches to +\code{as.matrix.data.table} if its \code{x} argument is a \code{data.table}. + +The method for \code{data.table}s will return a character matrix if there +are only atomic columns and any non-(numeric/logical/complex) column, +applying \code{\link{as.vector}} to factors and \code{\link{format}} to other +non-character columns. Otherwise, the usual coercion hierarchy (logical < +integer < double < complex) will be used, e.g., all-logical data frames +will be coerced to a logical matrix, mixed logical-integer will give an +integer matrix, etc. + +An additional argument \code{rownames} is provided for \code{as.matrix.data.table} +to facilitate conversions to matrices where the \code{\link{rownames}} are stored +in a single column of \code{x}, e.g. the first column after using +\code{\link{dcast.data.table}}. +} + +\value{ +A new \code{matrix} containing the contents of \code{x}. +} + +\seealso{ +\code{\link{data.table}}, \code{\link{as.matrix}}, \code{\link{data.matrix}} +\code{\link{array}} +} + +\examples{ +(dt1 <- data.table(A = letters[1:10], X = 1:10, Y = 11:20)) +as.matrix(dt1) # character matrix +as.matrix(dt1, rownames = "A") +as.matrix(dt1, rownames = 1) +as.matrix(dt1, rownames = TRUE) + +(dt1 <- data.table(A = letters[1:10], X = 1:10, Y = 11:20)) +setkey(dt1, A) +as.matrix(dt1, rownames = TRUE) +} + +\keyword{ array } +