diff --git a/NEWS.md b/NEWS.md index bc33f21534..43ed36327c 100644 --- a/NEWS.md +++ b/NEWS.md @@ -13,6 +13,8 @@ 3. `as.ITime.character` now properly handles NA when attempting to detect the format of non-NA values in vector. Thanks @polyjian for reporting, closes [#2940](https://github.com/Rdatatable/data.table/issues/2940). +4. `as.matrix.data.table()` gains an additional argument, `rownames.values`, that allows the user to supply their own vector of rownames to use in the returned matrix. This deprecates the same functionality previously available through the `rownames` argument: passing a vector of row names to `rownames` will throw a warning suggesting `rownames.values` instead in the next release, and will throw an error in the release after that. Additionally, passing a column to use as the `rownames` to `as.matrix.data.table()` now works when the input `data.table` has a single row, closes [#2930](https://github.com/Rdatatable/data.table/issues/2930). Thanks to @malcook for reporting, @sritchie73 for fixing, and @HughParsonage, @jangorecki, and @mattdowle for feedback. + #### NOTES 1. The type coercion warning message has been improved, [#2989](https://github.com/Rdatatable/data.table/pull/2989). Thanks to @sarahbeeysian on [Twitter](https://twitter.com/sarahbeeysian/status/1021359529789775872) for highlighting. For example, given the follow statements: diff --git a/R/data.table.R b/R/data.table.R index 1c35f062fd..b97c6d387c 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -1887,19 +1887,31 @@ chmatch2 <- function(x, table, nomatch=NA_integer_) { # x #} -as.matrix.data.table <- function(x, rownames, ...) { +as.matrix.data.table <- function(x, rownames, rownames.values, ...) 
{ rn <- NULL rnc <- NULL - if (!missing(rownames)) { # Convert rownames to a column index if possible - if (length(rownames) == nrow(x)) { + if (!missing(rownames) && !missing(rownames.values) && !is.null(rownames) && !is.null(rownames.values)) { + stop("rownames and rownames.value cannot both be used at the same time") + } else if (!missing(rownames.values) && !is.null(rownames.values)) { # user provided vector of rownames + if (length(rownames.values) != nrow(x)) { + stop(sprintf("rownames.values must be a vector of row names of length nrow(x)=%d", nrow(x))) + } + rn <- rownames.values + rnc <- NULL + } else if (!missing(rownames)) { # Convert rownames to a column index if possible + # TODO: uncomment in next release, then change to stop() in the release after that + #if (length(rownames) > 1 && length(rownames) == nrow(x)) { + # warning("length(rownames) > 1 is deprecated. rownames.values should be used in the future when supplying your own vector of row names") + #} + if (length(rownames) == nrow(x) && nrow(x) > 1) { # rownames argument is a vector of row names, no column in x to drop. rn <- rownames rnc <- NULL } else if (!is.null(rownames) && length(rownames) != 1L) { # vector(0) will throw an error, but NULL will pass through - stop(sprintf("rownames must be a single column in x or a vector of row names of length nrow(x)=%d", nrow(x))) + stop("rownames must be a single column in x") } else if (!(is.null(rownames) || is.logical(rownames) || is.character(rownames) || is.numeric(rownames))) { # E.g. 
because rownames is some sort of object that can't be converted to a column index - stop("rownames must be TRUE, a column index, a column name in x, or a vector of row names") + stop("rownames must be TRUE, a column index, or a column name in x") } else if (!is.null(rownames) && !is.na(rownames) && !identical(rownames, FALSE)) { # Handles cases where rownames is a column name, or key(x) from TRUE if (identical(rownames, TRUE)) { if (haskey(x)) { diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index c4b2f4dd91..15d6eaae94 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -11692,8 +11692,8 @@ test(1899.05, as.matrix(DT, 1:4), mat3) # errors test(1899.06, as.matrix(DT, -1), error="rownames is -1 which is outside the column number range") test(1899.07, as.matrix(DT, "Z"), error="Z is not a column of x") -test(1899.08, as.matrix(DT, c(1,2)), error="rownames must be a single column in x or a vector of row names of length nrow(x)") -test(1899.09, as.matrix(DT, complex(1)), error="rownames must be TRUE, a column index, a column name in x, or a vector of row names") +test(1899.08, as.matrix(DT, c(1,2)), error="rownames must be a single column in x") +test(1899.09, as.matrix(DT, complex(1)), error="rownames must be TRUE, a column index, or a column name in x") # values that pass through (rownames ignored) test(1899.10, as.matrix(DT, NA), mat2) test(1899.11, as.matrix(DT, NULL), mat2) @@ -11702,6 +11702,15 @@ test(1899.12, as.matrix(DT, FALSE), mat2) setkey(DT, id, X) test(1899.13, as.matrix(DT, TRUE), mat, warning="rownames is TRUE but multiple keys") +# Check handling of cases where the data.table only has 1 row, raised by Issue #2930: +mat4 <- matrix(c("a", 1, 5), nrow=1, dimnames=list(c("x"), c("id", "X", "Y"))) +test(1899.14, as.matrix(DT[1,], 1), mat[1,,drop=FALSE]) +test(1899.15, as.matrix(DT[1,], "id"), mat[1,,drop=FALSE]) +# Check that rownames.values works: +test(1899.16, as.matrix(DT[1,], rownames.values="x"), mat4) # "x" not a column in 
DT, so use "x" as the row name +test(1899.17, as.matrix(DT[1,], rownames.values=c("x", "y")), error="rownames.values must be a vector of row names of length nrow(x)") +test(1899.18, as.matrix(DT, rownames=TRUE, rownames.values=1:nrow(DT)), error="rownames and rownames.value cannot both be used at the same time") + # index argument for fread, #2633 DT_str = c('a,b\n3,1\n2,2\n1,1\n2,1\n3,2') test(1900.1, attributes(attr(fread(DT_str, index = 'a'), 'index')), diff --git a/man/as.matrix.Rd b/man/as.matrix.Rd index f93f3ba89b..b8d882e612 100644 --- a/man/as.matrix.Rd +++ b/man/as.matrix.Rd @@ -7,7 +7,7 @@ Converts a \code{data.table} into a \code{matrix}, optionally using one of the columns in the \code{data.table} as the \code{matrix} \code{rownames}. } \usage{ -\method{as.matrix}{data.table}(x, rownames, ...)} +\method{as.matrix}{data.table}(x, rownames, rownames.values, \dots)} \arguments{ \item{x}{a \code{data.table}} @@ -15,8 +15,9 @@ of the columns in the \code{data.table} as the \code{matrix} \code{rownames}. the \code{rownames} in the returned \code{matrix}. If \code{TRUE} the \code{\link{key}} of the \code{data.table} will be used if it is a single column, otherwise the first column in the \code{data.table} will -be used. Alternative a vector of length \code{nrow(x)} to assign as the -row names of the returned \code{matrix}.} +be used.} +\item{rownames.values}{optional, a vector of values to use as the +\code{rownames} in the returned \code{matrix}. Must be of length \code{nrow(x)}.} \item{\dots}{additional arguments to be passed to or from methods.} }