From 378689e2d8e147ce8827c9d2402ee54599a71978 Mon Sep 17 00:00:00 2001 From: Scott Ritchie Date: Sun, 17 Jun 2018 20:07:16 +1000 Subject: [PATCH 1/5] Closes #2930 Migrating to commits to a new branch. Passing a column to use as the rownames to as.matrix.data.table() now works when the input data.table has a single row. --- NEWS.md | 2 ++ R/data.table.R | 9 +++++++-- inst/tests/tests.Rraw | 7 +++++++ man/as.matrix.Rd | 7 ++++++- 4 files changed, 22 insertions(+), 3 deletions(-) diff --git a/NEWS.md b/NEWS.md index ad95f1cb01..1bb6f889e6 100644 --- a/NEWS.md +++ b/NEWS.md @@ -7,6 +7,8 @@ #### BUG FIXES +1. Passing a column to use as the rownames to `as.matrix.data.table()` now works when the input `data.table` has a single row, [#2930](https://github.com/Rdatatable/data.table/issues/2930). Thanks to @malcook for reporting, @sritchie73 for fixing, and @HughParsonage for feedback + #### NOTES diff --git a/R/data.table.R b/R/data.table.R index 1f70c83d9c..4e9391f6e5 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -1887,14 +1887,19 @@ chmatch2 <- function(x, table, nomatch=NA_integer_) { # x #} -as.matrix.data.table <- function(x, rownames, ...) { +as.matrix.data.table <- function(x, rownames, rownames.literal=FALSE, ...) { rn <- NULL rnc <- NULL if (!missing(rownames)) { # Convert rownames to a column index if possible - if (length(rownames) == nrow(x)) { + if (length(rownames) == nrow(x) && nrow(x) > 1) { # rownames argument is a vector of row names, no column in x to drop. rn <- rownames rnc <- NULL + } else if (length(rownames) == nrow(x) && nrow(x) == 1 && isTRUE(rownames.literal)) { + # When x has a single row, the user may still want to supply vector of rownames, + # but we need to distinguish from the case when the rownames is a column of x. + rn <- rownames + rnc <- NULL } else if (!is.null(rownames) && length(rownames) != 1L) { # vector(0) will throw an error, but NULL will pass through stop(sprintf("rownames must be a single column in x or a vector of row names of length nrow(x)=%d", nrow(x))) } else if (!(is.null(rownames) || is.logical(rownames) || is.character(rownames) || is.numeric(rownames))) { diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 4384ab81b4..c54c21435e 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -11702,6 +11702,13 @@ test(1899.12, as.matrix(DT, FALSE), mat2) setkey(DT, id, X) test(1899.13, as.matrix(DT, TRUE), mat, warning="rownames is TRUE but multiple keys") +# Check handling of cases where the data.table only has 1 row, raised by Issue #2930: +mat4 <- matrix(c("a", 1, 5), nrow=1, dimnames=list(c("x"), c("id", "X", "Y"))) +test(1899.14, as.matrix(DT[1,], 1), mat[1,,drop=FALSE]) +test(1899.15, as.matrix(DT[1,], "id"), mat[1,,drop=FALSE]) +test(1899.16, as.matrix(DT[1,], "x", rownames.literal=TRUE), mat4) # "x" not a column in DT, so use "x" as the +test(1899.17, as.matrix(DT[1,], c("x", "y"), rownames.literal=TRUE), error="rownames must be a single column in x or a vector of row names of length nrow(x)") + # index argument for fread, #2633 DT_str = c('a,b\n3,1\n2,2\n1,1\n2,1\n3,2') test(1900.1, attributes(attr(fread(DT_str, index = 'a'), 'index')), diff --git a/man/as.matrix.Rd b/man/as.matrix.Rd index f93f3ba89b..02f11e701e 100644 --- a/man/as.matrix.Rd +++ b/man/as.matrix.Rd @@ -7,7 +7,7 @@ Converts a \code{data.table} into a \code{matrix}, optionally using one of the columns in the \code{data.table} as the \code{matrix} \code{rownames}. } \usage{ -\method{as.matrix}{data.table}(x, rownames, ...)} +\method{as.matrix}{data.table}(x, rownames, rownames.literal, ...)} \arguments{ \item{x}{a \code{data.table}} @@ -17,6 +17,11 @@ the \code{rownames} in the returned \code{matrix}. If \code{TRUE} the single column, otherwise the first column in the \code{data.table} will be used. Alternative a vector of length \code{nrow(x)} to assign as the row names of the returned \code{matrix}.} +\item{rownames.literal}{logical, optional, when \code{x} is a +\code{data.table} that has only a single row, set \code{rownames.literal} +to \code{TRUE} if you want to use the vector supplied to the \code{rownames} +argument as the row names of the returned \code{matrix} instead of looking +for \code{rownames} as a column of \code{x}.} \item{\dots}{additional arguments to be passed to or from methods.} } From a398fe13ac89262c4209b0c1ae1474a891108733 Mon Sep 17 00:00:00 2001 From: Scott Ritchie Date: Mon, 18 Jun 2018 10:46:19 +1000 Subject: [PATCH 2/5] Fixed documentation --- NEWS.md | 2 +- man/as.matrix.Rd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/NEWS.md b/NEWS.md index 1bb6f889e6..787a3d50af 100644 --- a/NEWS.md +++ b/NEWS.md @@ -7,7 +7,7 @@ #### BUG FIXES -1. Passing a column to use as the rownames to `as.matrix.data.table()` now works when the input `data.table` has a single row, [#2930](https://github.com/Rdatatable/data.table/issues/2930). Thanks to @malcook for reporting, @sritchie73 for fixing, and @HughParsonage for feedback +1. Passing a column to use as the rownames to `as.matrix.data.table()` now works when the input `data.table` has a single row, [#2930](https://github.com/Rdatatable/data.table/issues/2930). Thanks to @malcook for reporting, @sritchie73 for fixing, and @HughParsonage and @jangorecki for feedback. #### NOTES diff --git a/man/as.matrix.Rd b/man/as.matrix.Rd index 02f11e701e..0cdf76103f 100644 --- a/man/as.matrix.Rd +++ b/man/as.matrix.Rd @@ -7,7 +7,7 @@ Converts a \code{data.table} into a \code{matrix}, optionally using one of the columns in the \code{data.table} as the \code{matrix} \code{rownames}. } \usage{ -\method{as.matrix}{data.table}(x, rownames, rownames.literal, ...)} +\method{as.matrix}{data.table}(x, rownames, rownames.literal=FALSE, ...)} \arguments{ \item{x}{a \code{data.table}} From 9f2e54414ca87f51be0d74b4e3ffcfc08113bfb0 Mon Sep 17 00:00:00 2001 From: Scott Ritchie Date: Mon, 18 Jun 2018 16:49:42 +1000 Subject: [PATCH 3/5] ... to \dots --- man/as.matrix.Rd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/man/as.matrix.Rd b/man/as.matrix.Rd index 0cdf76103f..079058ebb9 100644 --- a/man/as.matrix.Rd +++ b/man/as.matrix.Rd @@ -7,7 +7,7 @@ Converts a \code{data.table} into a \code{matrix}, optionally using one of the columns in the \code{data.table} as the \code{matrix} \code{rownames}. } \usage{ -\method{as.matrix}{data.table}(x, rownames, rownames.literal=FALSE, ...)} +\method{as.matrix}{data.table}(x, rownames, rownames.literal=FALSE, \dots)} \arguments{ \item{x}{a \code{data.table}} From f1dc0ed6a367ceed6758ad27be5531c779fe9018 Mon Sep 17 00:00:00 2001 From: Scott Ritchie Date: Wed, 15 Aug 2018 15:17:37 +0100 Subject: [PATCH 4/5] Removed rownames.literal, added rownames.values --- NEWS.md | 2 +- R/data.table.R | 24 +++++++++++++++--------- inst/tests/tests.Rraw | 12 +++++++----- man/as.matrix.Rd | 12 ++++-------- 4 files changed, 27 insertions(+), 23 deletions(-) diff --git a/NEWS.md b/NEWS.md index db28a00bdd..63febb80e3 100644 --- a/NEWS.md +++ b/NEWS.md @@ -13,7 +13,7 @@ 3. `as.ITime.character` now properly handles NA when attempting to detect the format of non-NA values in vector. Thanks @polyjian for reporting, closes [#2940](https://github.com/Rdatatable/data.table/issues/2940). -4. Passing a column to use as the rownames to `as.matrix.data.table()` now works when the input `data.table` has a single row, [#2930](https://github.com/Rdatatable/data.table/issues/2930). Thanks to @malcook for reporting, @sritchie73 for fixing, and @HughParsonage and @jangorecki for feedback. +4. `as.matrix.data.table()` gains an additional argument, `rownames.values`, that allows the user to supply their own vector of rownames to use in the returned matrix. This deprecates this functionality previously available through the `rownames` argument, which will now throw a warning suggesting to use `rownames.values` instead, and in future releases will throw an error. Additionally, passing a column to use as the `rownames` to `as.matrix.data.table()` now works when the input `data.table` has a single row and closes [#2930](https://github.com/Rdatatable/data.table/issues/2930). Thanks to @malcook for reporting, @sritchie73 for fixing, and @HughParsonage, @jangorecki, and @mattdowle for feedback. #### NOTES diff --git a/R/data.table.R b/R/data.table.R index b20eb64330..7d02840853 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -1887,24 +1887,30 @@ chmatch2 <- function(x, table, nomatch=NA_integer_) { # x #} -as.matrix.data.table <- function(x, rownames, rownames.literal=FALSE, ...) { +as.matrix.data.table <- function(x, rownames, rownames.values, ...) { rn <- NULL rnc <- NULL - if (!missing(rownames)) { # Convert rownames to a column index if possible + if (!missing(rownames) && !missing(rownames.values) && !is.null(rownames) && !is.null(rownames.values)) { + stop("rownames and rownames.value cannot both be used at the same time") + } else if (!missing(rownames.values) && !is.null(rownames.values)) { # user provided vector of rownames + if (length(rownames.values) != nrow(x)) { + stop(sprintf("rownames.values must be a vector of row names of length nrow(x)=%d", nrow(x))) + } + rn <- rownames.values + rnc <- NULL + } else if (!missing(rownames)) { # Convert rownames to a column index if possible + if (length(rownames) > 1 && length(rownames) == nrow(x)) { + warning("length(rownames) > 1 is deprecated. rownames.values should be used in the future when supplying your own vector of row names") + } if (length(rownames) == nrow(x) && nrow(x) > 1) { # rownames argument is a vector of row names, no column in x to drop. rn <- rownames rnc <- NULL - } else if (length(rownames) == nrow(x) && nrow(x) == 1 && isTRUE(rownames.literal)) { - # When x has a single row, the user may still want to supply vector of rownames, - # but we need to distinguish from the case when the rownames is a column of x. - rn <- rownames - rnc <- NULL } else if (!is.null(rownames) && length(rownames) != 1L) { # vector(0) will throw an error, but NULL will pass through - stop(sprintf("rownames must be a single column in x or a vector of row names of length nrow(x)=%d", nrow(x))) + stop("rownames must be a single column in x") } else if (!(is.null(rownames) || is.logical(rownames) || is.character(rownames) || is.numeric(rownames))) { # E.g. because rownames is some sort of object that can't be converted to a column index - stop("rownames must be TRUE, a column index, a column name in x, or a vector of row names") + stop("rownames must be TRUE, a column index, or a column name in x") } else if (!is.null(rownames) && !is.na(rownames) && !identical(rownames, FALSE)) { # Handles cases where rownames is a column name, or key(x) from TRUE if (identical(rownames, TRUE)) { if (haskey(x)) { diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 0dea51dfeb..4fc9848b2d 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -11688,12 +11688,12 @@ test(1899.02, as.matrix(DT, "id"), mat) test(1899.03, as.matrix(DT, TRUE), mat) setkey(DT, id) test(1899.04, as.matrix(DT, TRUE), mat) -test(1899.05, as.matrix(DT, 1:4), mat3) +test(1899.05, as.matrix(DT, 1:4), mat3, warning="deprecated") # errors test(1899.06, as.matrix(DT, -1), error="rownames is -1 which is outside the column number range") test(1899.07, as.matrix(DT, "Z"), error="Z is not a column of x") -test(1899.08, as.matrix(DT, c(1,2)), error="rownames must be a single column in x or a vector of row names of length nrow(x)") -test(1899.09, as.matrix(DT, complex(1)), error="rownames must be TRUE, a column index, a column name in x, or a vector of row names") +test(1899.08, as.matrix(DT, c(1,2)), error="rownames must be a single column in x") +test(1899.09, as.matrix(DT, complex(1)), error="rownames must be TRUE, a column index, or a column name in x") # values that pass through (rownames ignored) test(1899.10, as.matrix(DT, NA), mat2) test(1899.11, as.matrix(DT, NULL), mat2) @@ -11706,8 +11706,10 @@ test(1899.13, as.matrix(DT, TRUE), mat, warning="rownames is TRUE but multiple k mat4 <- matrix(c("a", 1, 5), nrow=1, dimnames=list(c("x"), c("id", "X", "Y"))) test(1899.14, as.matrix(DT[1,], 1), mat[1,,drop=FALSE]) test(1899.15, as.matrix(DT[1,], "id"), mat[1,,drop=FALSE]) -test(1899.16, as.matrix(DT[1,], "x", rownames.literal=TRUE), mat4) # "x" not a column in DT, so use "x" as the -test(1899.17, as.matrix(DT[1,], c("x", "y"), rownames.literal=TRUE), error="rownames must be a single column in x or a vector of row names of length nrow(x)") +# Check that rownames.values works: +test(1899.16, as.matrix(DT[1,], rownames.values="x"), mat4) # "x" not a column in DT, so use "x" as the +test(1899.17, as.matrix(DT[1,], rownames.values=c("x", "y")), error="rownames.values must be a vector of row names of length nrow(x)") +test(1899.18, as.matrix(DT, rownames=TRUE, rownames.values=1:nrow(DT)), error="rownames and rownames.value cannot both be used at the same time") # index argument for fread, #2633 DT_str = c('a,b\n3,1\n2,2\n1,1\n2,1\n3,2') diff --git a/man/as.matrix.Rd b/man/as.matrix.Rd index 079058ebb9..b8d882e612 100644 --- a/man/as.matrix.Rd +++ b/man/as.matrix.Rd @@ -7,7 +7,7 @@ Converts a \code{data.table} into a \code{matrix}, optionally using one of the columns in the \code{data.table} as the \code{matrix} \code{rownames}. } \usage{ -\method{as.matrix}{data.table}(x, rownames, rownames.literal=FALSE, \dots)} +\method{as.matrix}{data.table}(x, rownames, rownames.values, \dots)} \arguments{ \item{x}{a \code{data.table}} @@ -15,13 +15,9 @@ of the columns in the \code{data.table} as the \code{matrix} \code{rownames}. the \code{rownames} in the returned \code{matrix}. If \code{TRUE} the \code{\link{key}} of the \code{data.table} will be used if it is a single column, otherwise the first column in the \code{data.table} will -be used. Alternative a vector of length \code{nrow(x)} to assign as the -row names of the returned \code{matrix}.} -\item{rownames.literal}{logical, optional, when \code{x} is a -\code{data.table} that has only a single row, set \code{rownames.literal} -to \code{TRUE} if you want to use the vector supplied to the \code{rownames} -argument as the row names of the returned \code{matrix} instead of looking -for \code{rownames} as a column of \code{x}.} +be used.} +\item{rownames.values}{optional, a vector of values to use as the +\code{rownames} in the returned \code{matrix}.} \item{\dots}{additional arguments to be passed to or from methods.} } From 25b126e88dc9ac57102f2dcadee3c9e2e5504fd0 Mon Sep 17 00:00:00 2001 From: Scott Ritchie Date: Thu, 16 Aug 2018 10:40:43 +0100 Subject: [PATCH 5/5] Removed warning --- NEWS.md | 2 +- R/data.table.R | 7 ++++--- inst/tests/tests.Rraw | 2 +- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/NEWS.md b/NEWS.md index 63febb80e3..43ed36327c 100644 --- a/NEWS.md +++ b/NEWS.md @@ -13,7 +13,7 @@ 3. `as.ITime.character` now properly handles NA when attempting to detect the format of non-NA values in vector. Thanks @polyjian for reporting, closes [#2940](https://github.com/Rdatatable/data.table/issues/2940). -4. `as.matrix.data.table()` gains an additional argument, `rownames.values`, that allows the user to supply their own vector of rownames to use in the returned matrix. This deprecates this functionality previously available through the `rownames` argument, which will now throw a warning suggesting to use `rownames.values` instead, and in future releases will throw an error. Additionally, passing a column to use as the `rownames` to `as.matrix.data.table()` now works when the input `data.table` has a single row and closes [#2930](https://github.com/Rdatatable/data.table/issues/2930). Thanks to @malcook for reporting, @sritchie73 for fixing, and @HughParsonage, @jangorecki, and @mattdowle for feedback. +4. `as.matrix.data.table()` gains an additional argument, `rownames.values`, that allows the user to supply their own vector of rownames to use in the returned matrix. This will deprecates this functionality previously available through the `rownames` argument, which will throw a warning suggesting to use `rownames.values` instead in the next release, and in future releases will throw an error. Additionally, passing a column to use as the `rownames` to `as.matrix.data.table()` now works when the input `data.table` has a single row and closes [#2930](https://github.com/Rdatatable/data.table/issues/2930). Thanks to @malcook for reporting, @sritchie73 for fixing, and @HughParsonage, @jangorecki, and @mattdowle for feedback. #### NOTES diff --git a/R/data.table.R b/R/data.table.R index 7d02840853..b97c6d387c 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -1899,9 +1899,10 @@ as.matrix.data.table <- function(x, rownames, rownames.values, ...) { rn <- rownames.values rnc <- NULL } else if (!missing(rownames)) { # Convert rownames to a column index if possible - if (length(rownames) > 1 && length(rownames) == nrow(x)) { - warning("length(rownames) > 1 is deprecated. rownames.values should be used in the future when supplying your own vector of row names") - } + # TODO: uncomment in next release, then change to stop() in the release after that + #if (length(rownames) > 1 && length(rownames) == nrow(x)) { + # warning("length(rownames) > 1 is deprecated. rownames.values should be used in the future when supplying your own vector of row names") + #} if (length(rownames) == nrow(x) && nrow(x) > 1) { # rownames argument is a vector of row names, no column in x to drop. rn <- rownames diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 4fc9848b2d..15d6eaae94 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -11688,7 +11688,7 @@ test(1899.02, as.matrix(DT, "id"), mat) test(1899.03, as.matrix(DT, TRUE), mat) setkey(DT, id) test(1899.04, as.matrix(DT, TRUE), mat) -test(1899.05, as.matrix(DT, 1:4), mat3, warning="deprecated") +test(1899.05, as.matrix(DT, 1:4), mat3) # errors test(1899.06, as.matrix(DT, -1), error="rownames is -1 which is outside the column number range") test(1899.07, as.matrix(DT, "Z"), error="Z is not a column of x")