From fc4d67c732142f9a581e0d54e9daedaa6d001e69 Mon Sep 17 00:00:00 2001 From: Ethan Smith <24379655+ethanbsmith@users.noreply.github.com> Date: Tue, 30 Nov 2021 18:48:38 -0700 Subject: [PATCH 1/5] clean pr --- NEWS.md | 2 ++ R/xts.R | 13 ++++++++----- inst/tests/tests.Rraw | 5 +++++ man/as.xts.data.table.Rd | 7 ++++--- 4 files changed, 19 insertions(+), 8 deletions(-) diff --git a/NEWS.md b/NEWS.md index 61759c24d8..cb49520d74 100644 --- a/NEWS.md +++ b/NEWS.md @@ -520,6 +520,8 @@ 51. `merge.data.table()` silently ignored the `incomparables` argument, [#2587](https://github.com/Rdatatable/data.table/issues/2587). It is now implemented and any other ignored arguments (e.g. misspellings) are now warned about. Thanks to @GBsuperman for the report and @ben-schwen for the fix. +52. `as.xts.data.table` now supports non-numeric xts coredata matrices, [#5268](https://github.com/Rdatatable/data.table/issues/5268). Existing numeric-only functionality is supported by a new `numeric.only` parameter, which defaults to `TRUE` for backward compatibility and the most common use case. To convert non-numeric columns, set this parameter to `FALSE`. Conversion of `data.table` columns to a `matrix` now uses `data.table::as.matrix`, with all its performance benefits. Thanks to @ethanbsmith for the report and fix. + ## NOTES 1. New feature 29 in v1.12.4 (Oct 2019) introduced zero-copy coercion. Our thinking is that requiring you to get the type right in the case of `0` (type double) vs `0L` (type integer) is too inconvenient for you the user. So such coercions happen in `data.table` automatically without warning. Thanks to zero-copy coercion there is no speed penalty, even when calling `set()` many times in a loop, so there's no speed penalty to warn you about either. However, we believe that assigning a character value such as `"2"` into an integer column is more likely to be a user mistake that you would like to be warned about. 
The type difference (character vs integer) may be the only clue that you have selected the wrong column, or typed the wrong variable to be assigned to that column. For this reason we view character to numeric-like coercion differently and will warn about it. If it is correct, then the warning is intended to nudge you to wrap the RHS with `as.()` so that it is clear to readers of your code that a coercion from character to that type is intended. For example : diff --git a/R/xts.R b/R/xts.R index 005f0f6024..0e47697dbe 100644 --- a/R/xts.R +++ b/R/xts.R @@ -15,11 +15,14 @@ as.data.table.xts = function(x, keep.rownames = TRUE, key=NULL, ...) { r[] } -as.xts.data.table = function(x, ...) { +as.xts.data.table = function(x, numeric.only = TRUE, ...) { stopifnot(requireNamespace("xts"), !missing(x), is.data.table(x)) if (!xts::is.timeBased(x[[1L]])) stopf("data.table must have a time based column in first position, use `setcolorder` function to change the order, or see ?timeBased for supported types") - colsNumeric = vapply_1b(x, is.numeric)[-1L] # exclude first col, xts index - if (!all(colsNumeric)) warningf("Following columns are not numeric and will be omitted: %s", brackify(names(colsNumeric)[!colsNumeric])) - r = setDF(x[, .SD, .SDcols = names(colsNumeric)[colsNumeric]]) - return(xts::as.xts(r, order.by = if ("IDate" %chin% class(x[[1L]])) as.Date(x[[1L]]) else x[[1L]])) + r <- x[, -1L]# exclude first col, xts index + if (numeric.only) { + colsNumeric = vapply_1b(r, is.numeric) + if (!all(colsNumeric)) warningf("Following columns are not numeric and will be omitted: %s", brackify(names(colsNumeric)[!colsNumeric])) + r <- r[, .SD, .SDcols = names(colsNumeric)[colsNumeric]] + } + return(xts::xts(as.matrix(r), order.by = if ("IDate" %chin% class(x[[1L]])) as.Date(x[[1L]]) else x[[1L]])) } diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index ffa0a95ac3..4187d49817 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -6861,6 +6861,11 @@ if 
(test_xts) { M = xts::as.xts(matrix(1, dimnames=list("2021-05-23", "x"))) # xts:: just to be extra robust; shouldn't be needed with rm(as.xts) above test(1465.19, inherits(as.data.table(M)$index,"POSIXct")) + #5268 support non-numeric xts + xts.bool = xts(x = c(T,F), order.by = Sys.Date() + (1:2)) + colnames(xts.bool) = "value" + test(1465.20, identical(xts.bool, as.xts(as.data.table(xts.bool), numeric.only = F))) + Sys.setenv("_R_CHECK_LENGTH_1_LOGIC2_" = TRUE) } diff --git a/man/as.xts.data.table.Rd b/man/as.xts.data.table.Rd index 1f42cceab0..61fbf3dd40 100644 --- a/man/as.xts.data.table.Rd +++ b/man/as.xts.data.table.Rd @@ -2,13 +2,14 @@ \alias{as.xts.data.table} \title{Efficient data.table to xts conversion} \description{ - Efficient conversion of data.table to xts, data.table must have \emph{POSIXct} or \emph{Date} type in first column. + Efficient conversion of data.table to xts; data.table must have a time-based type in the first column. See ?xts::timeBased for supported types. } \usage{ -as.xts.data.table(x, \dots) +as.xts.data.table(x, numeric.only = TRUE, \dots) } \arguments{ -\item{x}{data.table to convert to xts, must have \emph{POSIXct} or \emph{Date} in the first column. All others non-numeric columns will be omitted with warning.} +\item{x}{data.table to convert to xts, must have a time based first column. As xts objects are indexed matrixes, all columns must be of the same type. If columns of multiple types are selected, standard as.matrix rules are applied during the conversion. 
} +\item{\numeric.only}{If TRUE, only include numeric columns in the conversion and all non-numeric columns will be omitted with warning} \item{\dots}{ignored, just for consistency with generic method.} } \seealso{ \code{\link{as.data.table.xts}} } From ccce13f9447a94bc2a8734aea532e98ee32eed31 Mon Sep 17 00:00:00 2001 From: Ethan Smith <24379655+ethanbsmith@users.noreply.github.com> Date: Tue, 30 Nov 2021 19:01:26 -0700 Subject: [PATCH 2/5] syntax error --- man/as.xts.data.table.Rd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/man/as.xts.data.table.Rd b/man/as.xts.data.table.Rd index 61fbf3dd40..1328229edb 100644 --- a/man/as.xts.data.table.Rd +++ b/man/as.xts.data.table.Rd @@ -9,7 +9,7 @@ as.xts.data.table(x, numeric.only = TRUE, \dots) } \arguments{ \item{x}{data.table to convert to xts, must have a time based first column. As xts objects are indexed matrixes, all columns must be of the same type. If columns of multiple types are selected, standard as.matrix rules are applied during the conversion. 
} -\item{\numeric.only}{If TRUE, only include numeric columns in the conversion and all non-numeric columns will be omitted with warning} +\item{numeric.only}{If TRUE, only include numeric columns in the conversion and all non-numeric columns will be omitted with warning} \item{\dots}{ignored, just for consistency with generic method.} } \seealso{ \code{\link{as.data.table.xts}} } From edc8fd3e178e28fb0482111d93f174fcdbe4cafb Mon Sep 17 00:00:00 2001 From: Ethan Smith <24379655+ethanbsmith@users.noreply.github.com> Date: Wed, 1 Dec 2021 06:30:00 -0700 Subject: [PATCH 3/5] Update tests.Rraw --- inst/tests/tests.Rraw | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 4187d49817..17d3edaacc 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -6861,10 +6861,10 @@ if (test_xts) { M = xts::as.xts(matrix(1, dimnames=list("2021-05-23", "x"))) # xts:: just to be extra robust; shouldn't be needed with rm(as.xts) above test(1465.19, inherits(as.data.table(M)$index,"POSIXct")) - #5268 support non-numeric xts - xts.bool = xts(x = c(T,F), order.by = Sys.Date() + (1:2)) - colnames(xts.bool) = "value" - test(1465.20, identical(xts.bool, as.xts(as.data.table(xts.bool), numeric.only = F))) + #5268 non-numeric xts coredata + x = xts::xts(x = c(T,F), order.by = Sys.Date() + (1:2)) + colnames(x) = "value" + test(1465.20, identical(x, as.xts(as.data.table(x), numeric.only = F))) Sys.setenv("_R_CHECK_LENGTH_1_LOGIC2_" = TRUE) } From 327822f8b6f1e377d4dad8e623042b5cb2395d35 Mon Sep 17 00:00:00 2001 From: Matt Dowle Date: Thu, 2 Dec 2021 16:01:40 -0700 Subject: [PATCH 4/5] inherits rather than %chin% to read slightly better --- R/xts.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/xts.R b/R/xts.R index 0e47697dbe..234f36cac6 100644 --- a/R/xts.R +++ b/R/xts.R @@ -24,5 +24,5 @@ as.xts.data.table = function(x, numeric.only = TRUE, ...) 
{ if (!all(colsNumeric)) warningf("Following columns are not numeric and will be omitted: %s", brackify(names(colsNumeric)[!colsNumeric])) r <- r[, .SD, .SDcols = names(colsNumeric)[colsNumeric]] } - return(xts::xts(as.matrix(r), order.by = if ("IDate" %chin% class(x[[1L]])) as.Date(x[[1L]]) else x[[1L]])) + return(xts::xts(as.matrix(r), order.by = if (inherits(x[[1L]], "IDate")) as.Date(x[[1L]]) else x[[1L]])) } From 13281895c2dac0d4a4f138a9656ef5462e7cbfd1 Mon Sep 17 00:00:00 2001 From: mattdowle Date: Thu, 2 Dec 2021 16:19:30 -0700 Subject: [PATCH 5/5] T/F => TRUE/FALSE in test --- inst/tests/tests.Rraw | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 17d3edaacc..5325f6f6d2 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -6861,10 +6861,10 @@ if (test_xts) { M = xts::as.xts(matrix(1, dimnames=list("2021-05-23", "x"))) # xts:: just to be extra robust; shouldn't be needed with rm(as.xts) above test(1465.19, inherits(as.data.table(M)$index,"POSIXct")) - #5268 non-numeric xts coredata - x = xts::xts(x = c(T,F), order.by = Sys.Date() + (1:2)) - colnames(x) = "value" - test(1465.20, identical(x, as.xts(as.data.table(x), numeric.only = F))) + # non-numeric xts coredata, #5268 + x = xts::xts(x=c(TRUE,FALSE), order.by=Sys.Date()+(1:2)) + colnames(x) = "value" # perhaps relates to #4897 + test(1465.20, identical(x, as.xts(as.data.table(x), numeric.only=FALSE))) Sys.setenv("_R_CHECK_LENGTH_1_LOGIC2_" = TRUE) }