From 045ba8a3f13f10490afc15cc2a3316854eb0425c Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Tue, 9 Feb 2016 19:50:55 -0500 Subject: [PATCH] adds option for dplyr-inspired column class summary with printing attempting test fix again gave up on second test fixing error in .Rd --- R/data.table.R | 23 ++++++++++++++++---- R/onLoad.R | 1 + README.md | 2 ++ inst/tests/tests.Rraw | 20 +++++++++++++++++ man/print.data.table.Rd | 48 +++++++++++++++++++++++++++++++++++++++++ 5 files changed, 90 insertions(+), 4 deletions(-) create mode 100644 man/print.data.table.Rd diff --git a/R/data.table.R b/R/data.table.R index fb7b8de46f..31249d414a 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -89,10 +89,11 @@ setPackageName("data.table",.global) # So even though .BY doesn't appear in this file, it should still be NULL here and exported because it's # defined in SDenv and can be used by users. -print.data.table <- function(x, - topn=getOption("datatable.print.topn"), # (5) print the top topn and bottom topn rows with '---' inbetween - nrows=getOption("datatable.print.nrows"), # (100) under this the whole (small) table is printed, unless topn is provided - row.names = TRUE, quote = FALSE, ...) +print.data.table <- + function(x, topn = getOption("datatable.print.topn"), # (5) print the top topn and bottom topn rows with '---' inbetween + nrows = getOption("datatable.print.nrows"), # (100) under this the whole (small) table is printed, unless topn is provided + print.class = getOption("datatable.print.class"), # (FALSE) whether to include beneath each column a summary of its class + row.names = TRUE, quote= FALSE, ...) { if (.global$print!="" && address(x)==.global$print) { # The !="" is to save address() calls and R's global cache of address strings # := in [.data.table sets .global$print=address(x) to suppress the next print i.e., like <- does. 
See FAQ 2.22 and README item in v1.9.5 @@ -141,6 +142,20 @@ print.data.table <- function(x, # FR #5020 - add row.names = logical argument to print.data.table if (isTRUE(row.names)) rownames(toprint)=paste(format(rn,right=TRUE,scientific=FALSE),":",sep="") else rownames(toprint)=rep.int("", nrow(toprint)) if (is.null(names(x))) colnames(toprint)=rep("NA", ncol(toprint)) # fixes bug #4934 + if (isTRUE(print.class)) { + #Matching table for most common types & their abbreviations + class_abb <- c(list = "", integer = "", numeric = "", + character = "", Date = "", complex = "", + factor = "", POSIXct = "", logical = "", + IDate = "", integer64 = "", raw = "", + expression = "", ordered = "") + classes <- unname(class_abb[vapply(x, function(col) class(col)[1L], character(1L))]) + classes[idx] <- + vapply(x[ , idx <- which(is.na(classes)), with = FALSE], + function(col) paste0("<", class(col)[1L], ">"), character(1)) + toprint = rbind(classes, toprint) + rownames(toprint)[1L] <- "" + } if (printdots) { toprint = rbind(head(toprint,topn),"---"="",tail(toprint,topn)) rownames(toprint) = format(rownames(toprint),justify="right") diff --git a/R/onLoad.R b/R/onLoad.R index 734e162c5c..7a717a9664 100644 --- a/R/onLoad.R +++ b/R/onLoad.R @@ -32,6 +32,7 @@ "datatable.optimize"="Inf", # datatable. "datatable.print.nrows"="100L", # datatable. "datatable.print.topn"="5L", # datatable. + "datatable.print.class"="FALSE", # for print.data.table "datatable.allow.cartesian"="FALSE", # datatable. "datatable.dfdispatchwarn"="TRUE", # not a function argument "datatable.warnredundantby"="TRUE", # not a function argument diff --git a/README.md b/README.md index 75414d7bda..dc24a9a67d 100644 --- a/README.md +++ b/README.md @@ -54,6 +54,8 @@ 19. `dcast.data.table` now allows `drop = c(FALSE, TRUE)` and `drop = c(TRUE, FALSE)`. The former only fills all missing combinations of formula LHS, where as the latter fills only all missing combinations of formula RHS. 
Thanks to Ananda Mahto for [this SO post](http://stackoverflow.com/q/34830908/559784) and to Jaap for filing [#1512](https://github.com/Rdatatable/data.table/issues/1512). 20. `data.table()` function gains `stringsAsFactors` argument with default `FALSE`, [#643](https://github.com/Rdatatable/data.table/issues/643). Thanks to @Jan for reviving this issue. + + 21. New argument `print.class` for `print.data.table` allows for including column class under column names (as inspired by `tbl_df` in `dplyr`); default (adjustable via `"datatable.print.class"` option) is `FALSE`, the inherited behavior. Part of [#1523](https://github.com/Rdatatable/data.table/issues/1523); thanks to @MichaelChirico for the FR & PR. #### BUG FIXES diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index c722259b2a..5380a4963c 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -7393,6 +7393,26 @@ test(1599.2, data.table(x=vv, y=1:10, stringsAsFactors=TRUE)$x, factor(vv)) DT <- data.table(a = 0L:1L, b = c(1L, 1L)) test(1600, DT[ , lapply(.SD, function(x) if (all(x)) x)], data.table(b=c(1L, 1L))) +# set of enhancements to print.data.table for #1523 +## dplyr-like column summary +icol <- 1L:3L +Dcol <- as.Date(paste0("2016-01-0", 1:3)) +DT1 <- data.table(lcol = list(list(1:3), list(1:3), list(1:3)), + icol, ncol = as.numeric(icol), ccol = c("a", "b", "c"), + xcol = as.complex(icol), ocol = factor(icol, ordered = TRUE), + fcol = factor(icol)) + +DT2 <- data.table(Dcol, Pcol = as.POSIXct(Dcol), + gcol = as.logical(icol), + Icol = as.IDate(Dcol), ucol = icol) +class(DT2$ucol) <- "asdf" +test(1601.1, capture.output(print(DT1, print.class = TRUE)), + c(" lcol icol ncol ccol xcol ocol fcol", + " ", + "1: 1 1 a 1+0i 1 1", + "2: 2 2 b 2+0i 2 2", + "3: 3 3 c 3+0i 3 3")) + ########################## # TODO: Tests involving GForce functions needs to be run with optimisation level 1 and 2, so that both functions are tested all the time. 
diff --git a/man/print.data.table.Rd b/man/print.data.table.Rd new file mode 100644 index 0000000000..377c55ed56 --- /dev/null +++ b/man/print.data.table.Rd @@ -0,0 +1,48 @@ +\name{print.data.table} +\alias{print.data.table} +\title{ data.table Printing Options } +\description{ + \code{print.data.table} extends the functionalities of \code{print.data.frame}. + + Key enhancements include automatic output compression of many observations and concise column-wise \code{class} summary. +} +\usage{ + \method{print}{data.table}(x, + topn=getOption("datatable.print.topn"), # default: 5 + nrows=getOption("datatable.print.nrows"), # default: 100 + print.class=getOption("datatable.print.class"), # default: FALSE + row.names=TRUE,quote=FALSE,...) +} +\arguments{ + \item{x}{ A \code{data.table}. } + \item{topn}{ The number of rows to be printed from the beginning and end of tables with more than \code{nrows} rows. } + \item{nrows}{ The number of rows which will be printed before truncation is enforced. } + \item{print.class}{ If \code{TRUE}, the resulting output will include beneath each column name its storage class (or a self-evident abbreviation thereof). } + \item{row.names}{ If \code{TRUE}, row indices will be printed alongside \code{x}. } + \item{quote}{ If \code{TRUE}, all output will appear in quotes, as in \code{print.default}. } + \item{\dots}{ Other arguments ultimately passed to \code{format}. } +} +\details{ + By default, with an eye to the typically large number of observations in a \code{data.table}, only the beginning and end of the object are displayed (specifically, \code{head(x, topn)} and \code{tail(x, topn)} are displayed unless \code{nrow(x) < nrows}, in which case all rows will print). 
+} +\seealso{\code{\link{print.default}}} +\examples{ + #output compression + DT <- data.table(a = 1:1000) + print(DT, nrows = 100, topn = 4) + + #`quote` can be used to identify whitespace + DT <- data.table(blanks = c(" 12", " 34"), + noblanks = c("12", "34")) + print(DT, quote = TRUE) + + #`print.class` provides handy column type summaries at a glance + DT <- data.table(a = vector("integer", 3), + b = vector("complex", 3), + c = as.IDate(paste0("2016-02-0", 1:3))) + print(DT, print.class = TRUE) + + #`row.names` can be eliminated to save space + DT <- data.table(a = 1:3) + print(DT, row.names = FALSE) +} \ No newline at end of file