From 045ba8a3f13f10490afc15cc2a3316854eb0425c Mon Sep 17 00:00:00 2001
From: Michael Chirico <MichaelChirico4@gmail.com>
Date: Tue, 9 Feb 2016 19:50:55 -0500
Subject: [PATCH] adds option for dplyr-inspired column class summary with
 printing

attempting test fix again

gave up on second test

fixing error in .Rd
---
 R/data.table.R          | 23 ++++++++++++++++----
 R/onLoad.R              |  1 +
 README.md               |  2 ++
 inst/tests/tests.Rraw   | 20 +++++++++++++++++
 man/print.data.table.Rd | 48 +++++++++++++++++++++++++++++++++++++++++
 5 files changed, 90 insertions(+), 4 deletions(-)
 create mode 100644 man/print.data.table.Rd
diff --git a/R/data.table.R b/R/data.table.R
index fb7b8de46f..31249d414a 100644
--- a/R/data.table.R
+++ b/R/data.table.R
@@ -89,10 +89,11 @@ setPackageName("data.table",.global)
 # So even though .BY doesn't appear in this file, it should still be NULL here and exported because it's
 # defined in SDenv and can be used by users.
 
-print.data.table <- function(x,
-    topn=getOption("datatable.print.topn"),   # (5) print the top topn and bottom topn rows with '---' inbetween
-    nrows=getOption("datatable.print.nrows"), # (100) under this the whole (small) table is printed, unless topn is provided
-    row.names = TRUE, quote = FALSE, ...)
+print.data.table <- 
+  function(x, topn = getOption("datatable.print.topn"),      # (5) print the top topn and bottom topn rows with '---' inbetween
+           nrows = getOption("datatable.print.nrows"),       # (100) under this the whole (small) table is printed, unless topn is provided
+           print.class = getOption("datatable.print.class"), # (FALSE) whether to include beneath each column a summary of its class
+           row.names = TRUE, quote= FALSE, ...)
 {
     if (.global$print!="" && address(x)==.global$print) {   # The !="" is to save address() calls and R's global cache of address strings
         #  := in [.data.table sets .global$print=address(x) to suppress the next print i.e., like <- does. See FAQ 2.22 and README item in v1.9.5
@@ -141,6 +142,20 @@ print.data.table <- function(x,
     # FR #5020 - add row.names = logical argument to print.data.table
     if (isTRUE(row.names)) rownames(toprint)=paste(format(rn,right=TRUE,scientific=FALSE),":",sep="") else rownames(toprint)=rep.int("", nrow(toprint))
     if (is.null(names(x))) colnames(toprint)=rep("NA", ncol(toprint)) # fixes bug #4934
+    if (isTRUE(print.class)) {
+      #Matching table for most common types & their abbreviations
+      class_abb <- c(list = "<list>", integer = "<int>", numeric = "<num>",
+                     character = "<char>", Date = "<Date>", complex = "<cplx>",
+                     factor = "<fctr>", POSIXct = "<POSc>", logical = "<lgcl>",
+                     IDate = "<IDat>", integer64 = "<i64>", raw = "<raw>",
+                     expression = "<expr>", ordered = "<ord>")
+      classes <- unname(class_abb[vapply(x, function(col) class(col)[1L], character(1L))])
+      classes[idx] <- 
+        vapply(x[ , idx <- which(is.na(classes)), with = FALSE], 
+               function(col) paste0("<", class(col)[1L], ">"), character(1))
+      toprint = rbind(classes, toprint)
+      rownames(toprint)[1L] <- ""
+    }
     if (printdots) {
         toprint = rbind(head(toprint,topn),"---"="",tail(toprint,topn))
         rownames(toprint) = format(rownames(toprint),justify="right")
diff --git a/R/onLoad.R b/R/onLoad.R
index 734e162c5c..7a717a9664 100644
--- a/R/onLoad.R
+++ b/R/onLoad.R
@@ -32,6 +32,7 @@
              "datatable.optimize"="Inf",             # datatable.<argument name>
              "datatable.print.nrows"="100L",         # datatable.<argument name>
              "datatable.print.topn"="5L",            # datatable.<argument name>
+             "datatable.print.class"="FALSE",        # for print.data.table
              "datatable.allow.cartesian"="FALSE",    # datatable.<argument name>
              "datatable.dfdispatchwarn"="TRUE",                   # not a function argument
              "datatable.warnredundantby"="TRUE",                  # not a function argument
diff --git a/README.md b/README.md
index 75414d7bda..dc24a9a67d 100644
--- a/README.md
+++ b/README.md
@@ -54,6 +54,8 @@
   19. `dcast.data.table` now allows `drop = c(FALSE, TRUE)` and `drop = c(TRUE, FALSE)`. The former only fills all missing combinations of formula LHS, where as the latter fills only all missing combinations of formula RHS. Thanks to Ananda Mahto for [this SO post](http://stackoverflow.com/q/34830908/559784) and to Jaap for filing [#1512](https://github.com/Rdatatable/data.table/issues/1512).
 
   20. `data.table()` function gains `stringsAsFactors` argument with default `FALSE`, [#643](https://github.com/Rdatatable/data.table/issues/643). Thanks to @Jan for reviving this issue.
+  
+  21. New argument `print.class` for `print.data.table` allows for including column class under column names (as inspired by `tbl_df` in `dplyr`); default (adjustable via `"datatable.print.class"` option) is `FALSE`, the inherited behavior. Part of [#1523](https://github.com/Rdatatable/data.table/issues/1523); thanks to @MichaelChirico for the FR & PR.
 
 #### BUG FIXES
 
diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index c722259b2a..5380a4963c 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -7393,6 +7393,26 @@ test(1599.2, data.table(x=vv, y=1:10, stringsAsFactors=TRUE)$x, factor(vv))
 DT <- data.table(a = 0L:1L, b = c(1L, 1L))
 test(1600, DT[ , lapply(.SD, function(x) if (all(x)) x)], data.table(b=c(1L, 1L)))
 
+# set of enhancements to print.data.table for #1523
+## dplyr-like column summary
+icol <- 1L:3L
+Dcol <- as.Date(paste0("2016-01-0", 1:3))
+DT1 <- data.table(lcol = list(list(1:3), list(1:3), list(1:3)),
+                  icol, ncol = as.numeric(icol), ccol = c("a", "b", "c"), 
+                  xcol = as.complex(icol), ocol = factor(icol, ordered = TRUE),
+                  fcol = factor(icol))
+
+DT2 <- data.table(Dcol, Pcol = as.POSIXct(Dcol), 
+                  gcol = as.logical(icol), 
+                  Icol = as.IDate(Dcol), ucol = icol)
+class(DT2$ucol) <- "asdf"
+test(1601.1, capture.output(print(DT1, print.class = TRUE)),
+     c("     lcol  icol  ncol   ccol   xcol  ocol   fcol",
+       "   <list> <int> <num> <char> <cplx> <ord> <fctr>", 
+       "1: <list>     1     1      a   1+0i     1      1", 
+       "2: <list>     2     2      b   2+0i     2      2", 
+       "3: <list>     3     3      c   3+0i     3      3"))
+
 ##########################
 
 # TODO: Tests involving GForce functions needs to be run with optimisation level 1 and 2, so that both functions are tested all the time.
diff --git a/man/print.data.table.Rd b/man/print.data.table.Rd
new file mode 100644
index 0000000000..377c55ed56
--- /dev/null
+++ b/man/print.data.table.Rd
@@ -0,0 +1,48 @@
+\name{print.data.table}
+\alias{print.data.table}
+\title{ data.table Printing Options }
+\description{
+  \code{print.data.table} extends the functionalities of \code{print.data.frame}.
+
+  Key enhancements include automatic truncation of the printed output for tables with many rows and a concise column-wise \code{class} summary.
+}
+\usage{
+  \method{print}{data.table}(x,
+    topn=getOption("datatable.print.topn"),         # default: 5
+    nrows=getOption("datatable.print.nrows"),       # default: 100
+    print.class=getOption("datatable.print.class"), # default: FALSE
+    row.names=TRUE,quote=FALSE,...)
+}
+\arguments{
+  \item{x}{ A \code{data.table}. }
+  \item{topn}{ The number of rows to be printed from the beginning and end of tables with more than \code{nrows} rows. }
+  \item{nrows}{ The number of rows which will be printed before truncation is enforced. }
+  \item{print.class}{ If \code{TRUE}, the resulting output will include, directly beneath each column name, that column's class (or a self-evident abbreviation thereof). }
+  \item{row.names}{ If \code{TRUE}, row indices will be printed alongside \code{x}. }
+  \item{quote}{ If \code{TRUE}, all output will appear in quotes, as in \code{print.default}. }
+  \item{\dots}{ Other arguments ultimately passed to \code{format}. }
+}
+\details{
+  By default, with an eye to the typically large number of observations in a \code{data.table}, only the beginning and end of the object are displayed (specifically, \code{head(x, topn)} and \code{tail(x, topn)} are displayed unless \code{nrow(x) < nrows}, in which case all rows will print).
+}
+\seealso{\code{\link{print.default}}}
+\examples{
+  #output compression
+  DT <- data.table(a = 1:1000)
+  print(DT, nrows = 100, topn = 4)
+  
+  #`quote` can be used to identify whitespace
+  DT <- data.table(blanks = c(" 12", " 34"),
+                   noblanks = c("12", "34"))
+  print(DT, quote = TRUE)
+  
+  #`print.class` provides handy column type summaries at a glance
+  DT <- data.table(a = vector("integer", 3), 
+                   b = vector("complex", 3),
+                   c = as.IDate(paste0("2016-02-0", 1:3)))
+  print(DT, print.class = TRUE)
+  
+  #`row.names` can be eliminated to save space
+  DT <- data.table(a = 1:3)
+  print(DT, row.names = FALSE)
+}
\ No newline at end of file