Rdatatable · MichaelChirico · Feb 10, 2016
@@ -89,10 +89,11 @@ setPackageName("data.table",.global)
 # So even though .BY doesn't appear in this file, it should still be NULL here and exported because it's
 # defined in SDenv and can be used by users.
 
-print.data.table <- function(x,
-    topn=getOption("datatable.print.topn"),   # (5) print the top topn and bottom topn rows with '---' inbetween
-    nrows=getOption("datatable.print.nrows"), # (100) under this the whole (small) table is printed, unless topn is provided
-    row.names = TRUE, quote = FALSE, ...)
+print.data.table <- 
+  function(x, topn = getOption("datatable.print.topn"),      # (5) print the top topn and bottom topn rows with '---' inbetween
+           nrows = getOption("datatable.print.nrows"),       # (100) under this the whole (small) table is printed, unless topn is provided
+           print.class = getOption("datatable.print.class"), # (FALSE) whether to include beneath each column a summary of its class
+           by = NULL, row.names = TRUE, quote= FALSE, ...)
 {
     if (.global$print!="" && address(x)==.global$print) {   # The !="" is to save address() calls and R's global cache of address strings
         #  := in [.data.table sets .global$print=address(x) to suppress the next print i.e., like <- does. See FAQ 2.22 and README item in v1.9.5
@@ -133,6 +134,18 @@ print.data.table <- function(x,
         rn = seq_len(nrow(x))
         printdots = FALSE
     }
+    if (nnlby <- !is.null(by)){
+      if (length(grep(",", by))){
+        if (length(by) > 1L) stop("'by' is of length ", length(by), " but one or more items ",
+                                  "include a comma. Either pass a vector of column names ",
+                                  "(which can contain spaces, but no commas), or pass a ",
+                                  "vector length 1 containing comma separated column names.")
+        by = strsplit(by, split = ",")[[1L]]
+      }
+      if (length(forderv(x, by))) stop("Use of 'by' for printing is currently restricted to sorted tables. ",
+                                       "Please use, e.g.,  'keyby' or a keyed table before printing.")
+      bylen = toprint[ , .N, keyby = by]$N
+    }
     toprint=format.data.table(toprint, ...)
     # fix for #975.
     if (any(sapply(x, function(col) "integer64" %in% class(col))) && !"package:bit64" %in% search()) {
@@ -141,16 +154,52 @@ print.data.table <- function(x,
     # FR #5020 - add row.names = logical argument to print.data.table
     if (isTRUE(row.names)) rownames(toprint)=paste(format(rn,right=TRUE,scientific=FALSE),":",sep="") else rownames(toprint)=rep.int("", nrow(toprint))
     if (is.null(names(x))) colnames(toprint)=rep("NA", ncol(toprint)) # fixes bug #4934
+    if (isTRUE(print.class)) {
+      #Matching table for most common types & their abbreviations
+      class_abb <- c(list = "<list>", integer = "<int>", numeric = "<num>",
+                     character = "<char>", Date = "<Date>", complex = "<cplx>",
+                     factor = "<fctr>", POSIXct = "<POSc>", logical = "<lgcl>",
+                     IDate = "<IDat>", integer64 = "<i64>", raw = "<raw>",
+                     expression = "<expr>", ordered = "<ord>")
+      classes <- unname(class_abb[vapply(x, function(col) class(col)[1L], character(1L))])
+      classes[idx] <- 
+        vapply(x[ , idx <- which(is.na(classes)), with = FALSE], 
+               function(col) paste0("<", class(col)[1L], ">"), character(1))
+      toprint = rbind(classes, toprint)
+      rownames(toprint)[1L] <- ""
+    }
     if (printdots) {
         toprint = rbind(head(toprint,topn),"---"="",tail(toprint,topn))
+        if (nnlby){
+          #if a break was induced by truncation, let the whitespace introduced
+          #  thereby serve the 'by' whitespace's purposes
+          if (any(idx <- cumsum(bylen) == topn)) bylen <- bylen[-which(idx)]
+          out = matrix(fll <- getOption("datatable.print.byfill"), 
+                       nrow = nr <- nrow(toprint) + length(bylen) - 1L, ncol = ncol(toprint),
+                       dimnames = list(rep(fll, nr), colnames(toprint)))
+          out[idx <- -(print.class + cumsum(bylen[-length(bylen)] + 1L)), ] <- toprint
+          if (isTRUE(row.names)) rownames(out)[idx] <- rownames(toprint)
+          rownames(toprint) = format(rownames(toprint), justify = "right")
+          print(out, right = TRUE, quote = quote)
+          return(invisible())
+        }
         rownames(toprint) = format(rownames(toprint),justify="right")
-        print(toprint,right=TRUE,quote=quote)
+        print(toprint, right=TRUE, quote=quote)
         return(invisible())
     }
     if (nrow(toprint)>20L)
         # repeat colnames at the bottom if over 20 rows so you don't have to scroll up to see them
-        toprint=rbind(toprint,matrix(colnames(toprint),nrow=1)) # fixes bug #4934
-    print(toprint,right=TRUE,quote=quote)
+        toprint=rbind(toprint, matrix(colnames(toprint), nrow = 1L)) # fixes bug #4934
+    if (nnlby){
+        out = matrix(fll <- getOption("datatable.print.byfill"), 
+                     nrow = nr <- nrow(toprint) + length(bylen) - 1L, ncol = ncol(toprint),
+                     dimnames = list(rep(fll, nr), colnames(toprint)))
+        out[idx <- -(print.class + cumsum(bylen[-length(bylen)] + 1L)), ] <- toprint
+        if (isTRUE(row.names)) rownames(out)[idx] <- rownames(toprint)
+        print(out, right = TRUE, quote = quote)
+        return(invisible())
+    }
+    print(toprint, right = TRUE, quote = quote)
     invisible()
 }
 

@@ -32,6 +32,8 @@
              "datatable.optimize"="Inf",             # datatable.<argument name>
              "datatable.print.nrows"="100L",         # datatable.<argument name>
              "datatable.print.topn"="5L",            # datatable.<argument name>
+             "datatable.print.class"="FALSE",        # for print.data.table
+             "datatable.print.byfill"="\"\"",            # for print.data.table
              "datatable.allow.cartesian"="FALSE",    # datatable.<argument name>
              "datatable.dfdispatchwarn"="TRUE",                   # not a function argument
              "datatable.warnredundantby"="TRUE",                  # not a function argument

@@ -54,6 +54,10 @@
   19. `dcast.data.table` now allows `drop = c(FALSE, TRUE)` and `drop = c(TRUE, FALSE)`. The former only fills all missing combinations of formula LHS, where as the latter fills only all missing combinations of formula RHS. Thanks to Ananda Mahto for [this SO post](http://stackoverflow.com/q/34830908/559784) and to Jaap for filing [#1512](https://github.com/Rdatatable/data.table/issues/1512).
 
   20. `data.table()` function gains `stringsAsFactors` argument with default `FALSE`, [#643](https://github.com/Rdatatable/data.table/issues/643). Thanks to @Jan for reviving this issue.
+
+  21. New argument `print.class` for `print.data.table` allows for including column class under column names (as inspired by `tbl_df` in `dplyr`); default (adjustable via `"datatable.print.class"` option) is `FALSE`, the inherited behavior. Part of [#1523](https://github.com/Rdatatable/data.table/issues/1523); thanks to @MichaelChirico for the FR & PR.
+
+  22. New argument `by` for `print.data.table` allows (on sorted tables) for some whitespace to be introduced in between specified `by` groupings. For flexibility, the whitespace fill can be set by the `"datatable.print.byfill"` option. Part of [#1523](https://github.com/Rdatatable/data.table/issues/1523); thanks to Yike Lu for the R-Forge FR, @franknarf1 for input on implementation, and @MichaelChirico for the PR.
 
 #### BUG FIXES
 

@@ -7393,6 +7393,65 @@ test(1599.2, data.table(x=vv, y=1:10, stringsAsFactors=TRUE)$x, factor(vv))
 DT <- data.table(a = 0L:1L, b = c(1L, 1L))
 test(1600, DT[ , lapply(.SD, function(x) if (all(x)) x)], data.table(b=c(1L, 1L)))
 
+# set of enhancements to print.data.table for #1523
+## dplyr-like column summary
+icol <- 1L:3L
+Dcol <- as.Date(paste0("2016-01-0", 1:3))
+DT1 <- data.table(lcol = list(list(1:3), list(1:3), list(1:3)),
+                  icol, ncol = as.numeric(icol), ccol = c("a", "b", "c"), 
+                  xcol = as.complex(icol), ocol = factor(icol, ordered = TRUE),
+                  fcol = factor(icol))
+
+DT2 <- data.table(Dcol, Pcol = as.POSIXct(Dcol), 
+                  gcol = as.logical(icol), 
+                  Icol = as.IDate(Dcol), ucol = icol)
+class(DT2$ucol) <- "asdf"
+test(1601.1, capture.output(print(DT1, print.class = TRUE)),
+     c("     lcol  icol  ncol   ccol   xcol  ocol   fcol",
+       "   <list> <int> <num> <char> <cplx> <ord> <fctr>", 
+       "1: <list>     1     1      a   1+0i     1      1", 
+       "2: <list>     2     2      b   2+0i     2      2", 
+       "3: <list>     3     3      c   3+0i     3      3"))
+
+##by grouping demarcation
+set.seed(98549)
+DT1 <- data.table(a = rep(c("A", "B"), each = 4),
+                  b = rep(rep(c("A", "B"), 2), c(2, 2, 1, 3)),
+                  c = runif(8))
+
+test(1601.2, capture.output(print(DT1, by = "a")),
+     c("   a b          c", "1: A A 0.98008318", "2: A A 0.90173297", 
+      "3: A B 0.62432818", "4: A B 0.05347447", "                 ", 
+      "5: B A 0.65027844", "6: B B 0.53764463", "7: B B 0.99533002", 
+      "8: B B 0.95554375"))
+test(1601.3, capture.output(print(DT1, by = "a,b")),
+     c("   a b          c", "1: A A 0.98008318", "2: A A 0.90173297", 
+       "                 ", "3: A B 0.62432818", "4: A B 0.05347447", 
+       "                 ", "5: B A 0.65027844", "                 ", 
+       "6: B B 0.53764463", "7: B B 0.99533002", "8: B B 0.95554375"))
+test(1601.4, print(DT1[.N:1], by = "a"), error = "Use of 'by' for printing")
+
+DT2 <- data.table(a = c("a", rep("b", 100), rep("c", 10)),
+                  b = runif(111))
+test(1601.5, capture.output(print(DT2, by = "a")),
+     c("     a           b", "  1: a 0.034000229", "                  ", 
+       "  2: b 0.274722317", "  3: b 0.409345747", "  4: b 0.494246782", 
+       "  5: b 0.639384426", "---               ", "107: c 0.406546372", 
+       "108: c 0.275413455", "109: c 0.002144594", "110: c 0.395671326", 
+       "111: c 0.424717964"))
+test(1601.6, capture.output(print(DT2[a!="c"], by = "a")),
+     c("     a          b", "  1: a 0.03400023", "                 ", 
+       "  2: b 0.27472232", "  3: b 0.40934575", "  4: b 0.49424678", 
+       "  5: b 0.63938443", "---              ", " 97: b 0.32467134", 
+       " 98: b 0.61198181", " 99: b 0.74260287", "100: b 0.61692269", 
+       "101: b 0.39077314"))
+options(datatable.print.byfill = "-")
+test(1601.7, capture.output(print(DT1, by = c("a", "b"))),
+     c("   a b          c", "1: A A 0.98008318", "2: A A 0.90173297", 
+       "-  - -          -", "3: A B 0.62432818", "4: A B 0.05347447", 
+       "-  - -          -", "5: B A 0.65027844", "-  - -          -", 
+       "6: B B 0.53764463", "7: B B 0.99533002", "8: B B 0.95554375"))
+
 ##########################
 
 # TODO: Tests involving GForce functions needs to be run with optimisation level 1 and 2, so that both functions are tested all the time.

@@ -0,0 +1,51 @@
+\name{print.data.table}
+\alias{print.data.table}
+\title{ data.table Printing Options }
+\description{
+  \code{print.data.table} extends the functionalities of \code{print.data.frame}.
+
+  Key enhancements include automatic output compression of many observations and concise column-wise \code{class} summary.
+}
+\usage{
+  \method{print}{data.table}(x,
+    topn=getOption("datatable.print.topn"),         # default: 5
+    nrows=getOption("datatable.print.nrows"),       # default: 100
+    print.class=getOption("datatable.print.class"), # default: FALSE
+    by=NULL,row.names=TRUE,quote=FALSE,...)
+}
+\arguments{
+  \item{x}{ A \code{data.table}. }
+  \item{topn}{ The number of rows to be printed from the beginning and end of tables with more than \code{nrows} rows. }
+  \item{nrows}{ The number of rows which will be printed before truncation is enforced. }
+  \item{print.class}{ If \code{TRUE}, the resulting output will include above each column its storage class (or a self-evident abbreviation thereof). }
+  \item{by}{ Columns \code{by} which \code{x} to demarcate with white space when printing. See Details.}
+  \item{row.names}{ If \code{TRUE}, row indices will be printed alongside \code{x}. }
+  \item{quote}{ If \code{TRUE}, all output will appear in quotes, as in \code{print.default}. }
+  \item{\dots}{ Other arguments ultimately passed to \code{format}. }
+}
+\details{
+  By default, with an eye to the typically large number of observations in a code{data.table}, only the beginning and end of the object are displayed (specifically, \code{head(x, topn)} and \code{tail(x, topn)} are displayed unless \code{nrow(x) < nrows}, in which case all rows will print).
+
+  \code{by} will, for ready at-a-glance evaluation of a grouped table, produce a table with extra rows of white space (or the user's choice set \emph{via} \code{options(datatable.print.byfill)}), inserted between each group as defined by \code{x[ , .SD, by = by]}. This is explicitly designed to be used on sorted tables; it is an error to attempt otherwise. Truncation via \code{topn} or \code{nrows} occurs first, so tinker with those options if you want a more detailed glimpse.
+}
+\seealso{\code{\link{print.default}}}
+\examples{
+  #output compression
+  DT <- data.table(a = 1:1000)
+  print(DT, nrows = 100, topn = 4)
+
+  #`quote` can be used to identify whitespace
+  DT <- data.table(blanks = c(" 12", " 34"),
+                   noblanks = c("12", "34"))
+  print(DT, quote = TRUE)
+
+  #`print.class` provides handy column type summaries at a glance
+  DT <- data.table(a = vector("integer", 3), 
+                   b = vector("complex", 3),
+                   c = as.IDate(paste0("2016-02-0", 1:3)))
+  print(DT, print.class = TRUE)
+
+  #`row.names` can be eliminated to save space
+  DT <- data.table(a = 1:3)
+  print(DT, row.names = FALSE)
+}