Skip to content

Commit 58c091a

Browse files
Merge pull request #1529 from MichaelChirico/print.data.table
#1523 progress: adds option for dplyr-inspired column class summary with printing
2 parents bb6d4db + 045ba8a commit 58c091a

File tree

5 files changed

+90
-4
lines changed

5 files changed

+90
-4
lines changed

R/data.table.R

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -89,10 +89,11 @@ setPackageName("data.table",.global)
8989
# So even though .BY doesn't appear in this file, it should still be NULL here and exported because it's
9090
# defined in SDenv and can be used by users.
9191

92-
print.data.table <- function(x,
93-
topn=getOption("datatable.print.topn"), # (5) print the top topn and bottom topn rows with '---' inbetween
94-
nrows=getOption("datatable.print.nrows"), # (100) under this the whole (small) table is printed, unless topn is provided
95-
row.names = TRUE, quote = FALSE, ...)
92+
print.data.table <-
93+
function(x, topn = getOption("datatable.print.topn"), # (5) print the top topn and bottom topn rows with '---' inbetween
94+
nrows = getOption("datatable.print.nrows"), # (100) under this the whole (small) table is printed, unless topn is provided
95+
print.class = getOption("datatable.print.class"), # (FALSE) whether to include beneath each column a summary of its class
96+
row.names = TRUE, quote= FALSE, ...)
9697
{
9798
if (.global$print!="" && address(x)==.global$print) { # The !="" is to save address() calls and R's global cache of address strings
9899
# := in [.data.table sets .global$print=address(x) to suppress the next print i.e., like <- does. See FAQ 2.22 and README item in v1.9.5
@@ -141,6 +142,20 @@ print.data.table <- function(x,
141142
# FR #5020 - add row.names = logical argument to print.data.table
142143
if (isTRUE(row.names)) rownames(toprint)=paste(format(rn,right=TRUE,scientific=FALSE),":",sep="") else rownames(toprint)=rep.int("", nrow(toprint))
143144
if (is.null(names(x))) colnames(toprint)=rep("NA", ncol(toprint)) # fixes bug #4934
145+
if (isTRUE(print.class)) {
146+
#Matching table for most common types & their abbreviations
147+
class_abb <- c(list = "<list>", integer = "<int>", numeric = "<num>",
148+
character = "<char>", Date = "<Date>", complex = "<cplx>",
149+
factor = "<fctr>", POSIXct = "<POSc>", logical = "<lgcl>",
150+
IDate = "<IDat>", integer64 = "<i64>", raw = "<raw>",
151+
expression = "<expr>", ordered = "<ord>")
152+
classes <- unname(class_abb[vapply(x, function(col) class(col)[1L], character(1L))])
153+
classes[idx] <-
154+
vapply(x[ , idx <- which(is.na(classes)), with = FALSE],
155+
function(col) paste0("<", class(col)[1L], ">"), character(1))
156+
toprint = rbind(classes, toprint)
157+
rownames(toprint)[1L] <- ""
158+
}
144159
if (printdots) {
145160
toprint = rbind(head(toprint,topn),"---"="",tail(toprint,topn))
146161
rownames(toprint) = format(rownames(toprint),justify="right")

R/onLoad.R

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
"datatable.optimize"="Inf", # datatable.<argument name>
3333
"datatable.print.nrows"="100L", # datatable.<argument name>
3434
"datatable.print.topn"="5L", # datatable.<argument name>
35+
"datatable.print.class"="FALSE", # for print.data.table
3536
"datatable.allow.cartesian"="FALSE", # datatable.<argument name>
3637
"datatable.dfdispatchwarn"="TRUE", # not a function argument
3738
"datatable.warnredundantby"="TRUE", # not a function argument

README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,8 @@
5555
19. `dcast.data.table` now allows `drop = c(FALSE, TRUE)` and `drop = c(TRUE, FALSE)`. The former only fills all missing combinations of formula LHS, whereas the latter only fills all missing combinations of formula RHS. Thanks to Ananda Mahto for [this SO post](http://stackoverflow.com/q/34830908/559784) and to Jaap for filing [#1512](https://github.com/Rdatatable/data.table/issues/1512).
5656

5757
20. `data.table()` function gains `stringsAsFactors` argument with default `FALSE`, [#643](https://github.com/Rdatatable/data.table/issues/643). Thanks to @Jan for reviving this issue.
58+
59+
21. New argument `print.class` for `print.data.table` prints each column's class beneath the column names (as inspired by `tbl_df` in `dplyr`); the default (adjustable via the `"datatable.print.class"` option) is `FALSE`, which preserves the previous printing behavior. Part of [#1523](https://github.com/Rdatatable/data.table/issues/1523); thanks to @MichaelChirico for the FR & PR.
5860

5961
#### BUG FIXES
6062

inst/tests/tests.Rraw

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7465,6 +7465,26 @@ test(1608, dim(fread('issue_1462_fread_quotes.txt', header=FALSE)), c(4L, 224L))
74657465
# fix for #1164
74667466
test(1609, fread("issue_1164_json.txt"), data.table(json1='{""f1"":""value1"",""f2"":""double quote escaped with a backslash [ \\"" ]""}', string1="string field"))
74677467

7468+
# set of enhancements to print.data.table for #1523
7469+
## dplyr-like column summary
7470+
icol <- 1L:3L
7471+
Dcol <- as.Date(paste0("2016-01-0", 1:3))
7472+
DT1 <- data.table(lcol = list(list(1:3), list(1:3), list(1:3)),
7473+
icol, ncol = as.numeric(icol), ccol = c("a", "b", "c"),
7474+
xcol = as.complex(icol), ocol = factor(icol, ordered = TRUE),
7475+
fcol = factor(icol))
7476+
7477+
DT2 <- data.table(Dcol, Pcol = as.POSIXct(Dcol),
7478+
gcol = as.logical(icol),
7479+
Icol = as.IDate(Dcol), ucol = icol)
7480+
class(DT2$ucol) <- "asdf"
7481+
test(1601.1, capture.output(print(DT1, print.class = TRUE)),
7482+
c(" lcol icol ncol ccol xcol ocol fcol",
7483+
" <list> <int> <num> <char> <cplx> <ord> <fctr>",
7484+
"1: <list> 1 1 a 1+0i 1 1",
7485+
"2: <list> 2 2 b 2+0i 2 2",
7486+
"3: <list> 3 3 c 3+0i 3 3"))
7487+
74687488
##########################
74697489

74707490
# TODO: Tests involving GForce functions needs to be run with optimisation level 1 and 2, so that both functions are tested all the time.

man/print.data.table.Rd

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
\name{print.data.table}
2+
\alias{print.data.table}
3+
\title{ data.table Printing Options }
4+
\description{
5+
\code{print.data.table} extends the functionalities of \code{print.data.frame}.
6+
7+
Key enhancements include automatic output compression of many observations and concise column-wise \code{class} summary.
8+
}
9+
\usage{
10+
\method{print}{data.table}(x,
11+
topn=getOption("datatable.print.topn"), # default: 5
12+
nrows=getOption("datatable.print.nrows"), # default: 100
13+
print.class=getOption("datatable.print.class"), # default: FALSE
14+
row.names=TRUE,quote=FALSE,...)
15+
}
16+
\arguments{
17+
\item{x}{ A \code{data.table}. }
18+
\item{topn}{ The number of rows to be printed from the beginning and end of tables with more than \code{nrows} rows. }
19+
\item{nrows}{ The number of rows which will be printed before truncation is enforced. }
20+
\item{print.class}{ If \code{TRUE}, the resulting output will include, beneath each column name, the column's class, i.e. the first element of \code{class()} (or a self-evident abbreviation thereof). }
21+
\item{row.names}{ If \code{TRUE}, row indices will be printed alongside \code{x}. }
22+
\item{quote}{ If \code{TRUE}, all output will appear in quotes, as in \code{print.default}. }
23+
\item{\dots}{ Other arguments ultimately passed to \code{format}. }
24+
}
25+
\details{
26+
By default, with an eye to the typically large number of observations in a \code{data.table}, only the beginning and end of the object are displayed (specifically, \code{head(x, topn)} and \code{tail(x, topn)} are displayed unless \code{nrow(x) < nrows}, in which case all rows will print).
27+
}
28+
\seealso{\code{\link{print.default}}}
29+
\examples{
30+
#output compression
31+
DT <- data.table(a = 1:1000)
32+
print(DT, nrows = 100, topn = 4)
33+
34+
#`quote` can be used to identify whitespace
35+
DT <- data.table(blanks = c(" 12", " 34"),
36+
noblanks = c("12", "34"))
37+
print(DT, quote = TRUE)
38+
39+
#`print.class` provides handy column type summaries at a glance
40+
DT <- data.table(a = vector("integer", 3),
41+
b = vector("complex", 3),
42+
c = as.IDate(paste0("2016-02-0", 1:3)))
43+
print(DT, print.class = TRUE)
44+
45+
#`row.names` can be eliminated to save space
46+
DT <- data.table(a = 1:3)
47+
print(DT, row.names = FALSE)
48+
}

0 commit comments

Comments
 (0)