Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
63 changes: 56 additions & 7 deletions R/data.table.R
Original file line number Diff line number Diff line change
Expand Up @@ -89,10 +89,11 @@ setPackageName("data.table",.global)
# So even though .BY doesn't appear in this file, it should still be NULL here and exported because it's
# defined in SDenv and can be used by users.

print.data.table <- function(x,
topn=getOption("datatable.print.topn"), # (5) print the top topn and bottom topn rows with '---' inbetween
nrows=getOption("datatable.print.nrows"), # (100) under this the whole (small) table is printed, unless topn is provided
row.names = TRUE, quote = FALSE, ...)
print.data.table <-
function(x, topn = getOption("datatable.print.topn"), # (5) print the top topn and bottom topn rows with '---' inbetween
nrows = getOption("datatable.print.nrows"), # (100) under this the whole (small) table is printed, unless topn is provided
print.class = getOption("datatable.print.class"), # (FALSE) whether to include beneath each column a summary of its class
by = NULL, row.names = TRUE, quote= FALSE, ...)
{
if (.global$print!="" && address(x)==.global$print) { # The !="" is to save address() calls and R's global cache of address strings
# := in [.data.table sets .global$print=address(x) to suppress the next print i.e., like <- does. See FAQ 2.22 and README item in v1.9.5
Expand Down Expand Up @@ -133,6 +134,18 @@ print.data.table <- function(x,
rn = seq_len(nrow(x))
printdots = FALSE
}
if (nnlby <- !is.null(by)){
if (length(grep(",", by))){
if (length(by) > 1L) stop("'by' is of length ", length(by), " but one or more items ",
"include a comma. Either pass a vector of column names ",
"(which can contain spaces, but no commas), or pass a ",
"vector length 1 containing comma separated column names.")
by = strsplit(by, split = ",")[[1L]]
}
if (length(forderv(x, by))) stop("Use of 'by' for printing is currently restricted to sorted tables. ",
"Please use, e.g., 'keyby' or a keyed table before printing.")
bylen = toprint[ , .N, keyby = by]$N
}
toprint=format.data.table(toprint, ...)
# fix for #975.
if (any(sapply(x, function(col) "integer64" %in% class(col))) && !"package:bit64" %in% search()) {
Expand All @@ -141,16 +154,52 @@ print.data.table <- function(x,
# FR #5020 - add row.names = logical argument to print.data.table
if (isTRUE(row.names)) rownames(toprint)=paste(format(rn,right=TRUE,scientific=FALSE),":",sep="") else rownames(toprint)=rep.int("", nrow(toprint))
if (is.null(names(x))) colnames(toprint)=rep("NA", ncol(toprint)) # fixes bug #4934
if (isTRUE(print.class)) {
#Matching table for most common types & their abbreviations
class_abb <- c(list = "<list>", integer = "<int>", numeric = "<num>",
character = "<char>", Date = "<Date>", complex = "<cplx>",
factor = "<fctr>", POSIXct = "<POSc>", logical = "<lgcl>",
IDate = "<IDat>", integer64 = "<i64>", raw = "<raw>",
expression = "<expr>", ordered = "<ord>")
classes <- unname(class_abb[vapply(x, function(col) class(col)[1L], character(1L))])
classes[idx] <-
vapply(x[ , idx <- which(is.na(classes)), with = FALSE],
function(col) paste0("<", class(col)[1L], ">"), character(1))
toprint = rbind(classes, toprint)
rownames(toprint)[1L] <- ""
}
if (printdots) {
toprint = rbind(head(toprint,topn),"---"="",tail(toprint,topn))
if (nnlby){
#if a break was induced by truncation, let the whitespace introduced
# thereby serve the 'by' whitespace's purposes
if (any(idx <- cumsum(bylen) == topn)) bylen <- bylen[-which(idx)]
out = matrix(fll <- getOption("datatable.print.byfill"),
nrow = nr <- nrow(toprint) + length(bylen) - 1L, ncol = ncol(toprint),
dimnames = list(rep(fll, nr), colnames(toprint)))
out[idx <- -(print.class + cumsum(bylen[-length(bylen)] + 1L)), ] <- toprint
if (isTRUE(row.names)) rownames(out)[idx] <- rownames(toprint)
rownames(toprint) = format(rownames(toprint), justify = "right")
print(out, right = TRUE, quote = quote)
return(invisible())
}
rownames(toprint) = format(rownames(toprint),justify="right")
print(toprint,right=TRUE,quote=quote)
print(toprint, right=TRUE, quote=quote)
return(invisible())
}
if (nrow(toprint)>20L)
# repeat colnames at the bottom if over 20 rows so you don't have to scroll up to see them
toprint=rbind(toprint,matrix(colnames(toprint),nrow=1)) # fixes bug #4934
print(toprint,right=TRUE,quote=quote)
toprint=rbind(toprint, matrix(colnames(toprint), nrow = 1L)) # fixes bug #4934
if (nnlby){
out = matrix(fll <- getOption("datatable.print.byfill"),
nrow = nr <- nrow(toprint) + length(bylen) - 1L, ncol = ncol(toprint),
dimnames = list(rep(fll, nr), colnames(toprint)))
out[idx <- -(print.class + cumsum(bylen[-length(bylen)] + 1L)), ] <- toprint
if (isTRUE(row.names)) rownames(out)[idx] <- rownames(toprint)
print(out, right = TRUE, quote = quote)
return(invisible())
}
print(toprint, right = TRUE, quote = quote)
invisible()
}

Expand Down
2 changes: 2 additions & 0 deletions R/onLoad.R
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@
"datatable.optimize"="Inf", # datatable.<argument name>
"datatable.print.nrows"="100L", # datatable.<argument name>
"datatable.print.topn"="5L", # datatable.<argument name>
"datatable.print.class"="FALSE", # for print.data.table
"datatable.print.byfill"="\"\"", # for print.data.table
"datatable.allow.cartesian"="FALSE", # datatable.<argument name>
"datatable.dfdispatchwarn"="TRUE", # not a function argument
"datatable.warnredundantby"="TRUE", # not a function argument
Expand Down
4 changes: 4 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,10 @@
19. `dcast.data.table` now allows `drop = c(FALSE, TRUE)` and `drop = c(TRUE, FALSE)`. The former only fills all missing combinations of formula LHS, where as the latter fills only all missing combinations of formula RHS. Thanks to Ananda Mahto for [this SO post](http://stackoverflow.com/q/34830908/559784) and to Jaap for filing [#1512](https://github.com/Rdatatable/data.table/issues/1512).

20. `data.table()` function gains `stringsAsFactors` argument with default `FALSE`, [#643](https://github.com/Rdatatable/data.table/issues/643). Thanks to @Jan for reviving this issue.

21. New argument `print.class` for `print.data.table` allows for including column class under column names (as inspired by `tbl_df` in `dplyr`); default (adjustable via `"datatable.print.class"` option) is `FALSE`, the inherited behavior. Part of [#1523](https://github.com/Rdatatable/data.table/issues/1523); thanks to @MichaelChirico for the FR & PR.

22. New argument `by` for `print.data.table` allows (on sorted tables) for some whitespace to be introduced in between specified `by` groupings. For flexibility, the whitespace fill can be set by the `"datatable.print.byfill"` option. Part of [#1523](https://github.com/Rdatatable/data.table/issues/1523); thanks to Yike Lu for the R-Forge FR, @franknarf1 for input on implementation, and @MichaelChirico for the PR.

#### BUG FIXES

Expand Down
59 changes: 59 additions & 0 deletions inst/tests/tests.Rraw
Original file line number Diff line number Diff line change
Expand Up @@ -7393,6 +7393,65 @@ test(1599.2, data.table(x=vv, y=1:10, stringsAsFactors=TRUE)$x, factor(vv))
DT <- data.table(a = 0L:1L, b = c(1L, 1L))
test(1600, DT[ , lapply(.SD, function(x) if (all(x)) x)], data.table(b=c(1L, 1L)))

# set of enhancements to print.data.table for #1523
## dplyr-like column summary
icol <- 1L:3L
Dcol <- as.Date(paste0("2016-01-0", 1:3))
DT1 <- data.table(lcol = list(list(1:3), list(1:3), list(1:3)),
icol, ncol = as.numeric(icol), ccol = c("a", "b", "c"),
xcol = as.complex(icol), ocol = factor(icol, ordered = TRUE),
fcol = factor(icol))

DT2 <- data.table(Dcol, Pcol = as.POSIXct(Dcol),
gcol = as.logical(icol),
Icol = as.IDate(Dcol), ucol = icol)
class(DT2$ucol) <- "asdf"
test(1601.1, capture.output(print(DT1, print.class = TRUE)),
c(" lcol icol ncol ccol xcol ocol fcol",
" <list> <int> <num> <char> <cplx> <ord> <fctr>",
"1: <list> 1 1 a 1+0i 1 1",
"2: <list> 2 2 b 2+0i 2 2",
"3: <list> 3 3 c 3+0i 3 3"))

##by grouping demarcation
set.seed(98549)
DT1 <- data.table(a = rep(c("A", "B"), each = 4),
b = rep(rep(c("A", "B"), 2), c(2, 2, 1, 3)),
c = runif(8))

test(1601.2, capture.output(print(DT1, by = "a")),
c(" a b c", "1: A A 0.98008318", "2: A A 0.90173297",
"3: A B 0.62432818", "4: A B 0.05347447", " ",
"5: B A 0.65027844", "6: B B 0.53764463", "7: B B 0.99533002",
"8: B B 0.95554375"))
test(1601.3, capture.output(print(DT1, by = "a,b")),
c(" a b c", "1: A A 0.98008318", "2: A A 0.90173297",
" ", "3: A B 0.62432818", "4: A B 0.05347447",
" ", "5: B A 0.65027844", " ",
"6: B B 0.53764463", "7: B B 0.99533002", "8: B B 0.95554375"))
test(1601.4, print(DT1[.N:1], by = "a"), error = "Use of 'by' for printing")

DT2 <- data.table(a = c("a", rep("b", 100), rep("c", 10)),
b = runif(111))
test(1601.5, capture.output(print(DT2, by = "a")),
c(" a b", " 1: a 0.034000229", " ",
" 2: b 0.274722317", " 3: b 0.409345747", " 4: b 0.494246782",
" 5: b 0.639384426", "--- ", "107: c 0.406546372",
"108: c 0.275413455", "109: c 0.002144594", "110: c 0.395671326",
"111: c 0.424717964"))
test(1601.6, capture.output(print(DT2[a!="c"], by = "a")),
c(" a b", " 1: a 0.03400023", " ",
" 2: b 0.27472232", " 3: b 0.40934575", " 4: b 0.49424678",
" 5: b 0.63938443", "--- ", " 97: b 0.32467134",
" 98: b 0.61198181", " 99: b 0.74260287", "100: b 0.61692269",
"101: b 0.39077314"))
options(datatable.print.byfill = "-")
test(1601.7, capture.output(print(DT1, by = c("a", "b"))),
c(" a b c", "1: A A 0.98008318", "2: A A 0.90173297",
"- - - -", "3: A B 0.62432818", "4: A B 0.05347447",
"- - - -", "5: B A 0.65027844", "- - - -",
"6: B B 0.53764463", "7: B B 0.99533002", "8: B B 0.95554375"))

##########################

# TODO: Tests involving GForce functions needs to be run with optimisation level 1 and 2, so that both functions are tested all the time.
Expand Down
51 changes: 51 additions & 0 deletions man/print.data.table.Rd
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
\name{print.data.table}
\alias{print.data.table}
\title{ data.table Printing Options }
\description{
\code{print.data.table} extends the functionalities of \code{print.data.frame}.

Key enhancements include automatic output compression of many observations and concise column-wise \code{class} summary.
}
\usage{
\method{print}{data.table}(x,
topn=getOption("datatable.print.topn"), # default: 5
nrows=getOption("datatable.print.nrows"), # default: 100
print.class=getOption("datatable.print.class"), # default: FALSE
by=NULL,row.names=TRUE,quote=FALSE,...)
}
\arguments{
\item{x}{ A \code{data.table}. }
\item{topn}{ The number of rows to be printed from the beginning and end of tables with more than \code{nrows} rows. }
\item{nrows}{ The number of rows which will be printed before truncation is enforced. }
\item{print.class}{ If \code{TRUE}, the resulting output will include above each column its storage class (or a self-evident abbreviation thereof). }
\item{by}{ Columns \code{by} which \code{x} to demarcate with white space when printing. See Details.}
\item{row.names}{ If \code{TRUE}, row indices will be printed alongside \code{x}. }
\item{quote}{ If \code{TRUE}, all output will appear in quotes, as in \code{print.default}. }
\item{\dots}{ Other arguments ultimately passed to \code{format}. }
}
\details{
By default, with an eye to the typically large number of observations in a code{data.table}, only the beginning and end of the object are displayed (specifically, \code{head(x, topn)} and \code{tail(x, topn)} are displayed unless \code{nrow(x) < nrows}, in which case all rows will print).

\code{by} will, for ready at-a-glance evaluation of a grouped table, produce a table with extra rows of white space (or the user's choice set \emph{via} \code{options(datatable.print.byfill)}), inserted between each group as defined by \code{x[ , .SD, by = by]}. This is explicitly designed to be used on sorted tables; it is an error to attempt otherwise. Truncation via \code{topn} or \code{nrows} occurs first, so tinker with those options if you want a more detailed glimpse.
}
\seealso{\code{\link{print.default}}}
\examples{
#output compression
DT <- data.table(a = 1:1000)
print(DT, nrows = 100, topn = 4)

#`quote` can be used to identify whitespace
DT <- data.table(blanks = c(" 12", " 34"),
noblanks = c("12", "34"))
print(DT, quote = TRUE)

#`print.class` provides handy column type summaries at a glance
DT <- data.table(a = vector("integer", 3),
b = vector("complex", 3),
c = as.IDate(paste0("2016-02-0", 1:3)))
print(DT, print.class = TRUE)

#`row.names` can be eliminated to save space
DT <- data.table(a = 1:3)
print(DT, row.names = FALSE)
}