Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

print.tbl_df() fixup #51

Merged
merged 37 commits into from
Jun 13, 2016
Merged
Show file tree
Hide file tree
Changes from 21 commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
44d9cf9
omit dots if length known
Mar 18, 2016
92a7191
show number of missing rows in last line
Mar 19, 2016
ead7eca
omit source information and dimensions for data frame sources
Mar 19, 2016
ade1107
update README
Mar 19, 2016
3dea6e5
test output if number of rows unknown
Mar 19, 2016
68f726b
add test output
Mar 19, 2016
c928632
always print number of rows if zero-row or zero-col data frame
Mar 19, 2016
3f4f968
rename, test corner case
Mar 19, 2016
23e1860
new-style output
May 5, 2016
3c60db9
split print(), also return output invisibly
May 5, 2016
606e1f0
support n_extra = 0
May 5, 2016
40c032c
remove unused dim_desc()
May 5, 2016
179e3b1
update README
May 5, 2016
e5e62ca
more variables after colon, not in parentheses
May 5, 2016
390196f
format_extra_...() instead of print_extra_...()
May 5, 2016
e10ba05
extract format_extra()
May 5, 2016
880533d
knit_print() uses same extra formatting logic as trunc_mat()
May 5, 2016
22fa833
formatting
May 5, 2016
3dd4c87
separate concerns, simplify
May 5, 2016
f469b20
extra on one line, if possible
May 5, 2016
c858418
special case: zero rows
May 5, 2016
51f5693
update README
May 7, 2016
46bd4c8
use non-breaking space to keep name and type together
May 7, 2016
1477a2d
Merge branch 'master' into feature/19-remove-ellipsis
May 7, 2016
e965547
Merge remote-tracking branch 'origin/master' into feature/19-remove-e…
May 17, 2016
262424a
move code
May 17, 2016
b35106c
new unknown_rows helper class
May 17, 2016
127cb99
add desired output
May 17, 2016
1e9afc1
use question marks instead of NA for unknown dims
May 17, 2016
c66545a
don't print rows for empty data frames
May 17, 2016
396863f
use obj_sum() to print one-line summary
May 17, 2016
eaaa0e8
update README
May 17, 2016
c1e71be
explicitly register S3 methods used only in tests
May 17, 2016
fee06df
tibble instead of tbl_df in output
Jun 12, 2016
e019d2f
show big marks in size_sum()
Jun 13, 2016
a8454a9
update README
Jun 13, 2016
f4321f4
include summary in knitr output
Jun 13, 2016
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 0 additions & 3 deletions R/tbl-df.r
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,7 @@ as.data.frame.tbl_df <- function(x, row.names = NULL, optional = FALSE, ...) {
#' @rdname formatting
#' @export
print.tbl_df <- function(x, ..., n = NULL, width = NULL) {
cat("Source: local data frame ", dim_desc(x), "\n", sep = "")
cat("\n")
print(trunc_mat(x, n = n, width = width))

invisible(x)
}

Expand Down
126 changes: 84 additions & 42 deletions R/utils-format.r
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,6 @@
#' @name formatting
NULL

dim_desc <- function(x) {
d <- dim(x)
d2 <- big_mark(d)
d2[is.na(d)] <- "??"

paste0("[", paste0(d2, collapse = " x "), "]")
}

#' @export
#' @rdname formatting
#' @importFrom stats setNames
Expand All @@ -46,30 +38,34 @@ trunc_mat <- function(x, n = NULL, width = NULL, n_extra = NULL) {
n_extra <- n_extra %||% tibble_opt("max_extra_cols")

df <- as.data.frame(head(x, n))
var_types <- vapply(df, type_sum, character(1))
var_names <- names(df)
trunc_mat_impl(df, n, width, n_extra, rows)
}

trunc_mat_impl <- function(df, n, width, n_extra, rows) {
width <- tibble_width(width)
if (ncol(df) == 0 || nrow(df) == 0) {
shrunk <- list(table = NULL, extra = setNames(var_types, var_names))
} else {
shrunk <- shrink_mat(df, width, n_extra, var_names, var_types, rows, n)
}

return(structure(c(shrunk, list(width = width)), class = "trunc_mat"))
shrunk <- shrink_mat(df, width, rows, n)
trunc_info <- list(width = width, rows_total = rows, rows_min = nrow(df),
n_extra = n_extra)

structure(c(shrunk, trunc_info), class = "trunc_mat")
}

#' @importFrom stats setNames
shrink_mat <- function(df, width, n_extra, var_names, var_types, rows, n) {
shrink_mat <- function(df, width, rows, n) {
var_types <- vapply(df, type_sum, character(1))

if (ncol(df) == 0 || nrow(df) == 0) {
return(new_shrunk_mat(NULL, var_types))
}

df <- remove_rownames(df)

# Minimum width of each column is 5 "(int)", so we can make a quick first
# pass
max_cols <- floor(width / 5)
extra_wide <- seq_along(var_names) > max_cols
if (any(extra_wide)) {
df <- df[!extra_wide]
}
extra_wide <- (seq_along(df) > max_cols)
df[] <- df[!extra_wide]

# List columns need special treatment because format can't be trusted
classes <- paste0("<", vapply(df, type_sum, character(1)), ">")
Expand Down Expand Up @@ -104,41 +100,86 @@ shrink_mat <- function(df, width, n_extra, var_names, var_types, rows, n) {
shrunk <- rbind(" " = classes, shrunk)
colnames(shrunk) <- colnames(df)[!too_wide]

needs_dots <- is.na(rows) || rows > n
if (is.na(rows))
needs_dots <- (nrow(df) >= n)
else
needs_dots <- (rows > n)
if (needs_dots) {
dot_width <- pmin(w[-1][!too_wide], 3)
dots <- vapply(dot_width, function(i) paste(rep(".", i), collapse = ""),
FUN.VALUE = character(1))
shrunk <- rbind(shrunk, ".." = dots)
}

if (any(extra_wide)) {
extra_wide[seq_along(too_wide)] <- too_wide
extra <- setNames(var_types[extra_wide], var_names[extra_wide])
rows_missing <- rows - n
} else {
extra <- setNames(var_types[too_wide], var_names[too_wide])
rows_missing <- 0L
}

if (length(extra) > n_extra) {
more <- paste0("and ", length(extra) - n_extra, " more")
extra <- c(extra[1:n_extra], setNames("...", more))
}
extra_wide[seq_along(too_wide)] <- too_wide
new_shrunk_mat(shrunk, var_types[extra_wide], rows_missing)
}

list(table = shrunk, extra = extra)
new_shrunk_mat <- function(table, extra, rows_missing = NULL) {
list(table = table, extra = extra, rows_missing = rows_missing)
}

#' @export
print.trunc_mat <- function(x, ...) {
if (!is.null(x$table)) {
print_table(x)

extra <- format_extra(x)
if (length(extra) > 0) {
cat(wrap("... ", paste(extra, collapse = ", "), width = x$width), "\n",
sep = "")
}

invisible(x)
}

format_extra <- function(x) {
extra_rows <- format_extra_rows(x)
extra_cols <- format_extra_cols(x)

extra <- c(extra_rows, extra_cols)
if (length(extra) >= 1) {
extra[[1]] <- paste0("with ", extra[[1]])
extra[-1] <- vapply(extra[-1], function(ex) paste0("and ", ex), character(1))
}
extra
}

print_table <- function(x) {
if (!is.null(x$table))
print(x$table)
}

format_extra_rows <- function(x) {
if (!is.null(x$table)) {
if (is.na(x$rows_missing)) {
"more rows"
} else if (x$rows_missing > 0) {
paste0(big_mark(x$rows_missing), " more rows")
}
} else if (is.na(x$rows_total)) {
paste0("at least ", x$rows_min, " rows total")
} else {
paste0(x$rows_total, " rows total")
}
}

format_extra_cols <- function(x) {
if (length(x$extra) > 0) {
var_types <- paste0(names(x$extra), " <", x$extra, ">", collapse = ", ")
cat(wrap("Variables not shown: ", var_types, width = x$width),
".\n", sep = "")
var_types <- paste0(names(x$extra), " <", x$extra, ">")
if (x$n_extra > 0) {
if (x$n_extra < length(var_types)) {
var_types <- c(var_types[seq_len(x$n_extra)], "...")
}
vars <- paste0(": ", paste(var_types, collapse = ", "))
} else {
vars <- ""
}
paste0(length(x$extra), " ",
if (!identical(x$rows_total, 0L)) "more ",
"variables", vars)
}
invisible()
}

#' knit_print method for trunc mat
Expand All @@ -147,9 +188,10 @@ print.trunc_mat <- function(x, ...) {
knit_print.trunc_mat <- function(x, options) {
kable <- knitr::kable(x$table, row.names = FALSE)

if (length(x$extra) > 0) {
var_types <- paste0(names(x$extra), " <", x$extra, ">", collapse = ", ")
extra <- wrap("\n(_Variables not shown_: ", var_types, ")", width = x$width)
extra <- format_extra(x)

if (length(extra) > 0) {
extra <- wrap("(", paste(extra, collapse = ", "), ")", width = x$width)
} else {
extra <- "\n"
}
Expand Down
41 changes: 17 additions & 24 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,6 @@ You can create a tibble from an existing object with `as_data_frame()`:
``` r
library(tibble)
as_data_frame(iris)
#> Source: local data frame [150 x 5]
#>
#> Sepal.Length Sepal.Width Petal.Length Petal.Width Species
#> <dbl> <dbl> <dbl> <dbl> <fctr>
#> 1 5.1 3.5 1.4 0.2 setosa
Expand All @@ -29,7 +27,7 @@ as_data_frame(iris)
#> 8 5.0 3.4 1.5 0.2 setosa
#> 9 4.4 2.9 1.4 0.2 setosa
#> 10 4.9 3.1 1.5 0.1 setosa
#> .. ... ... ... ... ...
#> ... with 140 more rows
```

This will work for reasonable inputs that are already data.frame, list, matrix, or table.
Expand All @@ -38,8 +36,6 @@ You can also create a new tibble from vectors that represent the columns with `d

``` r
data_frame(x = 1:5, y = 1, z = x ^ 2 + y)
#> Source: local data frame [5 x 3]
#>
#> x y z
#> <int> <dbl> <dbl>
#> 1 1 1 2
Expand All @@ -59,8 +55,6 @@ frame_data(
"a", 2, 3.6,
"b", 1, 8.5
)
#> Source: local data frame [2 x 3]
#>
#> x y z
#> <chr> <dbl> <dbl>
#> 1 a 2 3.6
Expand All @@ -84,23 +78,22 @@ Tibbles have a refined print method that shows only the first 10 rows, and all t
``` r
library(nycflights13)
flights
#> Source: local data frame [336,776 x 16]
#>
#> year month day dep_time dep_delay arr_time arr_delay carrier tailnum
#> <int> <int> <int> <int> <dbl> <int> <dbl> <chr> <chr>
#> 1 2013 1 1 517 2 830 11 UA N14228
#> 2 2013 1 1 533 4 850 20 UA N24211
#> 3 2013 1 1 542 2 923 33 AA N619AA
#> 4 2013 1 1 544 -1 1004 -18 B6 N804JB
#> 5 2013 1 1 554 -6 812 -25 DL N668DN
#> 6 2013 1 1 554 -4 740 12 UA N39463
#> 7 2013 1 1 555 -5 913 19 B6 N516JB
#> 8 2013 1 1 557 -3 709 -14 EV N829AS
#> 9 2013 1 1 557 -3 838 -8 B6 N593JB
#> 10 2013 1 1 558 -2 753 8 AA N3ALAA
#> .. ... ... ... ... ... ... ... ... ...
#> Variables not shown: flight <int>, origin <chr>, dest <chr>, air_time
#> <dbl>, distance <dbl>, hour <dbl>, minute <dbl>.
#> year month day dep_time sched_dep_time dep_delay arr_time
#> <int> <int> <int> <int> <int> <dbl> <int>
#> 1 2013 1 1 517 515 2 830
#> 2 2013 1 1 533 529 4 850
#> 3 2013 1 1 542 540 2 923
#> 4 2013 1 1 544 545 -1 1004
#> 5 2013 1 1 554 600 -6 812
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure I like getting rid of this line - it is nice to have an up-front summary, but it does cost two lines.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's still there for the non-data.frame sources in dplyr. The information is now available in the summary lines.

How about:

as_data_frame(iris)
#>    Sepal.Length Sepal.Width Petal.Length Petal.Width Species
#>           <dbl>       <dbl>        <dbl>       <dbl>  <fctr>
#> 1           5.1         3.5          1.4         0.2  setosa
#> 2           4.9         3.0          1.4         0.2  setosa
#> 3           4.7         3.2          1.3         0.2  setosa
#> 4           4.6         3.1          1.5         0.2  setosa
#> 5           5.0         3.6          1.4         0.2  setosa
#> 6           5.4         3.9          1.7         0.4  setosa
#> 7           4.6         3.4          1.4         0.3  setosa
#> 8           5.0         3.4          1.5         0.2  setosa
#> 9           4.4         2.9          1.4         0.2  setosa
#> 10          4.9         3.1          1.5         0.1  setosa
#> ... [150 x 5], with 140 more rows

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

or even:

library(tibble)
as_data_frame(iris)
#>    Sepal.Length Sepal.Width Petal.Length Petal.Width Species
#>           <dbl>       <dbl>        <dbl>       <dbl>  <fctr>
#> 1           5.1         3.5          1.4         0.2  setosa
#> 2           4.9         3.0          1.4         0.2  setosa
#> 3           4.7         3.2          1.3         0.2  setosa
#> 4           4.6         3.1          1.5         0.2  setosa
#> 5           5.0         3.6          1.4         0.2  setosa
#> 6           5.4         3.9          1.7         0.4  setosa
#> 7           4.6         3.4          1.4         0.3  setosa
#> 8           5.0         3.4          1.5         0.2  setosa
#> 9           4.4         2.9          1.4         0.2  setosa
#> 10          4.9         3.1          1.5         0.1  setosa
#> ... [150 x 5]

#> 6 2013 1 1 554 558 -4 740
#> 7 2013 1 1 555 600 -5 913
#> 8 2013 1 1 557 600 -3 709
#> 9 2013 1 1 557 600 -3 838
#> 10 2013 1 1 558 600 -2 753
#> ... with 336,766 more rows
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think "and" would be better than "with" here to better line up with the next line

#> ... and 12 more variables (sched_arr_time <int>, arr_delay <dbl>, carrier
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe just `and 12 more variables:` (i.e. drop the parens)

#> <chr>, flight <int>, tailnum <chr>, origin <chr>, dest <chr>, air_time
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would be nice if we could keep the type with the variable name, but that's probably going to be tricky

#> <dbl>, distance <dbl>, hour <dbl>, minute <dbl>, time_hour <time>)
```

Tibbles are strict about subsetting. If you try to access a variable that does not exist, you'll get an error:
Expand Down
6 changes: 2 additions & 4 deletions tests/testthat/output/trunc_mat/all--30.txt
Original file line number Diff line number Diff line change
@@ -1,9 +1,7 @@
Source: local data frame [2 x 8]

a b c d
<dbl> <int> <lgl> <chr>
1 1.0 1 TRUE a
2 2.5 2 FALSE b
Variables not shown: e
... with 4 more variables: e
<fctr>, f <date>, g <time>,
h <list>.
h <list>
5 changes: 5 additions & 0 deletions tests/testthat/output/trunc_mat/all-1-30-0.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
a b c d
<dbl> <int> <lgl> <chr>
1 1 1 TRUE a
... with 1 more rows, and 4
more variables
13 changes: 6 additions & 7 deletions tests/testthat/output/trunc_mat/all-1-30-2.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
a b c d
<dbl> <int> <lgl> <chr>
1 1 1 TRUE a
.. ... ... ... ...
Variables not shown: e
<fctr>, f <date>, and 2
more <...>.
a b c d
<dbl> <int> <lgl> <chr>
1 1 1 TRUE a
... with 1 more rows, and 4
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we need a test case where the length of a variable name is greater than the width?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's in the iris-3-5 test case below (width = 5).

more variables: e <fctr>, f
<date>, ...
2 changes: 1 addition & 1 deletion tests/testthat/output/trunc_mat/all-knit-60.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,4 @@
|1.0 |1 |TRUE |a |a |2015-12-10 |
|2.5 |2 |FALSE |b |b |2015-12-11 |

(_Variables not shown_: g <time>, h <list>)
(with 2 more variables: g <time>, h <list>)
4 changes: 1 addition & 3 deletions tests/testthat/output/trunc_mat/iris--70.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
Source: local data frame [150 x 5]

Sepal.Length Sepal.Width Petal.Length Petal.Width Species
<dbl> <dbl> <dbl> <dbl> <fctr>
1 5.1 3.5 1.4 0.2 setosa
Expand All @@ -12,4 +10,4 @@ Source: local data frame [150 x 5]
8 5.0 3.4 1.5 0.2 setosa
9 4.4 2.9 1.4 0.2 setosa
10 4.9 3.1 1.5 0.1 setosa
.. ... ... ... ... ...
... with 140 more rows
27 changes: 15 additions & 12 deletions tests/testthat/output/trunc_mat/iris-3-5.txt
Original file line number Diff line number Diff line change
@@ -1,19 +1,22 @@
Source: local data frame [150 x 5]

Sepal.Length
<dbl>
1 5.1
2 4.9
3 4.7
.. ...
Variables
not
shown:
Sepal.Length
<dbl>
1 5.1
2 4.9
3 4.7
...
with
147
more
rows,
and
4
more
variables:
Sepal.Width
<dbl>,
Petal.Length
<dbl>,
Petal.Width
<dbl>,
Species
<fctr>.
<fctr>
22 changes: 10 additions & 12 deletions tests/testthat/output/trunc_mat/iris-5-30.txt
Original file line number Diff line number Diff line change
@@ -1,14 +1,12 @@
Source: local data frame [150 x 5]

Sepal.Length Sepal.Width
<dbl> <dbl>
1 5.1 3.5
2 4.9 3.0
3 4.7 3.2
4 4.6 3.1
5 5.0 3.6
.. ... ...
Variables not shown:
Sepal.Length Sepal.Width
<dbl> <dbl>
1 5.1 3.5
2 4.9 3.0
3 4.7 3.2
4 4.6 3.1
5 5.0 3.6
... with 145 more rows, and 3
more variables:
Petal.Length <dbl>,
Petal.Width <dbl>, Species
<fctr>.
<fctr>
13 changes: 13 additions & 0 deletions tests/testthat/output/trunc_mat/iris_unk-10-70.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
Sepal.Length Sepal.Width Petal.Length Petal.Width Species
<dbl> <dbl> <dbl> <dbl> <fctr>
1 5.1 3.5 1.4 0.2 setosa
2 4.9 3.0 1.4 0.2 setosa
3 4.7 3.2 1.3 0.2 setosa
4 4.6 3.1 1.5 0.2 setosa
5 5.0 3.6 1.4 0.2 setosa
6 5.4 3.9 1.7 0.4 setosa
7 4.6 3.4 1.4 0.3 setosa
8 5.0 3.4 1.5 0.2 setosa
9 4.4 2.9 1.4 0.2 setosa
10 4.9 3.1 1.5 0.1 setosa
... with more rows
Loading