diff --git a/r/R/array.R b/r/R/array.R index bd1e161f0ba..3333218cd4d 100644 --- a/r/R/array.R +++ b/r/R/array.R @@ -74,7 +74,10 @@ Array <- R6Class("Array", ApproxEquals = function(other) Array__ApproxEquals(self, other), data = function() shared_ptr(ArrayData, Array__data(self)), as_vector = function() Array__as_vector(self), - ToString = function() Array__ToString(self), + ToString = function() { + typ <- paste0("<", self$type$ToString(), ">") + paste(typ, Array__ToString(self), sep = "\n") + }, Slice = function(offset, length = NULL){ if (is.null(length)) { shared_ptr(Array, Array__Slice1(self, offset)) diff --git a/r/R/arrow-package.R b/r/R/arrow-package.R index 00a911bbe25..7ce881f90b0 100644 --- a/r/R/arrow-package.R +++ b/r/R/arrow-package.R @@ -54,15 +54,18 @@ Object <- R6Class("Object", self$`.:xp:.` <- xp }, print = function(...){ - cat(class(self)[[1]], "\n") + cat(class(self)[[1]], "\n", sep = "") if (!is.null(self$ToString)){ - cat(self$ToString(), "\n") + cat(self$ToString(), "\n", sep = "") } invisible(self) } ) ) +#' @export +`!=.Object` <- function(lhs, rhs) !(lhs == rhs) + shared_ptr <- function(class, xp) { if (!shared_ptr_is_null(xp)) class$new(xp) } diff --git a/r/R/chunked-array.R b/r/R/chunked-array.R index a6e4946d6e0..3bf72985bd9 100644 --- a/r/R/chunked-array.R +++ b/r/R/chunked-array.R @@ -69,6 +69,20 @@ ChunkedArray <- R6Class("ChunkedArray", inherit = Object, }, Validate = function() { ChunkedArray__Validate(self) + }, + ToString = function() { + out <- self$chunk(0)$ToString() + if (self$num_chunks > 1) { + # Regardless of whether the first array prints with ellipsis, we need + # to ellipsize because there's more data than is contained in this + # chunk + if (grepl("...\n", out, fixed = TRUE)) { + out <- sub("\\.\\.\\..*$", "...\n]", out) + } else { + out <- sub("\\n\\]$", ",\n ...\n]", out) + } + } + out } ), active = list( diff --git a/r/R/dictionary.R b/r/R/dictionary.R index ab33c3e1982..6273ffc2c87 100644 --- a/r/R/dictionary.R +++ b/r/R/dictionary.R @@ -31,7 +31,11 @@ #' @name DictionaryType DictionaryType <- R6Class("DictionaryType", inherit = FixedWidthType, - + public = list( + ToString = function() { + prettier_dictionary_type(DataType__ToString(self)) + } + ), active = list( index_type = function() DataType$create(DictionaryType__index_type(self)), value_type = function() DataType$create(DictionaryType__value_type(self)), @@ -39,20 +43,27 @@ DictionaryType <- R6Class("DictionaryType", ordered = function() DictionaryType__ordered(self) ) ) +DictionaryType$create <- function(index_type = int32(), + value_type = utf8(), + ordered = FALSE) { + assert_is(index_type, "DataType") + assert_is(value_type, "DataType") + shared_ptr(DictionaryType, DictionaryType__initialize(index_type, value_type, ordered)) +} #' Create a dictionary type #' -#' @param index_type index type, e.g. [int32()] -#' @param value_type value type, probably [utf8()] -#' @param ordered Is this an ordered dictionary ? +#' @param index_type A DataType for the indices (default [int32()]) +#' @param value_type A DataType for the values (default [utf8()]) +#' @param ordered Is this an ordered dictionary (default `FALSE`)? #' #' @return A [DictionaryType] #' @seealso [Other Arrow data types][data-type] #' @export -dictionary <- function(index_type, value_type, ordered = FALSE) { - assert_that( - inherits(index_type, "DataType"), - inherits(index_type, "DataType") - ) - shared_ptr(DictionaryType, DictionaryType__initialize(index_type, value_type, ordered)) +dictionary <- DictionaryType$create + +prettier_dictionary_type <- function(x) { + # Prettier format the "ordered" attribute + x <- sub(", ordered=0", "", x) + sub("ordered=1", "ordered", x) } diff --git a/r/R/field.R b/r/R/field.R index fc5abc879cb..152099c0d10 100644 --- a/r/R/field.R +++ b/r/R/field.R @@ -34,7 +34,7 @@ Field <- R6Class("Field", inherit = Object, public = list( ToString = function() { - Field__ToString(self) + prettier_dictionary_type(Field__ToString(self)) }, Equals = function(other) { inherits(other, "Field") && Field__Equals(self, other) diff --git a/r/R/record-batch.R b/r/R/record-batch.R index 80796b90d2f..3814a2a2b78 100644 --- a/r/R/record-batch.R +++ b/r/R/record-batch.R @@ -111,6 +111,7 @@ RecordBatch <- R6Class("RecordBatch", inherit = Object, }, serialize = function() ipc___SerializeRecordBatch__Raw(self), + ToString = function() ToString_tabular(self), cast = function(target_schema, safe = TRUE, options = cast_options(safe)) { assert_is(target_schema, "Schema") @@ -246,3 +247,11 @@ tail.RecordBatch <- function(x, n = 6L, ...) { } x$Slice(n) } + +ToString_tabular <- function(x, ...) { + # Generic to work with both RecordBatch and Table + sch <- unlist(strsplit(x$schema$ToString(), "\n")) + sch <- sub("(.*): (.*)", "$\\1 <\\2>", sch) + dims <- sprintf("%s rows x %s columns", nrow(x), ncol(x)) + paste(c(dims, sch), collapse = "\n") +} diff --git a/r/R/schema.R b/r/R/schema.R index 9f28fb53d17..a2ee00c0ac6 100644 --- a/r/R/schema.R +++ b/r/R/schema.R @@ -49,7 +49,7 @@ Schema <- R6Class("Schema", inherit = Object, public = list( - ToString = function() Schema__ToString(self), + ToString = function() prettier_dictionary_type(Schema__ToString(self)), num_fields = function() Schema__num_fields(self), field = function(i) shared_ptr(Field, Schema__field(self, i)), serialize = function() Schema__serialize(self), diff --git a/r/R/table.R b/r/R/table.R index 47626b160d9..bd3f447c84e 100644 --- a/r/R/table.R +++ b/r/R/table.R @@ -104,6 +104,7 @@ Table <- R6Class("Table", inherit = Object, field = function(i) shared_ptr(Field, Table__field(self, i)), serialize = function(output_stream, ...) write_table(self, output_stream, ...), + ToString = function() ToString_tabular(self), cast = function(target_schema, safe = TRUE, options = cast_options(safe)) { assert_is(target_schema, "Schema") diff --git a/r/R/type.R b/r/R/type.R index 36d81e293c0..1e130787c68 100644 --- a/r/R/type.R +++ b/r/R/type.R @@ -16,12 +16,6 @@ # under the License. #' @include arrow-package.R - -#' @export -`!=.Object` <- function(lhs, rhs){ - !(lhs == rhs) -} - #' @title class arrow::DataType #' #' @usage NULL diff --git a/r/README.Rmd b/r/README.Rmd index b07d8c0bdcd..cb57d1ab992 100644 --- a/r/README.Rmd +++ b/r/README.Rmd @@ -54,9 +54,13 @@ When installing from source, if the R and C++ library versions do not match, ins library(arrow) set.seed(24) -tab <- Table$create(x = 1:10, y = rnorm(10)) -tab$schema +tab <- Table$create( + x = 1:10, + y = rnorm(10), + z = as.factor(rep(c("b", "c"), 5)) +) tab +tab$x as.data.frame(tab) ``` diff --git a/r/README.md b/r/README.md index 9733da91a95..937f00d85af 100644 --- a/r/README.md +++ b/r/README.md @@ -69,25 +69,44 @@ Arrow C++ library first. library(arrow) set.seed(24) -tab <- Table$create(x = 1:10, y = rnorm(10)) -tab$schema -#> Schema -#> x: int32 -#> y: double +tab <- Table$create( + x = 1:10, + y = rnorm(10), + z = as.factor(rep(c("b", "c"), 5)) +) tab #> Table +#> 10 rows x 3 columns +#> $x +#> $y +#> $z > +tab$x +#> ChunkedArray +#> +#> [ +#> 1, +#> 2, +#> 3, +#> 4, +#> 5, +#> 6, +#> 7, +#> 8, +#> 9, +#> 10 +#> ] as.data.frame(tab) -#> x y -#> 1 1 -0.545880758 -#> 2 2 0.536585304 -#> 3 3 0.419623149 -#> 4 4 -0.583627199 -#> 5 5 0.847460017 -#> 6 6 0.266021979 -#> 7 7 0.444585270 -#> 8 8 -0.466495124 -#> 9 9 -0.848370044 -#> 10 10 0.002311942 +#> x y z +#> 1 1 -0.545880758 b +#> 2 2 0.536585304 c +#> 3 3 0.419623149 b +#> 4 4 -0.583627199 c +#> 5 5 0.847460017 b +#> 6 6 0.266021979 c +#> 7 7 0.444585270 b +#> 8 8 -0.466495124 c +#> 9 9 -0.848370044 b +#> 10 10 0.002311942 c ``` ## Installing a development version diff --git a/r/man/dictionary.Rd b/r/man/dictionary.Rd index 183513e8c47..2716a173c78 100644 --- a/r/man/dictionary.Rd +++ b/r/man/dictionary.Rd @@ -4,14 +4,15 @@ \alias{dictionary} \title{Create a dictionary type} \usage{ -dictionary(index_type, value_type, ordered = FALSE) +dictionary(index_type = int32(), value_type = utf8(), + ordered = FALSE) } \arguments{ -\item{index_type}{index type, e.g. \code{\link[=int32]{int32()}}} +\item{index_type}{A DataType for the indices (default \code{\link[=int32]{int32()}})} -\item{value_type}{value type, probably \code{\link[=utf8]{utf8()}}} +\item{value_type}{A DataType for the values (default \code{\link[=utf8]{utf8()}})} -\item{ordered}{Is this an ordered dictionary ?} +\item{ordered}{Is this an ordered dictionary (default \code{FALSE})?} } \value{ A \link{DictionaryType} diff --git a/r/tests/testthat/test-Array.R b/r/tests/testthat/test-Array.R index 95ef12d4c86..016b137f04c 100644 --- a/r/tests/testthat/test-Array.R +++ b/r/tests/testthat/test-Array.R @@ -52,6 +52,11 @@ test_that("Array", { expect_equal(z_dbl$as_vector(), as.numeric(4:5)) }) +test_that("Array print method includes type", { + x <- Array$create(c(1:10, 1:10, 1:5)) + expect_output(print(x), "Array\n\n[\n", fixed = TRUE) +}) + test_that("Array supports NA", { x_int <- Array$create(as.integer(c(1:10, NA))) x_dbl <- Array$create(as.numeric(c(1:10, NA))) @@ -257,7 +262,7 @@ test_that("array supports integer64", { expect_true(a$IsNull(3L)) }) -test_that("array$as_vector() correctly handles all NA inte64 (ARROW-3795)", { +test_that("array$as_vector() correctly handles all NA int64 (ARROW-3795)", { x <- bit64::as.integer64(NA) a <- Array$create(x) expect_true(is.na(a$as_vector())) diff --git a/r/tests/testthat/test-RecordBatch.R b/r/tests/testthat/test-RecordBatch.R index 83959527f9c..97b8f694868 100644 --- a/r/tests/testthat/test-RecordBatch.R +++ b/r/tests/testthat/test-RecordBatch.R @@ -34,7 +34,7 @@ test_that("RecordBatch", { schema( int = int32(), dbl = float64(), lgl = boolean(), chr = utf8(), - fct = dictionary(int32(), Array$create(letters[1:10])) + fct = dictionary() ) ) expect_equal(batch$num_columns, 5L) @@ -69,12 +69,12 @@ test_that("RecordBatch", { col_fct <- batch$column(4) expect_true(inherits(col_fct, 'Array')) expect_equal(col_fct$as_vector(), tbl$fct) - expect_equal(col_fct$type, dictionary(int32(), Array$create(letters[1:10]))) + expect_equal(col_fct$type, dictionary()) batch2 <- batch$RemoveColumn(0) expect_equal( batch2$schema, - schema(dbl = float64(), lgl = boolean(), chr = utf8(), fct = dictionary(int32(), Array$create(letters[1:10]))) + schema(dbl = float64(), lgl = boolean(), chr = utf8(), fct = dictionary()) ) expect_equal(batch2$column(0), batch$column(1)) expect_identical(as.data.frame(batch2), tbl[,-1]) @@ -120,6 +120,23 @@ test_that("head and tail on RecordBatch", { expect_identical(as.data.frame(tail(batch, -4)), tail(tbl, -4)) }) +test_that("RecordBatch print method", { + expect_output( + print(batch), + paste( + "RecordBatch", + "10 rows x 5 columns", + "$int ", + "$dbl ", + "$lgl ", + "$chr ", + "$fct >", + sep = "\n" + ), + fixed = TRUE + ) +}) + test_that("RecordBatch with 0 rows are supported", { tbl <- tibble::tibble( int = integer(), @@ -139,7 +156,7 @@ test_that("RecordBatch with 0 rows are supported", { dbl = float64(), lgl = boolean(), chr = utf8(), - fct = dictionary(int32(), Array$create(c("a", "b"))) + fct = dictionary() ) ) }) diff --git a/r/tests/testthat/test-Table.R b/r/tests/testthat/test-Table.R index 372af7d07bc..d1b00ed40e9 100644 --- a/r/tests/testthat/test-Table.R +++ b/r/tests/testthat/test-Table.R @@ -119,6 +119,23 @@ test_that("head and tail on Table", { expect_identical(as.data.frame(tail(tab, -4)), tail(tbl, -4)) }) +test_that("Table print method", { + expect_output( + print(tab), + paste( + "Table", + "10 rows x 5 columns", + "$int ", + "$dbl ", + "$lgl ", + "$chr ", + "$fct >", + sep = "\n" + ), + fixed = TRUE + ) +}) + test_that("table active bindings", { expect_identical(dim(tbl), dim(tab)) expect_is(tab$columns, "list") diff --git a/r/tests/testthat/test-chunked-array.R b/r/tests/testthat/test-chunked-array.R index ff3daef7936..0e2a21ba5e6 100644 --- a/r/tests/testthat/test-chunked-array.R +++ b/r/tests/testthat/test-chunked-array.R @@ -27,8 +27,8 @@ test_that("ChunkedArray", { y <- x$Slice(8) expect_equal(y$type, int32()) expect_equal(y$num_chunks, 3L) - expect_equal(y$length(), 17L) - expect_equal(y$as_vector(), c(9:10, 1:10, 1:5)) + expect_equal(length(y), 17L) + expect_equal(as.vector(y), c(9:10, 1:10, 1:5)) z <- x$Slice(8, 5) expect_equal(z$type, int32()) @@ -55,6 +55,55 @@ test_that("ChunkedArray", { expect_equal(z_dbl$as_vector(), as.numeric(3:4)) }) +test_that("print ChunkedArray", { + x1 <- chunked_array(c(1,2,3), c(4,5,6)) + expect_output( + print(x1), + paste( + "ChunkedArray", + "", + "[", + " 1,", + " 2,", + " 3,", + " ...", + "]", + sep = "\n" + ), + fixed = TRUE + ) + x2 <- chunked_array(1:30, c(4,5,6)) + expect_output( + print(x2), + paste( + "ChunkedArray", + "", + "[", + " 1,", + " 2,", + " 3,", + " 4,", + " 5,", + " 6,", + " 7,", + " 8,", + " 9,", + " 10,", + " ...", + "]", + sep = "\n" + ), + fixed = TRUE + ) + # If there's only one chunk, it should look like a regular Array + x3 <- chunked_array(1:30) + expect_output( + print(x3), + paste0("Chunked", paste(capture.output(print(Array$create(1:30))), collapse = "\n")), + fixed = TRUE + ) +}) + test_that("ChunkedArray handles !!! splicing", { data <- list(1, 2, 3) x <- chunked_array(!!!data) diff --git a/r/tests/testthat/test-data-type.R b/r/tests/testthat/test-data-type.R index fd8bef1fc79..5a408ed3f6c 100644 --- a/r/tests/testthat/test-data-type.R +++ b/r/tests/testthat/test-data-type.R @@ -367,7 +367,9 @@ test_that("DictionaryType works as expected (ARROW-3355)", { expect_false(d == int32()) expect_equal(d$id, Type$DICTIONARY) expect_equal(d$bit_width, 32L) - expect_equal(d$ToString(), "dictionary") + expect_equal(d$ToString(), "dictionary") expect_equal(d$index_type, int32()) expect_equal(d$value_type, utf8()) + ord <- dictionary(ordered = TRUE) + expect_equal(ord$ToString(), "dictionary") }) diff --git a/r/tests/testthat/test-field.R b/r/tests/testthat/test-field.R index d7de087d12f..8a4c88f46dd 100644 --- a/r/tests/testthat/test-field.R +++ b/r/tests/testthat/test-field.R @@ -28,3 +28,11 @@ test_that("field() factory", { test_that("Field validation", { expect_error(schema(b = 32), "b must be arrow::DataType, not numeric") }) + +test_that("Print method for field", { + expect_output(print(field("x", int32())), "Field\nx: int32") + expect_output( + print(field("zz", dictionary())), + "Field\nzz: dictionary" + ) +}) diff --git a/r/tests/testthat/test-type.R b/r/tests/testthat/test-type.R index 19934c6e472..b6bfab0fdbd 100644 --- a/r/tests/testthat/test-type.R +++ b/r/tests/testthat/test-type.R @@ -33,10 +33,7 @@ test_that("type() infers from R type", { expect_equal(type(TRUE), boolean()) expect_equal(type(raw()), int8()) expect_equal(type(""), utf8()) - expect_equal( - type(iris$Species), - dictionary(int8(), Array$create(levels(iris$Species)), FALSE) - ) + expect_equal(type(iris$Species), dictionary()) expect_equal( type(lubridate::ymd_hms("2019-02-14 13:55:05")), timestamp(TimeUnit$MICRO, "GMT")