Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion r/R/array.R
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,10 @@ Array <- R6Class("Array",
ApproxEquals = function(other) Array__ApproxEquals(self, other),
data = function() shared_ptr(ArrayData, Array__data(self)),
as_vector = function() Array__as_vector(self),
ToString = function() Array__ToString(self),
ToString = function() {
typ <- paste0("<", self$type$ToString(), ">")
paste(typ, Array__ToString(self), sep = "\n")
},
Slice = function(offset, length = NULL){
if (is.null(length)) {
shared_ptr(Array, Array__Slice1(self, offset))
Expand Down
7 changes: 5 additions & 2 deletions r/R/arrow-package.R
Original file line number Diff line number Diff line change
Expand Up @@ -54,15 +54,18 @@ Object <- R6Class("Object",
self$`.:xp:.` <- xp
},
print = function(...){
cat(class(self)[[1]], "\n")
cat(class(self)[[1]], "\n", sep = "")
if (!is.null(self$ToString)){
cat(self$ToString(), "\n")
cat(self$ToString(), "\n", sep = "")
}
invisible(self)
}
)
)

#' @export
`!=.Object` <- function(lhs, rhs) !(lhs == rhs)

shared_ptr <- function(class, xp) {
if (!shared_ptr_is_null(xp)) class$new(xp)
}
Expand Down
14 changes: 14 additions & 0 deletions r/R/chunked-array.R
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,20 @@ ChunkedArray <- R6Class("ChunkedArray", inherit = Object,
},
Validate = function() {
ChunkedArray__Validate(self)
},
ToString = function() {
out <- self$chunk(0)$ToString()
if (self$num_chunks > 1) {
# Regardless of whether the first array prints with ellipsis, we need
# to ellipsize because there's more data than is contained in this
# chunk
if (grepl("...\n", out, fixed = TRUE)) {
out <- sub("\\.\\.\\..*$", "...\n]", out)
} else {
out <- sub("\\n\\]$", ",\n ...\n]", out)
}
}
out
}
),
active = list(
Expand Down
31 changes: 21 additions & 10 deletions r/R/dictionary.R
Original file line number Diff line number Diff line change
Expand Up @@ -31,28 +31,39 @@
#' @name DictionaryType
DictionaryType <- R6Class("DictionaryType",
inherit = FixedWidthType,

public = list(
ToString = function() {
prettier_dictionary_type(DataType__ToString(self))
}
),
active = list(
index_type = function() DataType$create(DictionaryType__index_type(self)),
value_type = function() DataType$create(DictionaryType__value_type(self)),
name = function() DictionaryType__name(self),
ordered = function() DictionaryType__ordered(self)
)
)
DictionaryType$create <- function(index_type = int32(),
value_type = utf8(),
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looks weird to have default values for the type parameters.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe so, but this is what an R factor type gets translated to, so it seemed reasonable.

ordered = FALSE) {
assert_is(index_type, "DataType")
assert_is(value_type, "DataType")
shared_ptr(DictionaryType, DictionaryType__initialize(index_type, value_type, ordered))
}

#' Create a dictionary type
#'
#' @param index_type index type, e.g. [int32()]
#' @param value_type value type, probably [utf8()]
#' @param ordered Is this an ordered dictionary ?
#' @param index_type A DataType for the indices (default [int32()])
#' @param value_type A DataType for the values (default [utf8()])
#' @param ordered Is this an ordered dictionary (default `FALSE`)?
#'
#' @return A [DictionaryType]
#' @seealso [Other Arrow data types][data-type]
#' @export
dictionary <- function(index_type, value_type, ordered = FALSE) {
assert_that(
inherits(index_type, "DataType"),
inherits(index_type, "DataType")
)
shared_ptr(DictionaryType, DictionaryType__initialize(index_type, value_type, ordered))
dictionary <- DictionaryType$create

prettier_dictionary_type <- function(x) {
# Prettier format the "ordered" attribute
x <- sub(", ordered=0", "", x)
sub("ordered=1", "ordered", x)
}
2 changes: 1 addition & 1 deletion r/R/field.R
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
Field <- R6Class("Field", inherit = Object,
public = list(
ToString = function() {
Field__ToString(self)
prettier_dictionary_type(Field__ToString(self))
},
Equals = function(other) {
inherits(other, "Field") && Field__Equals(self, other)
Expand Down
9 changes: 9 additions & 0 deletions r/R/record-batch.R
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,7 @@ RecordBatch <- R6Class("RecordBatch", inherit = Object,
},

serialize = function() ipc___SerializeRecordBatch__Raw(self),
ToString = function() ToString_tabular(self),

cast = function(target_schema, safe = TRUE, options = cast_options(safe)) {
assert_is(target_schema, "Schema")
Expand Down Expand Up @@ -246,3 +247,11 @@ tail.RecordBatch <- function(x, n = 6L, ...) {
}
x$Slice(n)
}

ToString_tabular <- function(x, ...) {
# Generic to work with both RecordBatch and Table
sch <- unlist(strsplit(x$schema$ToString(), "\n"))
sch <- sub("(.*): (.*)", "$\\1 <\\2>", sch)
dims <- sprintf("%s rows x %s columns", nrow(x), ncol(x))
paste(c(dims, sch), collapse = "\n")
}
2 changes: 1 addition & 1 deletion r/R/schema.R
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@
Schema <- R6Class("Schema",
inherit = Object,
public = list(
ToString = function() Schema__ToString(self),
ToString = function() prettier_dictionary_type(Schema__ToString(self)),
num_fields = function() Schema__num_fields(self),
field = function(i) shared_ptr(Field, Schema__field(self, i)),
serialize = function() Schema__serialize(self),
Expand Down
1 change: 1 addition & 0 deletions r/R/table.R
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@ Table <- R6Class("Table", inherit = Object,
field = function(i) shared_ptr(Field, Table__field(self, i)),

serialize = function(output_stream, ...) write_table(self, output_stream, ...),
ToString = function() ToString_tabular(self),

cast = function(target_schema, safe = TRUE, options = cast_options(safe)) {
assert_is(target_schema, "Schema")
Expand Down
6 changes: 0 additions & 6 deletions r/R/type.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,6 @@
# under the License.

#' @include arrow-package.R

#' @export
`!=.Object` <- function(lhs, rhs){
!(lhs == rhs)
}

#' @title class arrow::DataType
#'
#' @usage NULL
Expand Down
8 changes: 6 additions & 2 deletions r/README.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -54,9 +54,13 @@ When installing from source, if the R and C++ library versions do not match, ins
library(arrow)
set.seed(24)

tab <- Table$create(x = 1:10, y = rnorm(10))
tab$schema
tab <- Table$create(
x = 1:10,
y = rnorm(10),
z = as.factor(rep(c("b", "c"), 5))
)
tab
tab$x
as.data.frame(tab)
```

Expand Down
51 changes: 35 additions & 16 deletions r/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -69,25 +69,44 @@ Arrow C++ library first.
library(arrow)
set.seed(24)

tab <- Table$create(x = 1:10, y = rnorm(10))
tab$schema
#> Schema
#> x: int32
#> y: double
tab <- Table$create(
x = 1:10,
y = rnorm(10),
z = as.factor(rep(c("b", "c"), 5))
)
tab
#> Table
#> 10 rows x 3 columns
#> $x <int32>
#> $y <double>
#> $z <dictionary<values=string, indices=int8>>
tab$x
#> ChunkedArray
#> <int32>
#> [
#> 1,
#> 2,
#> 3,
#> 4,
#> 5,
#> 6,
#> 7,
#> 8,
#> 9,
#> 10
#> ]
as.data.frame(tab)
#> x y
#> 1 1 -0.545880758
#> 2 2 0.536585304
#> 3 3 0.419623149
#> 4 4 -0.583627199
#> 5 5 0.847460017
#> 6 6 0.266021979
#> 7 7 0.444585270
#> 8 8 -0.466495124
#> 9 9 -0.848370044
#> 10 10 0.002311942
#> x y z
#> 1 1 -0.545880758 b
#> 2 2 0.536585304 c
#> 3 3 0.419623149 b
#> 4 4 -0.583627199 c
#> 5 5 0.847460017 b
#> 6 6 0.266021979 c
#> 7 7 0.444585270 b
#> 8 8 -0.466495124 c
#> 9 9 -0.848370044 b
#> 10 10 0.002311942 c
```

## Installing a development version
Expand Down
9 changes: 5 additions & 4 deletions r/man/dictionary.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 6 additions & 1 deletion r/tests/testthat/test-Array.R
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,11 @@ test_that("Array", {
expect_equal(z_dbl$as_vector(), as.numeric(4:5))
})

test_that("Array print method includes type", {
x <- Array$create(c(1:10, 1:10, 1:5))
expect_output(print(x), "Array\n<int32>\n[\n", fixed = TRUE)
})

test_that("Array supports NA", {
x_int <- Array$create(as.integer(c(1:10, NA)))
x_dbl <- Array$create(as.numeric(c(1:10, NA)))
Expand Down Expand Up @@ -257,7 +262,7 @@ test_that("array supports integer64", {
expect_true(a$IsNull(3L))
})

test_that("array$as_vector() correctly handles all NA inte64 (ARROW-3795)", {
test_that("array$as_vector() correctly handles all NA int64 (ARROW-3795)", {
x <- bit64::as.integer64(NA)
a <- Array$create(x)
expect_true(is.na(a$as_vector()))
Expand Down
25 changes: 21 additions & 4 deletions r/tests/testthat/test-RecordBatch.R
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ test_that("RecordBatch", {
schema(
int = int32(), dbl = float64(),
lgl = boolean(), chr = utf8(),
fct = dictionary(int32(), Array$create(letters[1:10]))
fct = dictionary()
)
)
expect_equal(batch$num_columns, 5L)
Expand Down Expand Up @@ -69,12 +69,12 @@ test_that("RecordBatch", {
col_fct <- batch$column(4)
expect_true(inherits(col_fct, 'Array'))
expect_equal(col_fct$as_vector(), tbl$fct)
expect_equal(col_fct$type, dictionary(int32(), Array$create(letters[1:10])))
expect_equal(col_fct$type, dictionary())

batch2 <- batch$RemoveColumn(0)
expect_equal(
batch2$schema,
schema(dbl = float64(), lgl = boolean(), chr = utf8(), fct = dictionary(int32(), Array$create(letters[1:10])))
schema(dbl = float64(), lgl = boolean(), chr = utf8(), fct = dictionary())
)
expect_equal(batch2$column(0), batch$column(1))
expect_identical(as.data.frame(batch2), tbl[,-1])
Expand Down Expand Up @@ -120,6 +120,23 @@ test_that("head and tail on RecordBatch", {
expect_identical(as.data.frame(tail(batch, -4)), tail(tbl, -4))
})

test_that("RecordBatch print method", {
expect_output(
print(batch),
paste(
"RecordBatch",
"10 rows x 5 columns",
"$int <int32>",
"$dbl <double>",
"$lgl <bool>",
"$chr <string>",
"$fct <dictionary<values=string, indices=int8>>",
sep = "\n"
),
fixed = TRUE
)
})

test_that("RecordBatch with 0 rows are supported", {
tbl <- tibble::tibble(
int = integer(),
Expand All @@ -139,7 +156,7 @@ test_that("RecordBatch with 0 rows are supported", {
dbl = float64(),
lgl = boolean(),
chr = utf8(),
fct = dictionary(int32(), Array$create(c("a", "b")))
fct = dictionary()
)
)
})
Expand Down
17 changes: 17 additions & 0 deletions r/tests/testthat/test-Table.R
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,23 @@ test_that("head and tail on Table", {
expect_identical(as.data.frame(tail(tab, -4)), tail(tbl, -4))
})

test_that("Table print method", {
expect_output(
print(tab),
paste(
"Table",
"10 rows x 5 columns",
"$int <int32>",
"$dbl <double>",
"$lgl <bool>",
"$chr <string>",
"$fct <dictionary<values=string, indices=int8>>",
sep = "\n"
),
fixed = TRUE
)
})

test_that("table active bindings", {
expect_identical(dim(tbl), dim(tab))
expect_is(tab$columns, "list")
Expand Down
Loading