Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions dev/release/rat_exclude_files.txt
Original file line number Diff line number Diff line change
Expand Up @@ -244,6 +244,7 @@ r/README.Rmd
r/man/*.Rd
r/cran-comments.md
r/vignettes/*.Rmd
r/tests/testthat/test-*.txt
.gitattributes
ruby/red-arrow/.yardopts
rust/arrow/test/data/*.csv
Expand Down
4 changes: 4 additions & 0 deletions r/R/arrowExports.R

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

13 changes: 1 addition & 12 deletions r/R/chunked-array.R
Original file line number Diff line number Diff line change
Expand Up @@ -94,18 +94,7 @@ ChunkedArray <- R6Class("ChunkedArray", inherit = ArrowObject,
ChunkedArray__Validate(self)
},
ToString = function() {
out <- self$chunk(0)$ToString()
if (self$num_chunks > 1) {
# Regardless of whether the first array prints with ellipsis, we need
# to ellipsize because there's more data than is contained in this
# chunk
if (grepl("...\n", out, fixed = TRUE)) {
out <- sub("\\.\\.\\..*$", "...\n]", out)
} else {
out <- sub("\\n\\]$", ",\n ...\n]", out)
}
}
out
ChunkedArray__ToString(self)
},
Equals = function(other, ...) {
inherits(other, "ChunkedArray") && ChunkedArray__Equals(self, other)
Expand Down
212 changes: 142 additions & 70 deletions r/src/array_to_vector.cpp

Large diffs are not rendered by default.

16 changes: 16 additions & 0 deletions r/src/arrowExports.cpp

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions r/src/arrow_rcpp.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@ struct symbols {
struct data {
static SEXP classes_POSIXct;
static SEXP classes_metadata_r;
static SEXP classes_factor;
static SEXP classes_ordered;

static SEXP names_metadata;
static SEXP classes_vctrs_list_of;
Expand Down
5 changes: 5 additions & 0 deletions r/src/chunkedarray.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -90,4 +90,9 @@ bool ChunkedArray__Equals(const std::shared_ptr<arrow::ChunkedArray>& x,
return x->Equals(y);
}

// Returns the textual representation of a chunked array, exactly as produced
// by arrow::ChunkedArray::ToString(); no formatting is done on this side.
// [[arrow::export]]
std::string ChunkedArray__ToString(const std::shared_ptr<arrow::ChunkedArray>& x) {
  std::string repr = x->ToString();
  return repr;
}

#endif
48 changes: 17 additions & 31 deletions r/src/symbols.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,35 +29,17 @@ SEXP symbols::serialize_arrow_r_metadata = Rf_install(".serialize_arrow_r_metada
SEXP symbols::as_list = Rf_install("as.list");
SEXP symbols::ptype = Rf_install("ptype");

SEXP get_classes_POSIXct() {
SEXP classes = Rf_allocVector(STRSXP, 2);
R_PreserveObject(classes);
SET_STRING_ELT(classes, 0, Rf_mkChar("POSIXct"));
SET_STRING_ELT(classes, 1, Rf_mkChar("POSIXt"));
return classes;
}
SEXP preserved_strings(std::initializer_list<std::string> list) {
size_t n = list.size();
SEXP s = Rf_allocVector(STRSXP, n);
R_PreserveObject(s);

SEXP get_classes_metadata_r() {
SEXP classes = Rf_mkString("arrow_r_metadata");
R_PreserveObject(classes);
return classes;
}
auto it = list.begin();
for (size_t i = 0; i < n; i++, ++it) {
SET_STRING_ELT(s, i, Rf_mkCharLen(it->c_str(), it->size()));
}

SEXP get_names_metadata() {
SEXP names = Rf_allocVector(STRSXP, 2);
R_PreserveObject(names);
SET_STRING_ELT(names, 0, Rf_mkChar("attributes"));
SET_STRING_ELT(names, 1, Rf_mkChar("columns"));
return names;
}

SEXP get_classes_vctrs_list_of() {
SEXP classes = Rf_allocVector(STRSXP, 3);
R_PreserveObject(classes);
SET_STRING_ELT(classes, 0, Rf_mkChar("vctrs_list_of"));
SET_STRING_ELT(classes, 1, Rf_mkChar("vctrs_vctr"));
SET_STRING_ELT(classes, 2, Rf_mkChar("list"));
return classes;
return s;
}

SEXP get_empty_raw() {
Expand All @@ -66,10 +48,14 @@ SEXP get_empty_raw() {
return res;
}

SEXP data::classes_POSIXct = get_classes_POSIXct();
SEXP data::classes_metadata_r = get_classes_metadata_r();
SEXP data::names_metadata = get_names_metadata();
SEXP data::classes_vctrs_list_of = get_classes_vctrs_list_of();
SEXP data::classes_POSIXct = preserved_strings({"POSIXct", "POSIXt"});
SEXP data::classes_metadata_r = preserved_strings({"arrow_r_metadata"});
SEXP data::classes_factor = preserved_strings({"factor"});
SEXP data::classes_ordered = preserved_strings({"ordered", "factor"});

SEXP data::names_metadata = preserved_strings({"attributes", "columns"});
SEXP data::classes_vctrs_list_of =
preserved_strings({"vctrs_list_of", "vctrs_vctr", "list"});
SEXP data::empty_raw = get_empty_raw();

void inspect(SEXP obj) {
Expand Down
12 changes: 12 additions & 0 deletions r/tests/testthat/test-Table.R
Original file line number Diff line number Diff line change
Expand Up @@ -337,3 +337,15 @@ test_that("Can create table with specific dictionary types", {
}
}
})

test_that("Table unifies dictionary on conversion back to R (ARROW-8374)", {
  # Four batches whose factor column `f` uses different level sets, including
  # an NA-only batch and a zero-row batch.
  batch_ab <- record_batch(f = factor("a", levels = c("a", "b")))
  batch_cd <- record_batch(f = factor("c", levels = c("c", "d")))
  batch_na <- record_batch(f = factor(NA, levels = "a"))
  batch_empty <- record_batch(f = factor())
  combined <- Table$create(batch_ab, batch_cd, batch_na, batch_empty)

  # The round trip must yield a single factor carrying the combined levels.
  expected_levels <- c("a", "b", "c", "d")
  expected <- tibble::tibble(
    f = factor(c("a", "c", NA), levels = expected_levels)
  )

  expect_identical(as.data.frame(combined), expected)
})
64 changes: 18 additions & 46 deletions r/tests/testthat/test-chunked-array.R
Original file line number Diff line number Diff line change
Expand Up @@ -93,52 +93,12 @@ test_that("ChunkedArray", {
})

test_that("print ChunkedArray", {
x1 <- chunked_array(c(1,2,3), c(4,5,6))
expect_output(
print(x1),
paste(
"ChunkedArray",
"<double>",
"[",
" 1,",
" 2,",
" 3,",
" ...",
"]",
sep = "\n"
),
fixed = TRUE
)
x2 <- chunked_array(1:30, c(4,5,6))
expect_output(
print(x2),
paste(
"ChunkedArray",
"<int32>",
"[",
" 1,",
" 2,",
" 3,",
" 4,",
" 5,",
" 6,",
" 7,",
" 8,",
" 9,",
" 10,",
" ...",
"]",
sep = "\n"
),
fixed = TRUE
)
# If there's only one chunk, it should look like a regular Array
x3 <- chunked_array(1:30)
expect_output(
print(x3),
paste0("Chunked", paste(capture.output(print(Array$create(1:30))), collapse = "\n")),
fixed = TRUE
)
verify_output(test_path("test-chunked-array.txt"), {
chunked_array(c(1,2,3), c(4,5,6))
chunked_array(1:30, c(4,5,6))
chunked_array(1:30)
chunked_array(factor(c("a", "b")), factor(c("c", "d")))
})
})

test_that("ChunkedArray handles !!! splicing", {
Expand Down Expand Up @@ -392,3 +352,15 @@ test_that("ChunkedArray$Equals", {
expect_true(a$Equals(b))
expect_false(a$Equals(vec))
})

test_that("Converting a chunked array unifies factors (ARROW-8374)", {
  # One chunk per factor; the level sets differ, and the last two chunks are
  # an NA-only factor and an empty factor.
  chunk_ab <- factor("a", levels = c("a", "b"))
  chunk_cd <- factor("c", levels = c("c", "d"))
  chunk_na <- factor(NA, levels = "a")
  chunk_empty <- factor()
  chunked <- ChunkedArray$create(chunk_ab, chunk_cd, chunk_na, chunk_empty)

  # Conversion back to R must produce one factor with the combined levels.
  expected_levels <- c("a", "b", "c", "d")
  expected <- factor(c("a", "c", NA), levels = expected_levels)

  expect_identical(chunked$as_vector(), expected)
})
103 changes: 103 additions & 0 deletions r/tests/testthat/test-chunked-array.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
> chunked_array(c(1, 2, 3), c(4, 5, 6))
ChunkedArray
[
[
1,
2,
3
],
[
4,
5,
6
]
]

> chunked_array(1:30, c(4, 5, 6))
ChunkedArray
[
[
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
...
21,
22,
23,
24,
25,
26,
27,
28,
29,
30
],
[
4,
5,
6
]
]

> chunked_array(1:30)
ChunkedArray
[
[
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
...
21,
22,
23,
24,
25,
26,
27,
28,
29,
30
]
]

> chunked_array(factor(c("a", "b")), factor(c("c", "d")))
ChunkedArray
[

-- dictionary:
[
"a",
"b"
]
-- indices:
[
0,
1
],

-- dictionary:
[
"c",
"d"
]
-- indices:
[
0,
1
]
]