diff --git a/dev/tasks/r/github.linux.extra.packages.yml b/dev/tasks/r/github.linux.extra.packages.yml new file mode 100644 index 00000000000..bb486c72a06 --- /dev/null +++ b/dev/tasks/r/github.linux.extra.packages.yml @@ -0,0 +1,53 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +{% import 'macros.jinja' as macros with context %} + +{{ macros.github_header() }} + +jobs: + extra-packages: + name: "extra package roundtrip tests" + runs-on: ubuntu-latest + strategy: + fail-fast: false + env: + ARROW_R_DEV: "FALSE" + ARROW_R_FORCE_EXTRA_PACKAGE_TESTS: TRUE + steps: + {{ macros.github_checkout_arrow()|indent }} + + - uses: r-lib/actions/setup-r@v2 + with: + use-public-rspm: true + - uses: r-lib/actions/setup-pandoc@v2 + - uses: r-lib/actions/setup-r-dependencies@v2 + with: + working-directory: 'arrow/r' + extra-packages: | + any::data.table + any::rcmdcheck + any::readr + any::units + - name: Build arrow package + run: | + R CMD build --no-build-vignettes arrow/r + R CMD INSTALL --install-tests --no-test-load --no-byte-compile arrow_*.tar.gz + - name: run tests + run: | + testthat::test_package("arrow", filter = "extra-package-roundtrip") + shell: Rscript {0} diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml index 6e1f7609a98..a9da7eb2889 100644 --- a/dev/tasks/tasks.yml +++ b/dev/tasks/tasks.yml @@ -1309,6 +1309,10 @@ tasks: ci: github template: r/github.linux.rchk.yml + test-r-extra-packages: + ci: github + template: r/github.linux.extra.packages.yml + test-r-linux-as-cran: ci: github template: r/github.linux.cran.yml diff --git a/r/tests/testthat/test-extra-package-roundtrip.R b/r/tests/testthat/test-extra-package-roundtrip.R new file mode 100644 index 00000000000..09a87ef19d5 --- /dev/null +++ b/r/tests/testthat/test-extra-package-roundtrip.R @@ -0,0 +1,105 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +skip_on_cran() + +# Any additional package that we test here that is not already in DESCRIPTION should be +# added to dev/tasks/r/github.linux.extra.packages.yml in the r-lib/actions/setup-r-dependencies@v2 +# step so that they are installed + available in that CI job. + +# So that we can force these in CI +load_or_skip <- function(pkg) { + if (identical(tolower(Sys.getenv("ARROW_R_FORCE_EXTRA_PACKAGE_TESTS")), "true")) { + # because of this indirection on the package name we also avoid a CHECK note and + # we don't otherwise need to Suggest this + requireNamespace(pkg, quietly = TRUE) + } else { + skip_if(!requireNamespace(pkg, quietly = TRUE)) + } + attachNamespace(pkg) +} + +library(dplyr) + +test_that("readr read csvs roundtrip", { + load_or_skip("readr") + + tbl <- example_data[, c("dbl", "lgl", "false", "chr")] + + tf <- tempfile() + on.exit(unlink(tf)) + write.csv(tbl, tf, row.names = FALSE) + + # we should still be able to turn this into a table + new_df <- read_csv(tf, show_col_types = FALSE) + expect_equal(new_df, as_tibble(arrow_table(new_df))) + + # we should still be able to turn this into a table + new_df <- read_csv(tf, show_col_types = FALSE, lazy = TRUE) + expect_equal(new_df, as_tibble(arrow_table(new_df))) + + # and can roundtrip to a parquet file + pq_tmp_file <- tempfile() + write_parquet(new_df, pq_tmp_file) + new_df_read <- read_parquet(pq_tmp_file) + + # we should still be able to turn this into a table + expect_equal(new_df, new_df_read) +}) + +test_that("data.table objects roundtrip", { + load_or_skip("data.table") + + # https://github.com/Rdatatable/data.table/blob/83fd2c05ce2d8555ceb8ba417833956b1b574f7e/R/cedta.R#L25-L27 + .datatable.aware=TRUE + + DT <- as.data.table(example_data) + + # Table -> collect which is what writing + reading to parquet uses under the hood to roundtrip + tab <- as_arrow_table(DT) + DT_read <- collect(tab) + + # we should still be able to turn this into a table + # the .internal.selfref attribute is automatically ignored by testthat/waldo + expect_equal(DT, DT_read) + + # and we can set keys + indices + create new columns + setkey(DT, chr) + setindex(DT, dbl) + DT[, dblshift := data.table::shift(dbl, 1)] + + # Table -> collect + tab <- as_arrow_table(DT) + DT_read <- collect(tab) + + # we should still be able to turn this into a table + expect_equal(DT, DT_read) +}) + +test_that("units roundtrip", { + load_or_skip("units") + + tbl <- example_data + units(tbl$dbl) <- "s" + + # Table -> collect which is what writing + reading to parquet uses under the hood to roundtrip + tab <- as_arrow_table(tbl) + tbl_read <- collect(tab) + + # we should still be able to turn this into a table + expect_equal(tbl, tbl_read) +})