diff --git a/r/.Rbuildignore b/r/.Rbuildignore index 6830c9019cd..91a8d741a8e 100644 --- a/r/.Rbuildignore +++ b/r/.Rbuildignore @@ -9,6 +9,7 @@ Dockerfile .*\.tar\.gz ^windows ^libarrow +^revdep clang_format.sh ^cran-comments\.md$ ^arrow_.*.tar.gz$ diff --git a/r/.gitignore b/r/.gitignore index 5fda6334d16..e5ab1197071 100644 --- a/r/.gitignore +++ b/r/.gitignore @@ -13,6 +13,7 @@ src/Makevars src/Makevars.win windows/ libarrow/ +revdep/ vignettes/nyc-taxi/ arrow_*.tar.gz arrow_*.tgz diff --git a/r/DESCRIPTION b/r/DESCRIPTION index 833dc18f488..b54616e6a25 100644 --- a/r/DESCRIPTION +++ b/r/DESCRIPTION @@ -21,7 +21,7 @@ BugReports: https://issues.apache.org/jira/projects/ARROW/issues Encoding: UTF-8 Language: en-US LazyData: true -SystemRequirements: C++11 +SystemRequirements: C++11; for AWS S3 support on Linux, libcurl and openssl (optional) Biarch: true Imports: assertthat, diff --git a/r/NEWS.md b/r/NEWS.md index 86d1c7aac53..15c66ae364a 100644 --- a/r/NEWS.md +++ b/r/NEWS.md @@ -62,6 +62,7 @@ to send and receive data. See `vignette("flight", package = "arrow")` for an ove * File writers now respect the system umask setting * `ParquetFileReader` has additional methods for accessing individual columns or row groups from the file * Various segfaults fixed: invalid input in `ParquetFileWriter`; invalid `ArrowObject` pointer from a saved R object; converting deeply nested structs from Arrow to R +* The `properties` and `arrow_properties` arguments to `write_parquet()` are deprecated # arrow 1.0.1 diff --git a/r/R/parquet.R b/r/R/parquet.R index 1a805c8c8ad..1bc67427b48 100644 --- a/r/R/parquet.R +++ b/r/R/parquet.R @@ -93,6 +93,12 @@ read_parquet <- function(file, #' @param allow_truncated_timestamps Allow loss of data when coercing timestamps to a #' particular resolution. E.g. if microsecond or nanosecond data is lost when coercing #' to "ms", do not raise an exception +#' @param properties A `ParquetWriterProperties` object, used instead of the options +#' enumerated in this function's signature. Providing `properties` as an argument +#' is deprecated; if you need to assemble `ParquetWriterProperties` outside +#' of `write_parquet()`, use `ParquetFileWriter` instead. +#' @param arrow_properties A `ParquetArrowWriterProperties` object. Like +#' `properties`, this argument is deprecated. #' #' @details The parameters `compression`, `compression_level`, `use_dictionary` and #' `write_statistics` support various patterns: @@ -140,7 +146,9 @@ write_parquet <- function(x, # arrow writer properties use_deprecated_int96_timestamps = FALSE, coerce_timestamps = NULL, - allow_truncated_timestamps = FALSE) { + allow_truncated_timestamps = FALSE, + properties = NULL, + arrow_properties = NULL) { x_out <- x if (is.data.frame(x)) { x <- Table$create(x) @@ -151,10 +159,18 @@ write_parquet <- function(x, on.exit(sink$close()) } + # Deprecation warnings + if (!is.null(properties)) { + warning("Providing 'properties' is deprecated. If you need to assemble properties outside this function, use ParquetFileWriter instead.") + } + if (!is.null(arrow_properties)) { + warning("Providing 'arrow_properties' is deprecated. If you need to assemble arrow_properties outside this function, use ParquetFileWriter instead.") + } + writer <- ParquetFileWriter$create( x$schema, sink, - properties = ParquetWriterProperties$create( + properties = properties %||% ParquetWriterProperties$create( x, version = version, compression = compression, @@ -163,7 +179,7 @@ write_parquet <- function(x, write_statistics = write_statistics, data_page_size = data_page_size ), - arrow_properties = ParquetArrowWriterProperties$create( + arrow_properties = arrow_properties %||% ParquetArrowWriterProperties$create( use_deprecated_int96_timestamps = use_deprecated_int96_timestamps, coerce_timestamps = coerce_timestamps, allow_truncated_timestamps = allow_truncated_timestamps diff --git a/r/man/write_parquet.Rd b/r/man/write_parquet.Rd index f639db9cc1b..d0e4f24dc46 100644 --- a/r/man/write_parquet.Rd +++ b/r/man/write_parquet.Rd @@ -16,7 +16,9 @@ write_parquet( data_page_size = NULL, use_deprecated_int96_timestamps = FALSE, coerce_timestamps = NULL, - allow_truncated_timestamps = FALSE + allow_truncated_timestamps = FALSE, + properties = NULL, + arrow_properties = NULL ) } \arguments{ @@ -49,6 +51,14 @@ size of data pages within a column chunk (in bytes). Default 1 MiB.} \item{allow_truncated_timestamps}{Allow loss of data when coercing timestamps to a particular resolution. E.g. if microsecond or nanosecond data is lost when coercing to "ms", do not raise an exception} + +\item{properties}{A \code{ParquetWriterProperties} object, used instead of the options +enumerated in this function's signature. Providing \code{properties} as an argument +is deprecated; if you need to assemble \code{ParquetWriterProperties} outside +of \code{write_parquet()}, use \code{ParquetFileWriter} instead.} + +\item{arrow_properties}{A \code{ParquetArrowWriterProperties} object. Like +\code{properties}, this argument is deprecated.} } \value{ the input \code{x} invisibly. diff --git a/r/tools/linuxlibs.R b/r/tools/linuxlibs.R index e5b928f5fad..d36192ee7a3 100644 --- a/r/tools/linuxlibs.R +++ b/r/tools/linuxlibs.R @@ -55,6 +55,12 @@ download_binary <- function(os = identify_os()) { binary_url <- paste0(arrow_repo, "bin/", os, "/arrow-", VERSION, ".zip") if (try_download(binary_url, libfile)) { cat(sprintf("*** Successfully retrieved C++ binaries for %s\n", os)) + if (!identical(os, "centos-7")) { + # centos-7 uses gcc 4.8 so the binary doesn't have ARROW_S3=ON but the others do + # TODO: actually check for system requirements? + cat("**** Binary package requires libcurl and openssl\n") + cat("**** If installation fails, retry after installing those system requirements\n") + } } else { cat(sprintf("*** No C++ binaries found for %s\n", os)) libfile <- NULL