diff --git a/.github/workflows/cpp.yml b/.github/workflows/cpp.yml index e7de0c517bc..7289f7a669e 100644 --- a/.github/workflows/cpp.yml +++ b/.github/workflows/cpp.yml @@ -186,23 +186,24 @@ jobs: strategy: fail-fast: false env: + ARROW_BUILD_TESTS: ON + ARROW_DATASET: ON + ARROW_FLIGHT: ON + ARROW_GANDIVA: ON + ARROW_HDFS: ON ARROW_HOME: /usr/local ARROW_JEMALLOC: ON # TODO(kszucs): link error in the tests - ARROW_DATASET: ON ARROW_ORC: OFF - ARROW_FLIGHT: ON - ARROW_HDFS: ON - ARROW_PLASMA: ON - ARROW_GANDIVA: ON ARROW_PARQUET: ON - ARROW_WITH_ZLIB: ON - ARROW_WITH_LZ4: ON + ARROW_PLASMA: ON + ARROW_S3: ON + ARROW_WITH_BROTLI: ON ARROW_WITH_BZ2: ON - ARROW_WITH_ZSTD: ON + ARROW_WITH_LZ4: ON ARROW_WITH_SNAPPY: ON - ARROW_WITH_BROTLI: ON - ARROW_BUILD_TESTS: ON + ARROW_WITH_ZLIB: ON + ARROW_WITH_ZSTD: ON steps: - name: Checkout Arrow uses: actions/checkout@v2 @@ -371,6 +372,14 @@ jobs: run: | export CMAKE_BUILD_PARALLEL_LEVEL=$NUMBER_OF_PROCESSORS ci/scripts/cpp_build.sh "$(pwd)" "$(pwd)/build" + - name: Download MinIO + shell: msys2 {0} + run: | + mkdir -p /usr/local/bin + wget \ + --output-document /usr/local/bin/minio.exe \ + https://dl.min.io/server/minio/release/windows-amd64/minio.exe + chmod +x /usr/local/bin/minio.exe - name: Test shell: msys2 {0} run: | diff --git a/.travis.yml b/.travis.yml index ef0e9b6d60d..5ba51da4e39 100644 --- a/.travis.yml +++ b/.travis.yml @@ -58,13 +58,16 @@ jobs: ARCH: s390x ARROW_CI_MODULES: "CPP" DOCKER_IMAGE_ID: ubuntu-cpp - # Can't use CMAKE_UNITIFY_BUILD=ON because of compiler crash + # Can't use CMAKE_UNITIFY_BUILD=ON because of compiler crash. + # Can't enable ARROW_S3 because compiler is killed while compiling + # aws-sdk-cpp. 
DOCKER_RUN_ARGS: >- " -e ARROW_BUILD_STATIC=OFF -e ARROW_FLIGHT=ON -e ARROW_ORC=OFF -e ARROW_PARQUET=OFF + -e ARROW_S3=OFF -e PARQUET_BUILD_EXAMPLES=OFF -e PARQUET_BUILD_EXECUTABLES=OFF -e Protobuf_SOURCE=BUNDLED diff --git a/ci/docker/debian-10-cpp.dockerfile b/ci/docker/debian-10-cpp.dockerfile index f86c009b57b..74143dcbfa4 100644 --- a/ci/docker/debian-10-cpp.dockerfile +++ b/ci/docker/debian-10-cpp.dockerfile @@ -17,6 +17,7 @@ ARG arch=amd64 FROM ${arch}/debian:10 +ARG arch ENV DEBIAN_FRONTEND noninteractive @@ -26,7 +27,7 @@ RUN \ ARG llvm RUN apt-get update -y -q && \ - apt-get install -y -q --no-install-recommends \ + apt-get install -y -q --no-install-recommends \ apt-transport-https \ ca-certificates \ gnupg \ @@ -49,6 +50,7 @@ RUN apt-get update -y -q && \ libbrotli-dev \ libbz2-dev \ libc-ares-dev \ + libcurl4-openssl-dev \ libgflags-dev \ libgmock-dev \ libgoogle-glog-dev \ @@ -71,6 +73,10 @@ RUN apt-get update -y -q && \ apt-get clean && \ rm -rf /var/lib/apt/lists/* +COPY ci/scripts/install_minio.sh \ + /arrow/ci/scripts/ +RUN /arrow/ci/scripts/install_minio.sh ${arch} linux latest /usr/local + ENV ARROW_BUILD_TESTS=ON \ ARROW_DEPENDENCY_SOURCE=SYSTEM \ ARROW_DATASET=ON \ @@ -80,6 +86,7 @@ ENV ARROW_BUILD_TESTS=ON \ ARROW_ORC=ON \ ARROW_PARQUET=ON \ ARROW_PLASMA=ON \ + ARROW_S3=ON \ ARROW_USE_CCACHE=ON \ ARROW_WITH_BROTLI=ON \ ARROW_WITH_BZ2=ON \ @@ -87,6 +94,7 @@ ENV ARROW_BUILD_TESTS=ON \ ARROW_WITH_SNAPPY=ON \ ARROW_WITH_ZLIB=ON \ ARROW_WITH_ZSTD=ON \ + AWSSDK_SOURCE=BUNDLED \ cares_SOURCE=BUNDLED \ CC=gcc \ CXX=g++ \ diff --git a/ci/docker/fedora-32-cpp.dockerfile b/ci/docker/fedora-32-cpp.dockerfile index 535f8b4b761..40fe4617b12 100644 --- a/ci/docker/fedora-32-cpp.dockerfile +++ b/ci/docker/fedora-32-cpp.dockerfile @@ -17,10 +17,11 @@ ARG arch FROM ${arch}/fedora:32 +ARG arch # install dependencies RUN dnf update -y && \ - dnf install -y \ + dnf install -y \ autoconf \ boost-devel \ brotli-devel \ @@ -29,6 +30,7 @@ RUN dnf update -y && \ 
ccache \ clang-devel \ cmake \ + curl-devel \ flatbuffers-devel \ java-1.8.0-openjdk-devel \ java-1.8.0-openjdk-headless \ @@ -54,9 +56,14 @@ RUN dnf update -y && \ snappy-devel \ thrift-devel \ utf8proc-devel \ + wget \ which \ zlib-devel +COPY ci/scripts/install_minio.sh \ + /arrow/ci/scripts/ +RUN /arrow/ci/scripts/install_minio.sh ${arch} linux latest /usr/local + # * gRPC 1.26 in Fedora 32 may have a problem. arrow-flight-test is stuck. ENV ARROW_BUILD_TESTS=ON \ ARROW_DEPENDENCY_SOURCE=SYSTEM \ @@ -67,6 +74,7 @@ ENV ARROW_BUILD_TESTS=ON \ ARROW_HOME=/usr/local \ ARROW_ORC=ON \ ARROW_PARQUET=ON \ + ARROW_S3=ON \ ARROW_USE_CCACHE=ON \ ARROW_WITH_BROTLI=ON \ ARROW_WITH_BZ2=ON \ @@ -74,6 +82,7 @@ ENV ARROW_BUILD_TESTS=ON \ ARROW_WITH_SNAPPY=ON \ ARROW_WITH_ZLIB=ON \ ARROW_WITH_ZSTD=ON \ + AWSSDK_SOURCE=BUNDLED \ CC=gcc \ CXX=g++ \ gRPC_SOURCE=BUNDLED \ diff --git a/ci/docker/linux-apt-r.dockerfile b/ci/docker/linux-apt-r.dockerfile index 85827358dfb..f47044e334b 100644 --- a/ci/docker/linux-apt-r.dockerfile +++ b/ci/docker/linux-apt-r.dockerfile @@ -17,6 +17,7 @@ ARG base FROM ${base} +ARG arch # Build R # [1] https://www.digitalocean.com/community/tutorials/how-to-install-r-on-ubuntu-18-04 @@ -70,6 +71,10 @@ COPY ci/scripts/r_deps.sh /arrow/ci/scripts/ COPY r/DESCRIPTION /arrow/r/ RUN /arrow/ci/scripts/r_deps.sh /arrow +COPY ci/scripts/install_minio.sh \ + /arrow/ci/scripts/ +RUN /arrow/ci/scripts/install_minio.sh ${arch} linux latest /usr/local + # Set up Python 3 and its dependencies RUN ln -s /usr/bin/python3 /usr/local/bin/python && \ ln -s /usr/bin/pip3 /usr/local/bin/pip @@ -89,6 +94,7 @@ ENV \ ARROW_PARQUET=ON \ ARROW_PLASMA=OFF \ ARROW_PYTHON=ON \ + ARROW_S3=ON \ ARROW_USE_CCACHE=ON \ ARROW_USE_GLOG=OFF \ LC_ALL=en_US.UTF-8 diff --git a/ci/docker/linux-r.dockerfile b/ci/docker/linux-r.dockerfile index 1d963a20d14..5223d7aafa5 100644 --- a/ci/docker/linux-r.dockerfile +++ b/ci/docker/linux-r.dockerfile @@ -24,12 +24,16 @@ FROM ${base} ARG r_bin=R ENV 
R_BIN=${r_bin} +ARG r_dev=FALSE +ENV ARROW_R_DEV=${r_dev} + # Make sure R is on the path for the R-hub devel versions (where RPREFIX is set in its dockerfile) ENV PATH "${RPREFIX}/bin:${PATH}" # Patch up some of the docker images COPY ci/scripts/r_docker_configure.sh /arrow/ci/scripts/ COPY ci/etc/rprofile /arrow/ci/etc/ +COPY ci/scripts/install_minio.sh /arrow/ci/scripts/ RUN /arrow/ci/scripts/r_docker_configure.sh COPY ci/scripts/r_deps.sh /arrow/ci/scripts/ diff --git a/ci/docker/ubuntu-18.04-cpp.dockerfile b/ci/docker/ubuntu-18.04-cpp.dockerfile index a0fe1b3f6be..bfff20b441c 100644 --- a/ci/docker/ubuntu-18.04-cpp.dockerfile +++ b/ci/docker/ubuntu-18.04-cpp.dockerfile @@ -70,6 +70,7 @@ RUN apt-get update -y -q && \ libboost-system-dev \ libbrotli-dev \ libbz2-dev \ + libcurl4-openssl-dev \ libgflags-dev \ libgoogle-glog-dev \ liblz4-dev \ @@ -96,6 +97,7 @@ RUN apt-get update -y -q && \ # - libgtest-dev only provide sources # - libprotobuf-dev only provide sources # - thrift is too old +# - s3 tests would require boost-asio that is included since Boost 1.66.0 ENV ARROW_BUILD_TESTS=ON \ ARROW_DEPENDENCY_SOURCE=SYSTEM \ ARROW_DATASET=ON \ @@ -117,6 +119,7 @@ ENV ARROW_BUILD_TESTS=ON \ ARROW_WITH_SNAPPY=ON \ ARROW_WITH_ZLIB=ON \ ARROW_WITH_ZSTD=ON \ + AWSSDK_SOURCE=BUNDLED \ GTest_SOURCE=BUNDLED \ ORC_SOURCE=BUNDLED \ PARQUET_BUILD_EXECUTABLES=ON \ diff --git a/ci/docker/ubuntu-20.04-cpp.dockerfile b/ci/docker/ubuntu-20.04-cpp.dockerfile index ce738f5e554..5b455b946c5 100644 --- a/ci/docker/ubuntu-20.04-cpp.dockerfile +++ b/ci/docker/ubuntu-20.04-cpp.dockerfile @@ -17,6 +17,7 @@ ARG base=amd64/ubuntu:20.04 FROM ${base} +ARG arch SHELL ["/bin/bash", "-o", "pipefail", "-c"] @@ -57,6 +58,7 @@ RUN apt-get update -y -q && \ libbrotli-dev \ libbz2-dev \ libgflags-dev \ + libcurl4-openssl-dev \ libgoogle-glog-dev \ liblz4-dev \ libprotobuf-dev \ @@ -72,10 +74,15 @@ RUN apt-get update -y -q && \ pkg-config \ protobuf-compiler \ rapidjson-dev \ - tzdata && \ + tzdata \ + 
wget && \ apt-get clean && \ rm -rf /var/lib/apt/lists* +COPY ci/scripts/install_minio.sh \ + /arrow/ci/scripts/ +RUN /arrow/ci/scripts/install_minio.sh ${arch} linux latest /usr/local + # Prioritize system packages and local installation # The following dependencies will be downloaded due to missing/invalid packages # provided by the distribution: @@ -85,6 +92,7 @@ RUN apt-get update -y -q && \ # - libprotobuf-dev only provide sources ENV ARROW_BUILD_TESTS=ON \ ARROW_DEPENDENCY_SOURCE=SYSTEM \ + ARROW_S3=ON \ ARROW_DATASET=ON \ ARROW_FLIGHT=OFF \ ARROW_GANDIVA=ON \ @@ -104,6 +112,7 @@ ENV ARROW_BUILD_TESTS=ON \ ARROW_WITH_SNAPPY=ON \ ARROW_WITH_ZLIB=ON \ ARROW_WITH_ZSTD=ON \ + AWSSDK_SOURCE=BUNDLED \ GTest_SOURCE=BUNDLED \ ORC_SOURCE=BUNDLED \ PARQUET_BUILD_EXAMPLES=ON \ diff --git a/ci/scripts/cpp_build.sh b/ci/scripts/cpp_build.sh index 53fd6a32d17..fe109b77b09 100755 --- a/ci/scripts/cpp_build.sh +++ b/ci/scripts/cpp_build.sh @@ -101,6 +101,7 @@ cmake -G "${CMAKE_GENERATOR:-Ninja}" \ -DARROW_WITH_UTF8PROC=${ARROW_WITH_UTF8PROC:-ON} \ -DARROW_WITH_ZLIB=${ARROW_WITH_ZLIB:-OFF} \ -DARROW_WITH_ZSTD=${ARROW_WITH_ZSTD:-OFF} \ + -DAWSSDK_SOURCE=${AWSSDK_SOURCE:-} \ -Dbenchmark_SOURCE=${benchmark_SOURCE:-} \ -DBOOST_SOURCE=${BOOST_SOURCE:-} \ -DBrotli_SOURCE=${Brotli_SOURCE:-} \ diff --git a/ci/scripts/cpp_test.sh b/ci/scripts/cpp_test.sh index eb556cceffb..d7e239b7c07 100755 --- a/ci/scripts/cpp_test.sh +++ b/ci/scripts/cpp_test.sh @@ -62,18 +62,6 @@ case "$(uname)" in exclude_tests="${exclude_tests}|gandiva-literal-test" exclude_tests="${exclude_tests}|gandiva-null-validity-test" fi - # TODO: Enable this when we can use aws-sdk-cpp as a shared - # library. The current aws-sdk-cpp MSYS2 package provides only - # static library. If we use aws-sdk-cpp as a static library, we - # can't use aws-sdk-cpp directly in - # cpp/src/arrow/filesystem/s3fs_test.c. Because aws-sdk-cpp uses - # static variables to keep process wide objects. 
If we aws-sdk-cpp - # as a static library, we have two aws-sdk-cpp libraries (in - # libarrow.dll and - # arrow-s3fs-test.exe). arrow::fs::EnsureS3Initialized() only - # initializes aws-sdk-cpp in libarrow.dll. It doesn't initialize - # aws-sdk-cpp in arrow-s3fs-test.exe. - exclude_tests="${exclude_tests}|arrow-s3fs-test" ctest_options+=(--exclude-regex "${exclude_tests}") ;; *) diff --git a/ci/scripts/install_minio.sh b/ci/scripts/install_minio.sh index 9ed70afc03b..07450f48279 100755 --- a/ci/scripts/install_minio.sh +++ b/ci/scripts/install_minio.sh @@ -20,11 +20,13 @@ set -e declare -A archs -archs=([amd64]=amd64) +archs=([amd64]=amd64 + [arm64v8]=arm64 + [s390x]=s390x) declare -A platforms -platforms=([macos]=darwin - [linux]=linux) +platforms=([linux]=linux + [macos]=darwin) arch=${archs[$1]} platform=${platforms[$2]} @@ -34,10 +36,10 @@ prefix=$4 if [ "$#" -ne 4 ]; then echo "Usage: $0 " exit 1 -elif [[ -z ${archs[$1]} ]]; then +elif [[ -z ${arch} ]]; then echo "Unexpected architecture: ${1}" exit 1 -elif [[ -z ${platforms[$2]} ]]; then +elif [[ -z ${platform} ]]; then echo "Unexpected platform: ${2}" exit 1 elif [[ ${version} != "latest" ]]; then @@ -45,5 +47,5 @@ elif [[ ${version} != "latest" ]]; then exit 1 fi -wget -nv -P ${prefix}/bin https://dl.min.io/server/minio/release/linux-${arch}/minio +wget -nv -P ${prefix}/bin https://dl.min.io/server/minio/release/${platform}-${arch}/minio chmod +x ${prefix}/bin/minio diff --git a/ci/scripts/r_deps.sh b/ci/scripts/r_deps.sh index a2dc58fd97b..7e9d2eac7a9 100755 --- a/ci/scripts/r_deps.sh +++ b/ci/scripts/r_deps.sh @@ -25,7 +25,7 @@ source_dir=${1}/r pushd ${source_dir} # Install R package dependencies -${R_BIN} -e "install.packages('remotes'); remotes::install_cran(c('glue', 'rcmdcheck'))" +${R_BIN} -e "install.packages('remotes'); remotes::install_cran(c('glue', 'rcmdcheck', 'sys'))" ${R_BIN} -e "remotes::install_deps(dependencies = TRUE)" popd diff --git a/ci/scripts/r_docker_configure.sh 
b/ci/scripts/r_docker_configure.sh index 1d7e8de8bf5..e6594e03a88 100755 --- a/ci/scripts/r_docker_configure.sh +++ b/ci/scripts/r_docker_configure.sh @@ -39,6 +39,25 @@ if [ "$RHUB_PLATFORM" = "linux-x86_64-fedora-clang" ]; then rm -rf $(${R_BIN} RHOME)/etc/Makeconf.bak fi +# Install openssl for S3 support +if [ "$ARROW_S3" == "ON" ] || [ "$ARROW_R_DEV" == "TRUE" ]; then + if [ "`which dnf`" ]; then + dnf install -y libcurl-devel openssl-devel + elif [ "`which yum`" ]; then + yum install -y libcurl-devel openssl-devel + elif [ "`which zypper`" ]; then + zypper install -y libcurl-devel libopenssl-devel + else + apt-get update + apt-get install -y libcurl4-openssl-dev libssl-dev + fi + + # The Dockerfile should have put this file here + if [ -f "/arrow/ci/scripts/install_minio.sh" ] && [ "`which wget`" ]; then + /arrow/ci/scripts/install_minio.sh amd64 linux latest /usr/local + fi +fi + # Workaround for html help install failure; see https://github.com/r-lib/devtools/issues/2084#issuecomment-530912786 Rscript -e 'x <- file.path(R.home("doc"), "html"); if (!file.exists(x)) {dir.create(x, recursive=TRUE); file.copy(system.file("html/R.css", package="stats"), x)}' diff --git a/ci/scripts/r_test.sh b/ci/scripts/r_test.sh index 05c70d8a560..a2428e912be 100755 --- a/ci/scripts/r_test.sh +++ b/ci/scripts/r_test.sh @@ -59,6 +59,13 @@ ${R_BIN} -e "as_cran <- !identical(tolower(Sys.getenv('NOT_CRAN')), 'true') if (as_cran) { rcmdcheck::rcmdcheck(args = c('--as-cran', '--run-donttest'), error_on = 'warning', check_dir = 'check') } else { + if (nzchar(Sys.which('minio'))) { + message('Running minio for S3 tests (if build supports them)') + minio_dir <- tempfile() + dir.create(minio_dir) + pid <- sys::exec_background('minio', c('server', minio_dir)) + on.exit(tools::pskill(pid)) + } rcmdcheck::rcmdcheck(build_args = '--no-build-vignettes', args = c('--no-manual', '--ignore-vignettes', '--run-donttest'), error_on = 'warning', check_dir = 'check') }" diff --git a/cpp/Brewfile 
b/cpp/Brewfile index 3eec83f5372..7de6c7deabe 100644 --- a/cpp/Brewfile +++ b/cpp/Brewfile @@ -28,6 +28,7 @@ brew "grpc" brew "llvm" brew "llvm@8" brew "lz4" +brew "minio" brew "ninja" brew "numpy" brew "openssl@1.1" diff --git a/cpp/build-support/run-test.sh b/cpp/build-support/run-test.sh index 7e2a22f069a..32a7ab90f8e 100755 --- a/cpp/build-support/run-test.sh +++ b/cpp/build-support/run-test.sh @@ -96,12 +96,14 @@ function run_test() { # even when retries are successful. rm -f $XMLFILE - $TEST_EXECUTABLE "$@" 2>&1 \ + $TEST_EXECUTABLE "$@" > $LOGFILE.raw 2>&1 + STATUS=$? + cat $LOGFILE.raw \ | ${PYTHON:-python} $ROOT/build-support/asan_symbolize.py \ | ${CXXFILT:-c++filt} \ | $ROOT/build-support/stacktrace_addr2line.pl $TEST_EXECUTABLE \ | $pipe_cmd 2>&1 | tee $LOGFILE - STATUS=$? + rm -f $LOGFILE.raw # TSAN doesn't always exit with a non-zero exit code due to a bug: # mutex errors don't get reported through the normal error reporting infrastructure. diff --git a/cpp/cmake_modules/DefineOptions.cmake b/cpp/cmake_modules/DefineOptions.cmake index 64ec7fa28bd..cd64ff0b366 100644 --- a/cpp/cmake_modules/DefineOptions.cmake +++ b/cpp/cmake_modules/DefineOptions.cmake @@ -316,6 +316,9 @@ if("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_SOURCE_DIR}") define_option(ARROW_LZ4_USE_SHARED "Rely on lz4 shared libraries where relevant" ${ARROW_DEPENDENCY_USE_SHARED}) + define_option(ARROW_OPENSSL_USE_SHARED "Rely on OpenSSL shared libraries where relevant" + ${ARROW_DEPENDENCY_USE_SHARED}) + define_option(ARROW_PROTOBUF_USE_SHARED "Rely on Protocol Buffers shared libraries where relevant" ${ARROW_DEPENDENCY_USE_SHARED}) diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake index 151ae6f7b2c..8422eebc480 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -300,6 +300,33 @@ else() "https://github.com/abseil/abseil-cpp/archive/${ARROW_ABSL_BUILD_VERSION}.tar.gz") endif()
+if(DEFINED ENV{ARROW_AWS_C_COMMON_URL}) + set(AWS_C_COMMON_SOURCE_URL "$ENV{ARROW_AWS_C_COMMON_URL}") +else() + set_urls( + AWS_C_COMMON_SOURCE_URL + "https://github.com/awslabs/aws-c-common/archive/${ARROW_AWS_C_COMMON_BUILD_VERSION}.tar.gz" + ) +endif() + +if(DEFINED ENV{ARROW_AWS_CHECKSUMS_URL}) + set(AWS_CHECKSUMS_SOURCE_URL "$ENV{ARROW_AWS_CHECKSUMS_URL}") +else() + set_urls( + AWS_CHECKSUMS_SOURCE_URL + "https://github.com/awslabs/aws-checksums/archive/${ARROW_AWS_CHECKSUMS_BUILD_VERSION}.tar.gz" + ) +endif() + +if(DEFINED ENV{ARROW_AWS_C_EVENT_STREAM_URL}) + set(AWS_C_EVENT_STREAM_SOURCE_URL "$ENV{ARROW_AWS_C_EVENT_STREAM_URL}") +else() + set_urls( + AWS_C_EVENT_STREAM_SOURCE_URL + "https://github.com/awslabs/aws-c-event-stream/archive/${ARROW_AWS_C_EVENT_STREAM_BUILD_VERSION}.tar.gz" + ) +endif() + if(DEFINED ENV{ARROW_AWSSDK_URL}) set(AWSSDK_SOURCE_URL "$ENV{ARROW_AWSSDK_URL}") else() @@ -971,8 +998,24 @@ endif() set(ARROW_USE_OPENSSL OFF) if(PARQUET_REQUIRE_ENCRYPTION OR ARROW_FLIGHT OR ARROW_S3) - # This must work - find_package(OpenSSL ${ARROW_OPENSSL_REQUIRED_VERSION} REQUIRED) + # OpenSSL is required + if(ARROW_OPENSSL_USE_SHARED) + # Find shared OpenSSL libraries. + set(OpenSSL_USE_STATIC_LIBS OFF) + # Seems that different envs capitalize this differently? 
+ set(OPENSSL_USE_STATIC_LIBS OFF) + set(BUILD_SHARED_LIBS_KEEP ${BUILD_SHARED_LIBS}) + set(BUILD_SHARED_LIBS ON) + + find_package(OpenSSL ${ARROW_OPENSSL_REQUIRED_VERSION} REQUIRED) + set(BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS_KEEP}) + unset(BUILD_SHARED_LIBS_KEEP) + else() + # Find static OpenSSL headers and libs + set(OpenSSL_USE_STATIC_LIBS ON) + set(OPENSSL_USE_STATIC_LIBS ON) + find_package(OpenSSL ${ARROW_OPENSSL_REQUIRED_VERSION} REQUIRED) + endif() set(ARROW_USE_OPENSSL ON) endif() @@ -2619,12 +2662,14 @@ endif() # AWS SDK for C++ macro(build_awssdk) - message( - FATAL_ERROR "FIXME: Building AWS C++ SDK from source will link with wrong libcrypto") message("Building AWS C++ SDK from source") - + if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" + AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS "4.9") + message(FATAL_ERROR "AWS C++ SDK requires gcc >= 4.9") + endif() set(AWSSDK_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/awssdk_ep-install") set(AWSSDK_INCLUDE_DIR "${AWSSDK_PREFIX}/include") + set(AWSSDK_LIB_DIR "lib") if(WIN32) # On Windows, need to match build types @@ -2633,50 +2678,110 @@ macro(build_awssdk) # Otherwise, always build in release mode. 
# Especially with gcc, debug builds can fail with "asm constraint" errors: # https://github.com/TileDB-Inc/TileDB/issues/1351 - set(AWSSDK_BUILD_TYPE Release) + set(AWSSDK_BUILD_TYPE release) endif() - set(AWSSDK_CMAKE_ARGS - -DCMAKE_BUILD_TYPE=Release - -DCMAKE_INSTALL_LIBDIR=lib - -DBUILD_ONLY=s3;core;config;identity-management;sts - -DENABLE_UNITY_BUILD=on - -DENABLE_TESTING=off - "-DCMAKE_C_FLAGS=${EP_C_FLAGS}" - "-DCMAKE_INSTALL_PREFIX=${AWSSDK_PREFIX}") + set(AWSSDK_COMMON_CMAKE_ARGS + ${EP_COMMON_CMAKE_ARGS} + -DBUILD_SHARED_LIBS=OFF + -DCMAKE_BUILD_TYPE=${AWSSDK_BUILD_TYPE} + -DCMAKE_INSTALL_LIBDIR=${AWSSDK_LIB_DIR} + -DENABLE_TESTING=OFF + -DENABLE_UNITY_BUILD=ON + "-DCMAKE_INSTALL_PREFIX=${AWSSDK_PREFIX}" + "-DCMAKE_PREFIX_PATH=${AWSSDK_PREFIX}") set( - AWSSDK_CORE_SHARED_LIB - "${AWSSDK_PREFIX}/lib/${CMAKE_SHARED_LIBRARY_PREFIX}aws-cpp-sdk-core${CMAKE_SHARED_LIBRARY_SUFFIX}" - ) - set( - AWSSDK_S3_SHARED_LIB - "${AWSSDK_PREFIX}/lib/${CMAKE_SHARED_LIBRARY_PREFIX}aws-cpp-sdk-s3${CMAKE_SHARED_LIBRARY_SUFFIX}" - ) - set( - AWSSDK_IAM_SHARED_LIB - "${AWSSDK_PREFIX}/lib/${CMAKE_SHARED_LIBRARY_PREFIX}aws-cpp-sdk-identity-management${CMAKE_SHARED_LIBRARY_SUFFIX}" - ) - set( - AWSSDK_STS_SHARED_LIB - "${AWSSDK_PREFIX}/lib/${CMAKE_SHARED_LIBRARY_PREFIX}aws-cpp-sdk-sts${CMAKE_SHARED_LIBRARY_SUFFIX}" - ) - set(AWSSDK_SHARED_LIBS "${AWSSDK_CORE_SHARED_LIB}" "${AWSSDK_S3_SHARED_LIB}" - "${AWSSDK_IAM_SHARED_LIB}" "${AWSSDK_STS_SHARED_LIB}") + AWSSDK_CMAKE_ARGS + ${AWSSDK_COMMON_CMAKE_ARGS} -DBUILD_DEPS=OFF + -DBUILD_ONLY=config\\$s3\\$transfer\\$identity-management\\$sts + -DMINIMIZE_SIZE=ON) + + file(MAKE_DIRECTORY ${AWSSDK_INCLUDE_DIR}) + + # AWS C++ SDK related libraries to link statically + set(_AWSSDK_LIBS + aws-cpp-sdk-identity-management + aws-cpp-sdk-sts + aws-cpp-sdk-cognito-identity + aws-cpp-sdk-s3 + aws-cpp-sdk-core + aws-c-event-stream + aws-checksums + aws-c-common) + set(AWSSDK_LIBRARIES) + foreach(_AWSSDK_LIB ${_AWSSDK_LIBS}) + # aws-c-common -> 
AWS-C-COMMON + string(TOUPPER ${_AWSSDK_LIB} _AWSSDK_LIB_UPPER) + # AWS-C-COMMON -> AWS_C_COMMON + string(REPLACE "-" "_" _AWSSDK_LIB_NAME_PREFIX ${_AWSSDK_LIB_UPPER}) + set( + _AWSSDK_STATIC_LIBRARY + "${AWSSDK_PREFIX}/${AWSSDK_LIB_DIR}/${CMAKE_STATIC_LIBRARY_PREFIX}${_AWSSDK_LIB}${CMAKE_STATIC_LIBRARY_SUFFIX}" + ) + if(${_AWSSDK_LIB} MATCHES "^aws-cpp-sdk-") + set(_AWSSDK_TARGET_NAME ${_AWSSDK_LIB}) + else() + set(_AWSSDK_TARGET_NAME AWS::${_AWSSDK_LIB}) + endif() + add_library(${_AWSSDK_TARGET_NAME} STATIC IMPORTED) + set_target_properties( + ${_AWSSDK_TARGET_NAME} + PROPERTIES IMPORTED_LOCATION ${_AWSSDK_STATIC_LIBRARY} INTERFACE_INCLUDE_DIRECTORIES + "${AWSSDK_INCLUDE_DIR}") + set("${_AWSSDK_LIB_NAME_PREFIX}_STATIC_LIBRARY" ${_AWSSDK_STATIC_LIBRARY}) + list(APPEND AWSSDK_LIBRARIES ${_AWSSDK_TARGET_NAME}) + endforeach() + + externalproject_add(aws_c_common_ep + ${EP_LOG_OPTIONS} + URL ${AWS_C_COMMON_SOURCE_URL} + CMAKE_ARGS ${AWSSDK_COMMON_CMAKE_ARGS} + BUILD_BYPRODUCTS ${AWS_C_COMMON_STATIC_LIBRARY}) + add_dependencies(AWS::aws-c-common aws_c_common_ep) + + externalproject_add(aws_checksums_ep + ${EP_LOG_OPTIONS} + URL ${AWS_CHECKSUMS_SOURCE_URL} + CMAKE_ARGS ${AWSSDK_COMMON_CMAKE_ARGS} + BUILD_BYPRODUCTS ${AWS_CHECKSUMS_STATIC_LIBRARY}) + add_dependencies(AWS::aws-checksums aws_checksums_ep) + + externalproject_add(aws_c_event_stream_ep + ${EP_LOG_OPTIONS} + URL ${AWS_C_EVENT_STREAM_SOURCE_URL} + CMAKE_ARGS ${AWSSDK_COMMON_CMAKE_ARGS} + BUILD_BYPRODUCTS ${AWS_C_EVENT_STREAM_STATIC_LIBRARY} + DEPENDS aws_c_common_ep aws_checksums_ep) + add_dependencies(AWS::aws-c-event-stream aws_c_event_stream_ep) externalproject_add(awssdk_ep ${EP_LOG_OPTIONS} URL ${AWSSDK_SOURCE_URL} CMAKE_ARGS ${AWSSDK_CMAKE_ARGS} - BUILD_BYPRODUCTS ${AWSSDK_SHARED_LIBS}) - - file(MAKE_DIRECTORY ${AWSSDK_INCLUDE_DIR}) - + BUILD_BYPRODUCTS ${AWS_CPP_SDK_COGNITO_IDENTITY_STATIC_LIBRARY} + ${AWS_CPP_SDK_CORE_STATIC_LIBRARY} + ${AWS_CPP_SDK_IDENTITY_MANAGEMENT_STATIC_LIBRARY} + 
${AWS_CPP_SDK_S3_STATIC_LIBRARY} + ${AWS_CPP_SDK_STS_STATIC_LIBRARY} + DEPENDS aws_c_event_stream_ep) add_dependencies(toolchain awssdk_ep) - set(AWSSDK_LINK_LIBRARIES ${AWSSDK_SHARED_LIBS}) + foreach(_AWSSDK_LIB ${_AWSSDK_LIBS}) + if(${_AWSSDK_LIB} MATCHES "^aws-cpp-sdk-") + add_dependencies(${_AWSSDK_LIB} awssdk_ep) + endif() + endforeach() + set(AWSSDK_VENDORED TRUE) + list(APPEND ARROW_BUNDLED_STATIC_LIBS ${AWSSDK_LIBRARIES}) + set(AWSSDK_LINK_LIBRARIES ${AWSSDK_LIBRARIES}) + if(UNIX) + # on linux and macos curl seems to be required + find_package(CURL REQUIRED) + list(APPEND AWSSDK_LINK_LIBRARIES ${CURL_LIBRARIES}) + endif() - # AWSSDK is shared-only build + # AWSSDK is static-only build endmacro() if(ARROW_S3) diff --git a/cpp/src/arrow/filesystem/CMakeLists.txt b/cpp/src/arrow/filesystem/CMakeLists.txt index 41b8e69d77b..5b06646157b 100644 --- a/cpp/src/arrow/filesystem/CMakeLists.txt +++ b/cpp/src/arrow/filesystem/CMakeLists.txt @@ -32,8 +32,24 @@ add_arrow_test(filesystem-test if(ARROW_S3) add_arrow_test(s3fs_test EXTRA_LABELS filesystem) if(TARGET arrow-s3fs-test) + set(ARROW_S3FS_TEST_COMPILE_DEFINITIONS ${ARROW_BOOST_PROCESS_COMPILE_DEFINITIONS}) + get_target_property(AWS_CPP_SDK_S3_TYPE aws-cpp-sdk-s3 TYPE) + # We need to initialize AWS C++ SDK for direct use (not via + # arrow::fs::S3FileSystem) in arrow-s3fs-test if we use static AWS + # C++ SDK. Because AWS C++ SDK has internal static variables that + # aren't shared in libarrow and arrow-s3fs-test. It means that + # arrow::fs::InitializeS3() doesn't initialize AWS C++ SDK that is + # directly used in arrow-s3fs-test. + # + # But it seems that internal static variables in AWS C++ SDK are + # shared on macOS even if we link static AWS C++ SDK to both + # libarrow and arrow-s3fs-test. So we don't need to initialize AWS + # C++ SDK in arrow-s3fs-test on macOS. 
+ if(AWS_CPP_SDK_S3_TYPE STREQUAL "STATIC_LIBRARY" AND NOT APPLE) + list(APPEND ARROW_S3FS_TEST_COMPILE_DEFINITIONS "AWS_CPP_SDK_S3_NOT_SHARED") + endif() target_compile_definitions(arrow-s3fs-test PRIVATE - ${ARROW_BOOST_PROCESS_COMPILE_DEFINITIONS}) + ${ARROW_S3FS_TEST_COMPILE_DEFINITIONS}) endif() if(ARROW_BUILD_TESTS) diff --git a/cpp/src/arrow/filesystem/s3fs_test.cc b/cpp/src/arrow/filesystem/s3fs_test.cc index 4f38cf24206..99e6b3f7dfd 100644 --- a/cpp/src/arrow/filesystem/s3fs_test.cc +++ b/cpp/src/arrow/filesystem/s3fs_test.cc @@ -122,31 +122,58 @@ class AwsTestMixin : public ::testing::Test { // EC2 metadata endpoint AwsTestMixin() : ec2_metadata_disabled_guard_("AWS_EC2_METADATA_DISABLED", "true") {} + void SetUp() override { +#ifdef AWS_CPP_SDK_S3_NOT_SHARED + auto aws_log_level = Aws::Utils::Logging::LogLevel::Fatal; + aws_options_.loggingOptions.logLevel = aws_log_level; + aws_options_.loggingOptions.logger_create_fn = [&aws_log_level] { + return std::make_shared<Aws::Utils::Logging::ConsoleLogSystem>(aws_log_level); + }; + Aws::InitAPI(aws_options_); +#endif + } + + void TearDown() override { +#ifdef AWS_CPP_SDK_S3_NOT_SHARED + Aws::ShutdownAPI(aws_options_); +#endif + } + private: EnvVarGuard ec2_metadata_disabled_guard_; +#ifdef AWS_CPP_SDK_S3_NOT_SHARED + Aws::SDKOptions aws_options_; +#endif }; class S3TestMixin : public AwsTestMixin { public: void SetUp() override { + AwsTestMixin::SetUp(); + ASSERT_OK(minio_.Start()); - client_config_.endpointOverride = ToAwsString(minio_.connect_string()); - client_config_.scheme = Aws::Http::Scheme::HTTP; - client_config_.retryStrategy = std::make_shared<ConnectRetryStrategy>(); + client_config_.reset(new Aws::Client::ClientConfiguration()); + client_config_->endpointOverride = ToAwsString(minio_.connect_string()); + client_config_->scheme = Aws::Http::Scheme::HTTP; + client_config_->retryStrategy = std::make_shared<ConnectRetryStrategy>(); credentials_ = {ToAwsString(minio_.access_key()), ToAwsString(minio_.secret_key())}; bool use_virtual_addressing = false; client_.reset( - new
Aws::S3::S3Client(credentials_, client_config_, + new Aws::S3::S3Client(credentials_, *client_config_, Aws::Client::AWSAuthV4Signer::PayloadSigningPolicy::Never, use_virtual_addressing)); } - void TearDown() override { ASSERT_OK(minio_.Stop()); } + void TearDown() override { + ASSERT_OK(minio_.Stop()); + + AwsTestMixin::TearDown(); + } protected: MinioTestServer minio_; - Aws::Client::ClientConfiguration client_config_; + std::unique_ptr<Aws::Client::ClientConfiguration> client_config_; Aws::Auth::AWSCredentials credentials_; std::unique_ptr<Aws::S3::S3Client> client_; }; diff --git a/cpp/thirdparty/versions.txt b/cpp/thirdparty/versions.txt index 2cece5468f8..545943d30c6 100644 --- a/cpp/thirdparty/versions.txt +++ b/cpp/thirdparty/versions.txt @@ -24,7 +24,10 @@ # format). ARROW_ABSL_BUILD_VERSION=2eba343b51e0923cd3fb919a6abd6120590fc059 -ARROW_AWSSDK_BUILD_VERSION=1.7.160 +ARROW_AWSSDK_BUILD_VERSION=1.8.57 +ARROW_AWS_CHECKSUMS_BUILD_VERSION=v0.1.5 +ARROW_AWS_C_COMMON_BUILD_VERSION=v0.4.59 +ARROW_AWS_C_EVENT_STREAM_BUILD_VERSION=v0.1.5 ARROW_BOOST_BUILD_VERSION=1.71.0 ARROW_BROTLI_BUILD_VERSION=v1.0.7 ARROW_BZIP2_BUILD_VERSION=1.0.8 diff --git a/dev/tasks/homebrew-formulae/travis.osx.r.yml b/dev/tasks/homebrew-formulae/travis.osx.r.yml index d8bdb370478..4340708512e 100644 --- a/dev/tasks/homebrew-formulae/travis.osx.r.yml +++ b/dev/tasks/homebrew-formulae/travis.osx.r.yml @@ -46,7 +46,7 @@ before_install: # Sometimes crossbow gives a remote URL with .git and sometimes not. Make sure there's only one - sed -i.bak -E -e 's@.git.git@.git@' tools/apache-arrow.rb && rm -f tools/apache-arrow.rb.bak # Get minio for S3 testing -- brew install minio/stable/minio +- brew install minio script: - Rscript -e 'install.packages(c("rcmdcheck", "sys"))' # Note that this is not --as-cran.
CRAN doesn't do macOS checks --as-cran diff --git a/dev/tasks/python-wheels/osx-build.sh b/dev/tasks/python-wheels/osx-build.sh index 8b768ae71c2..a0bf7b81f5e 100755 --- a/dev/tasks/python-wheels/osx-build.sh +++ b/dev/tasks/python-wheels/osx-build.sh @@ -68,6 +68,7 @@ function build_wheel { -DARROW_GRPC_USE_SHARED=OFF \ -DARROW_HDFS=ON \ -DARROW_JEMALLOC=ON \ + -DARROW_OPENSSL_USE_SHARED=OFF \ -DARROW_ORC=OFF \ -DARROW_PARQUET=ON \ -DARROW_PLASMA=ON \ @@ -85,7 +86,6 @@ function build_wheel { -DCMAKE_BUILD_TYPE=Release \ -DCMAKE_INSTALL_PREFIX=$ARROW_HOME \ -DMAKE=make \ - -DOPENSSL_USE_STATIC_LIBS=ON \ -DProtobuf_SOURCE=SYSTEM \ .. make -j$(sysctl -n hw.logicalcpu) diff --git a/docker-compose.yml b/docker-compose.yml index b9a92a9f73b..90fdb970c2d 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -248,6 +248,7 @@ services: cache_from: - ${REPO}:${ARCH}-ubuntu-${UBUNTU}-cpp args: + arch: ${ARCH} base: "${ARCH}/ubuntu:${UBUNTU}" clang_tools: ${CLANG_TOOLS} llvm: ${LLVM} @@ -277,6 +278,7 @@ services: cache_from: - ${REPO}:${ARCH}-ubuntu-${UBUNTU}-cuda-${CUDA}-cpp args: + arch: ${ARCH} base: nvidia/cuda:${CUDA}-devel-ubuntu${UBUNTU} clang_tools: ${CLANG_TOOLS} llvm: ${LLVM} @@ -858,6 +860,7 @@ services: cache_from: - ${REPO}:${ARCH}-ubuntu-${UBUNTU}-r-${R} args: + arch: ${ARCH} r: ${R} base: ${REPO}:${ARCH}-ubuntu-${UBUNTU}-cpp shm_size: *shm-size @@ -888,18 +891,18 @@ services: - ${REPO}:r-${R_ORG}-${R_IMAGE}-${R_TAG} args: base: ${R_ORG}/${R_IMAGE}:${R_TAG} + r_dev: ${ARROW_R_DEV} shm_size: *shm-size environment: LIBARROW_DOWNLOAD: "false" ARROW_HOME: "/arrow" + ARROW_R_DEV: ${ARROW_R_DEV} # To test for CRAN release, delete ^^ these two env vars so we download the Apache release ARROW_USE_PKG_CONFIG: "false" volumes: - .:/arrow:delegated command: > - /bin/bash -c " - export ARROW_R_DEV=${ARROW_R_DEV} && - /arrow/ci/scripts/r_test.sh /arrow" + /bin/bash -c "/arrow/ci/scripts/r_test.sh /arrow" ubuntu-r-sanitizer: # Only 18.04 and amd64 supported diff --git 
a/r/NEWS.md b/r/NEWS.md index c03ff2f7487..9e655efbb6e 100644 --- a/r/NEWS.md +++ b/r/NEWS.md @@ -27,8 +27,8 @@ ## AWS S3 support -* S3 support is now enabled in binary macOS and Windows (Rtools40 only, i.e. R >= 4.0) packages. To enable it on Linux, you will need to build and install `aws-sdk-cpp` from source, then set the environment variable `EXTRA_CMAKE_FLAGS="-DARROW_S3=ON -DAWSSDK_SOURCE=SYSTEM"` prior to building the R package (with bundled C++ build, not with Arrow system libraries) from source. -* File readers and writers (`read_parquet()`, `write_feather()`, et al.), as well as `open_dataset()` and `write_dataset()`, allow you to access resources on S3 (or on file systems that emulate S3) either by providing an `s3://` URI or by passing an additional `filesystem` argument. See `vignette("fs", package = "arrow")` for details. +* S3 support is now enabled in binary macOS and Windows (Rtools40 only, i.e. R >= 4.0) packages. To enable it on Linux, you need additional system dependencies `libcurl` and `openssl`, as well as a sufficiently modern compiler. See `vignette("install", package = "arrow")` for details. +* File readers and writers (`read_parquet()`, `write_feather()`, et al.), as well as `open_dataset()` and `write_dataset()`, allow you to access resources on S3 (or on file systems that emulate S3) either by providing an `s3://` URI or by passing an additional `filesystem` argument. See `vignette("fs", package = "arrow")` for examples. 
## Computation diff --git a/r/R/install-arrow.R b/r/R/install-arrow.R index e3fa2ba2a2a..0ab17c99047 100644 --- a/r/R/install-arrow.R +++ b/r/R/install-arrow.R @@ -90,12 +90,12 @@ arrow_repos <- function(repos = getOption("repos"), nightly = FALSE) { # Set the default/CDN repos <- "https://cloud.r-project.org/" } - bintray <- getOption("arrow.dev.repo", "https://dl.bintray.com/ursalabs/arrow-r") + dev_repo <- getOption("arrow.dev_repo", "https://arrow-r-nightly.s3.amazonaws.com") # Remove it if it's there (so nightly=FALSE won't accidentally pull from it) - repos <- setdiff(repos, bintray) + repos <- setdiff(repos, dev_repo) if (nightly) { # Add it first - repos <- c(bintray, repos) + repos <- c(dev_repo, repos) } repos } diff --git a/r/README.md b/r/README.md index 97c4378c7c4..63d62504d89 100644 --- a/r/README.md +++ b/r/README.md @@ -60,10 +60,10 @@ install_arrow() ## Installing a development version Development versions of the package (binary and source) are built daily and hosted at -<https://dl.bintray.com/ursalabs/arrow-r>. To install from there: +<https://arrow-r-nightly.s3.amazonaws.com>. To install from there: ``` r -install.packages("arrow", repos = "https://dl.bintray.com/ursalabs/arrow-r") +install.packages("arrow", repos = "https://arrow-r-nightly.s3.amazonaws.com") ``` Or @@ -91,7 +91,7 @@ brew install apache-arrow --HEAD ``` On Windows, you can download a .zip file with the arrow dependencies from the -[nightly bintray repository](https://dl.bintray.com/ursalabs/arrow-r/libarrow/bin/windows/), +[nightly repository](https://dl.bintray.com/ursalabs/arrow-r/libarrow/bin/windows/), and then set the `RWINLIB_LOCAL` environment variable to point to that zip file before installing the `arrow` R package. Version numbers in that repository correspond to dates, and you will likely want the most recent.
diff --git a/r/configure b/r/configure index 4ad48c4d6c4..21bad6b1aa2 100755 --- a/r/configure +++ b/r/configure @@ -43,7 +43,7 @@ VERSION=`grep ^Version DESCRIPTION | sed s/Version:\ //` UNAME=`uname -s` # generate code -if [ "$ARROW_R_DEV" = "true" ]; then +if [ "$ARROW_R_DEV" = "true" ] && [ -f "data-raw/codegen.R" ]; then echo "*** Generating code with data-raw/codegen.R" ${R_HOME}/bin/Rscript data-raw/codegen.R fi @@ -179,6 +179,10 @@ if [ $? -eq 0 ] || [ "$UNAME" = "Darwin" ]; then grep 'set(ARROW_S3 "ON")' $LIB_DIR/cmake/arrow/ArrowOptions.cmake >/dev/null 2>&1 if [ $? -eq 0 ]; then PKG_CFLAGS="$PKG_CFLAGS -DARROW_R_WITH_S3" + if [ "$BUNDLED_LIBS" != "" ]; then + # We're depending on openssl/curl from the system, so they're not in the bundled deps + PKG_LIBS="$PKG_LIBS -lssl -lcrypto -lcurl" + fi fi echo "PKG_CFLAGS=$PKG_CFLAGS" echo "PKG_LIBS=$PKG_LIBS" diff --git a/r/inst/build_arrow_static.sh b/r/inst/build_arrow_static.sh index 86c1ee9ab5d..a57aa70a068 100755 --- a/r/inst/build_arrow_static.sh +++ b/r/inst/build_arrow_static.sh @@ -63,6 +63,7 @@ ${CMAKE} -DARROW_BOOST_USE_SHARED=OFF \ -DARROW_JEMALLOC=${ARROW_JEMALLOC:-ON} \ -DARROW_JSON=ON \ -DARROW_PARQUET=ON \ + -DARROW_S3=${ARROW_S3:-$ARROW_DEFAULT_PARAM} \ -DARROW_WITH_BROTLI=${ARROW_WITH_BROTLI:-$ARROW_DEFAULT_PARAM} \ -DARROW_WITH_BZ2=${ARROW_WITH_BZ2:-$ARROW_DEFAULT_PARAM} \ -DARROW_WITH_LZ4=${ARROW_WITH_LZ4:-$ARROW_DEFAULT_PARAM} \ @@ -76,7 +77,6 @@ ${CMAKE} -DARROW_BOOST_USE_SHARED=OFF \ -DCMAKE_EXPORT_NO_PACKAGE_REGISTRY=ON \ -DCMAKE_FIND_PACKAGE_NO_PACKAGE_REGISTRY=ON \ -DCMAKE_UNITY_BUILD=ON \ - -DOPENSSL_USE_STATIC_LIBS=ON \ ${EXTRA_CMAKE_FLAGS} \ -G ${CMAKE_GENERATOR:-"Unix Makefiles"} \ ${SOURCE_DIR} diff --git a/r/tests/testthat/test-install-arrow.R b/r/tests/testthat/test-install-arrow.R index cebaeeff4f7..8021e2ba10e 100644 --- a/r/tests/testthat/test-install-arrow.R +++ b/r/tests/testthat/test-install-arrow.R @@ -19,11 +19,16 @@ context("install_arrow()") r_only({ 
test_that("arrow_repos", { - old <- options(repos=c(CRAN = "@CRAN@")) # Restore default - on.exit(options(old)) cran <- "https://cloud.r-project.org/" - bt <- "https://dl.bintray.com/ursalabs/arrow-r" + bt <- "https://dl.bintray.com/ursalabs/fake_repo" other <- "https://cran.fiocruz.br/" + + old <- options( + repos=c(CRAN = "@CRAN@"), # Restore default + arrow.dev_repo = bt + ) + on.exit(options(old)) + + expect_identical(arrow_repos(), cran) expect_identical(arrow_repos(c(cran, bt)), cran) expect_identical(arrow_repos(c(bt, other)), other) diff --git a/r/tools/linuxlibs.R b/r/tools/linuxlibs.R index dc0116f8232..e5b928f5fad 100644 --- a/r/tools/linuxlibs.R +++ b/r/tools/linuxlibs.R @@ -256,7 +256,7 @@ find_local_source <- function(arrow_home = Sys.getenv("ARROW_HOME", "..")) { build_libarrow <- function(src_dir, dst_dir) { # We'll need to compile R bindings with these libs, so delete any .o files - system("rm src/*.o", ignore.stdout = quietly, ignore.stderr = quietly) + system("rm src/*.o", ignore.stdout = TRUE, ignore.stderr = TRUE) # Set up make for parallel building makeflags <- Sys.getenv("MAKEFLAGS") if (makeflags == "") { @@ -299,6 +299,7 @@ build_libarrow <- function(src_dir, dst_dir) { LDFLAGS = R_CMD_config("LDFLAGS") ) env_vars <- paste0(names(env_var_list), '="', env_var_list, '"', collapse = " ") + env_vars <- with_s3_support(env_vars) cat("**** arrow", ifelse(quietly, "", paste("with", env_vars)), "\n") status <- system( paste(env_vars, "inst/build_arrow_static.sh"), @@ -366,6 +367,46 @@ cmake_version <- function(cmd = "cmake") { ) } +with_s3_support <- function(env_vars) { + arrow_s3 <- toupper(Sys.getenv("ARROW_S3")) == "ON" || tolower(Sys.getenv("LIBARROW_MINIMAL")) == "false" + if (arrow_s3) { + # User wants S3 support. 
Let's make sure they're not on gcc < 4.9 + # and make sure that we have curl and openssl system libs + info <- system(paste(env_vars, "&& $CMAKE --system-information"), intern = TRUE) + info <- grep("^[A-Z_]* .*$", info, value = TRUE) + vals <- as.list(sub('^.*? "?(.*?)"?$', "\\1", info)) + names(vals) <- sub("^(.*?) .*$", "\\1", info) + if (vals[["CMAKE_CXX_COMPILER_ID"]] == "GNU" && + package_version(vals[["CMAKE_CXX_COMPILER_VERSION"]]) < 4.9) { + cat("**** S3 support not available for gcc < 4.9; building with ARROW_S3=OFF\n") + arrow_s3 <- FALSE + } else if (!cmake_find_package("CURL", NULL, env_vars)) { + cat("**** S3 support requires libcurl-devel (rpm) or libcurl4-openssl-dev (deb); building with ARROW_S3=OFF\n") + arrow_s3 <- FALSE + } else if (!cmake_find_package("OpenSSL", "1.0.2", env_vars)) { + cat("**** S3 support requires openssl-devel (rpm) or libssl-dev (deb), version >= 1.0.2; building with ARROW_S3=OFF\n") + arrow_s3 <- FALSE + } + } + paste(env_vars, ifelse(arrow_s3, "ARROW_S3=ON", "ARROW_S3=OFF")) +} + +cmake_find_package <- function(pkg, version = NULL, env_vars) { + td <- tempfile() + dir.create(td) + options(.arrow.cleanup = c(getOption(".arrow.cleanup"), td)) + find_package <- paste0("find_package(", pkg, " ", version, " REQUIRED)") + writeLines(find_package, file.path(td, "CMakeLists.txt")) + cmake_cmd <- paste0( + env_vars, " && $CMAKE", + " -DCMAKE_EXPORT_NO_PACKAGE_REGISTRY=ON", + " -DCMAKE_FIND_PACKAGE_NO_PACKAGE_REGISTRY=ON", + " -S", td, + " -B", td + ) + system(cmake_cmd, ignore.stdout = TRUE, ignore.stderr = TRUE) == 0 +} + ##### if (!file.exists(paste0(dst_dir, "/include/arrow/api.h"))) { diff --git a/r/vignettes/fs.Rmd b/r/vignettes/fs.Rmd index 5c2ece1cca4..d211a35a6b4 100644 --- a/r/vignettes/fs.Rmd +++ b/r/vignettes/fs.Rmd @@ -10,11 +10,10 @@ vignette: > The Arrow C++ library includes a generic filesystem interface and specific implementations for some cloud storage systems. 
This setup allows various parts of the project to be able to read and write data with different storage -backends. In the `arrow` R package, support has been enabled for AWS S3 on -macOS and Windows. This vignette provides an overview of working with S3 data -using Arrow. +backends. In the `arrow` R package, support has been enabled for AWS S3. +This vignette provides an overview of working with S3 data using Arrow. -> Note that S3 support is not enabled by default on Linux due to packaging complications. To enable it, you will need to build and install [aws-sdk-cpp](https://aws.amazon.com/sdk-for-cpp/) from source, then set the environment variable `EXTRA_CMAKE_FLAGS="-DARROW_S3=ON -DAWSSDK_SOURCE=SYSTEM"` prior to building the R package (with bundled C++ build, not with Arrow system libraries) from source. +> In Windows and macOS binary packages, S3 support is included. On Linux when installing from source, S3 support is not enabled by default, and it has additional system requirements. See `vignette("install", package = "arrow")` for details. ## URIs diff --git a/r/vignettes/install.Rmd b/r/vignettes/install.Rmd index 77729388f21..0dd3cfab225 100644 --- a/r/vignettes/install.Rmd +++ b/r/vignettes/install.Rmd @@ -38,7 +38,7 @@ Daily development builds, which are not official releases, can be installed from the Ursa Labs repository: ```r -install.packages("arrow", repos = "https://dl.bintray.com/ursalabs/arrow-r") +install.packages("arrow", repos = "https://arrow-r-nightly.s3.amazonaws.com") ``` There currently are no daily `conda` builds. @@ -92,6 +92,27 @@ satisfy C++ dependencies. +## S3 support + +The `arrow` package allows you to work with data in AWS S3 or in other cloud +storage systems that emulate S3. However, support for working with S3 is not +enabled in the default build, and it has additional system requirements. 
To +enable it, set the environment variable `LIBARROW_MINIMAL=false` or +`NOT_CRAN=true` to choose the full-featured build, or more selectively set +`ARROW_S3=ON`. You also need the following system dependencies: + +* `gcc` >= 4.9 or `clang` >= 3.3; note that the default compiler on CentOS 7 is gcc 4.8.5, which is not sufficient +* CURL: install `libcurl-devel` (rpm) or `libcurl4-openssl-dev` (deb) +* OpenSSL >= 1.0.2: install `openssl-devel` (rpm) or `libssl-dev` (deb) + +The prebuilt C++ binaries come with S3 support enabled, so you will need to meet +these system requirements in order to use them--the package will not install +without them. If you're building everything from source, the install script +will check for the presence of these dependencies and turn off S3 support in the +build if the prerequisites are not met--installation will succeed but without +S3 functionality. If afterwards you install the missing system requirements, +you'll need to reinstall the package in order to enable S3 support. + # How dependencies are resolved In order for the `arrow` R package to work, it needs the Arrow C++ library.