diff --git a/cpp/README.md b/cpp/README.md index 3091588f1ab..e7ddb0a1338 100644 --- a/cpp/README.md +++ b/cpp/README.md @@ -21,10 +21,10 @@ ## System setup -Arrow uses CMake as a build configuration system. Currently, it supports in-source and -out-of-source builds with the latter one being preferred. +Arrow uses CMake as a build configuration system. Currently, it supports +in-source and out-of-source builds with the latter one being preferred. -Build Arrow requires: +Building Arrow requires: * A C++11-enabled compiler. On Linux, gcc 4.8 and higher should be sufficient. * CMake @@ -108,11 +108,11 @@ ASAN, and `ARROW_USE_ASAN` is mutually-exclusive with the valgrind option ### Building/Running fuzzers -Fuzzers can help finding unhandled exceptions and problems with untrusted input that -may lead to crashes, security issues and undefined behavior. They do this by -generating random input data and observing the behavior of the executed code. To build -the fuzzer code, LLVM is required (GCC-based compilers won't work). You can build them -using the following code: +Fuzzers can help find unhandled exceptions and problems with untrusted input +that may lead to crashes, security issues and undefined behavior. They do this +by generating random input data and observing the behavior of the executed +code. To build the fuzzer code, LLVM is required (GCC-based compilers won't +work). You can build them using the following code: cmake -DARROW_FUZZING=ON -DARROW_USE_ASAN=ON .. 
@@ -156,29 +156,18 @@ There are some problems that may occur during the compilation process: - libfuzzer was not distributed with your LLVM: `ld: file not found: .../libLLVMFuzzer.a` - your LLVM is too old: `clang: error: unsupported argument 'fuzzer' to option 'fsanitize='` -### Third-party environment variables - -To set up your own specific build toolchain, here are the relevant environment -variables - -* Boost: `BOOST_ROOT` -* Googletest: `GTEST_HOME` (only required to build the unit tests) -* gflags: `GFLAGS_HOME` (only required to build the unit tests) -* Google Benchmark: `GBENCHMARK_HOME` (only required if building benchmarks) -* Flatbuffers: `FLATBUFFERS_HOME` (only required for the IPC extensions) -* Hadoop: `HADOOP_HOME` (only required for the HDFS I/O extensions) -* jemalloc: `JEMALLOC_HOME` -* brotli: `BROTLI_HOME`, can be disabled with `-DARROW_WITH_BROTLI=off` -* lz4: `LZ4_HOME`, can be disabled with `-DARROW_WITH_LZ4=off` -* snappy: `SNAPPY_HOME`, can be disabled with `-DARROW_WITH_SNAPPY=off` -* zlib: `ZLIB_HOME`, can be disabled with `-DARROW_WITH_ZLIB=off` -* zstd: `ZSTD_HOME`, can be disabled with `-DARROW_WITH_ZSTD=off` - -If you have all of your toolchain libraries installed at the same prefix, you -can use the environment variable `$ARROW_BUILD_TOOLCHAIN` to automatically set -all of these variables. Note that `ARROW_BUILD_TOOLCHAIN` will not set -`BOOST_ROOT`, so if you have custom Boost installation, you must set this -environment variable separately. +### Third-party dependencies and configuration + +Arrow depends on a number of third-party libraries. We support these in a few +ways: + +* Building dependencies from source by downloading archives from the internet +* Building dependencies from source using local archives (to allow offline + builds) +* Building with locally-installed libraries + +See [thirdparty/README.md][5] for details about these options and how to +configure your build toolchain. 
### Building Python integration library (optional) @@ -382,3 +371,4 @@ both of these options would be used rarely. Current known uses-cases when they a [2]: https://github.com/apache/arrow/blob/master/cpp/apidoc/Windows.md [3]: https://google.github.io/styleguide/cppguide.html [4]: https://github.com/include-what-you-use/include-what-you-use +[5]: https://github.com/apache/arrow/blob/master/cpp/thirdparty/README.md \ No newline at end of file diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake index 4dfe0435323..563a314d5f3 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -17,44 +17,9 @@ # ---------------------------------------------------------------------- -# Thirdparty toolchain +# Thirdparty versions, environment variables, source URLs set(THIRDPARTY_DIR "${CMAKE_SOURCE_DIR}/thirdparty") -set(GFLAGS_VERSION "2.2.0") -set(GTEST_VERSION "1.8.0") -set(GBENCHMARK_VERSION "1.4.1") -set(FLATBUFFERS_VERSION "1.9.0") -set(JEMALLOC_VERSION "17c897976c60b0e6e4f4a365c751027244dada7a") -set(SNAPPY_VERSION "1.1.3") -set(BROTLI_VERSION "v0.6.0") -set(LZ4_VERSION "1.7.5") -set(ZSTD_VERSION "1.2.0") -set(PROTOBUF_VERSION "2.6.0") -set(GRPC_VERSION "94582910ad7f82ad447ecc72e6548cb669e4f7a9") # v1.6.5 -set(ORC_VERSION "cf00b67795717ab3eb04e950780ed6d104109017") - -string(TOUPPER ${CMAKE_BUILD_TYPE} UPPERCASE_BUILD_TYPE) - -set(EP_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_${UPPERCASE_BUILD_TYPE}}") -set(EP_C_FLAGS "${CMAKE_C_FLAGS} ${CMAKE_C_FLAGS_${UPPERCASE_BUILD_TYPE}}") - -if (NOT ARROW_VERBOSE_THIRDPARTY_BUILD) - set(EP_LOG_OPTIONS - LOG_CONFIGURE 1 - LOG_BUILD 1 - LOG_INSTALL 1 - LOG_DOWNLOAD 1) - set(Boost_DEBUG FALSE) -else() - set(EP_LOG_OPTIONS) - set(Boost_DEBUG TRUE) -endif() - -if (NOT MSVC) - # Set -fPIC on all external projects - set(EP_CXX_FLAGS "${EP_CXX_FLAGS} -fPIC") - set(EP_C_FLAGS "${EP_C_FLAGS} -fPIC") -endif() if (NOT 
"$ENV{ARROW_BUILD_TOOLCHAIN}" STREQUAL "") set(FLATBUFFERS_HOME "$ENV{ARROW_BUILD_TOOLCHAIN}") @@ -114,6 +79,145 @@ if (DEFINED ENV{PROTOBUF_HOME}) set(PROTOBUF_HOME "$ENV{PROTOBUF_HOME}") endif() +# ---------------------------------------------------------------------- +# Versions and URLs for toolchain builds, which also can be used to configure +# offline builds + +# Read NAME=VALUE toolchain versions from cpp/thirdparty/versions.txt +file(STRINGS "${THIRDPARTY_DIR}/versions.txt" TOOLCHAIN_VERSIONS_TXT) +foreach(_VERSION_ENTRY ${TOOLCHAIN_VERSIONS_TXT}) + # Exclude comments (any entry containing '#') + if(_VERSION_ENTRY MATCHES "#.*") + continue() + endif() + + string(REGEX MATCH "^[^=]*" _LIB_NAME "${_VERSION_ENTRY}") + string(REPLACE "${_LIB_NAME}=" "" _LIB_VERSION "${_VERSION_ENTRY}") + + # Skip entries with an empty value or without a NAME= prefix (REPLACE left the entry unchanged) + if("${_LIB_VERSION}" STREQUAL "" OR "${_LIB_VERSION}" STREQUAL "${_VERSION_ENTRY}") + continue() + endif() + + # Log each parsed NAME: VALUE pair, for debugging + message(STATUS "${_LIB_NAME}: ${_LIB_VERSION}") + + set(${_LIB_NAME} "${_LIB_VERSION}") +endforeach() + +if (DEFINED ENV{ARROW_BOOST_URL}) + set(BOOST_SOURCE_URL "$ENV{ARROW_BOOST_URL}") +else() + string(REPLACE "." 
"_" BOOST_VERSION_UNDERSCORES ${BOOST_VERSION}) + set(BOOST_SOURCE_URL + "https://dl.bintray.com/boostorg/release/${BOOST_VERSION}/source/boost_${BOOST_VERSION_UNDERSCORES}.tar.gz") +endif() + +if (DEFINED ENV{ARROW_GTEST_URL}) + set(GTEST_SOURCE_URL "$ENV{ARROW_GTEST_URL}") +else() + set(GTEST_SOURCE_URL "https://github.com/google/googletest/archive/release-${GTEST_VERSION}.tar.gz") +endif() + +if (DEFINED ENV{ARROW_GFLAGS_URL}) + set(GFLAGS_SOURCE_URL "$ENV{ARROW_GFLAGS_URL}") +else() + set(GFLAGS_SOURCE_URL "https://github.com/gflags/gflags/archive/v${GFLAGS_VERSION}.tar.gz") +endif() + +if (DEFINED ENV{ARROW_GBENCHMARK_URL}) + set(GBENCHMARK_SOURCE_URL "$ENV{ARROW_GBENCHMARK_URL}") +else() + set(GBENCHMARK_SOURCE_URL "https://github.com/google/benchmark/archive/v${GBENCHMARK_VERSION}.tar.gz") +endif() + +set(RAPIDJSON_SOURCE_MD5 "badd12c511e081fec6c89c43a7027bce") +if (DEFINED ENV{ARROW_RAPIDJSON_URL}) + set(RAPIDJSON_SOURCE_URL "$ENV{ARROW_RAPIDJSON_URL}") +else() + set(RAPIDJSON_SOURCE_URL "https://github.com/miloyip/rapidjson/archive/v${RAPIDJSON_VERSION}.tar.gz") +endif() + +if (DEFINED ENV{ARROW_FLATBUFFERS_URL}) + set(FLATBUFFERS_SOURCE_URL "$ENV{ARROW_FLATBUFFERS_URL}") +else() + set(FLATBUFFERS_SOURCE_URL "https://github.com/google/flatbuffers/archive/v${FLATBUFFERS_VERSION}.tar.gz") +endif() + +if (DEFINED ENV{ARROW_SNAPPY_URL}) + set(SNAPPY_SOURCE_URL "$ENV{ARROW_SNAPPY_URL}") +else() + set(SNAPPY_SOURCE_URL "https://github.com/google/snappy/releases/download/${SNAPPY_VERSION}/snappy-${SNAPPY_VERSION}.tar.gz") +endif() + +if (DEFINED ENV{ARROW_BROTLI_URL}) + set(BROTLI_SOURCE_URL "$ENV{ARROW_BROTLI_URL}") +else() + set(BROTLI_SOURCE_URL "https://github.com/google/brotli/archive/${BROTLI_VERSION}.tar.gz") +endif() + +if (DEFINED ENV{ARROW_LZ4_URL}) + set(LZ4_SOURCE_URL "$ENV{ARROW_LZ4_URL}") +else() + set(LZ4_SOURCE_URL "https://github.com/lz4/lz4/archive/v${LZ4_VERSION}.tar.gz") +endif() + +if (DEFINED ENV{ARROW_ZLIB_URL}) + set(ZLIB_SOURCE_URL 
"$ENV{ARROW_ZLIB_URL}") +else() + set(ZLIB_SOURCE_URL "http://zlib.net/fossils/zlib-${ZLIB_VERSION}.tar.gz") +endif() + +if (DEFINED ENV{ARROW_ZSTD_URL}) + set(ZSTD_SOURCE_URL "$ENV{ARROW_ZSTD_URL}") +else() + set(ZSTD_SOURCE_URL "https://github.com/facebook/zstd/archive/v${ZSTD_VERSION}.tar.gz") +endif() + +if (DEFINED ENV{ARROW_PROTOBUF_URL}) + set(PROTOBUF_SOURCE_URL "$ENV{ARROW_PROTOBUF_URL}") +else() + set(PROTOBUF_SOURCE_URL "https://github.com/google/protobuf/releases/download/v${PROTOBUF_VERSION}/protobuf-${PROTOBUF_VERSION}.tar.gz") +endif() + +if (DEFINED ENV{ARROW_GRPC_URL}) + set(GRPC_SOURCE_URL "$ENV{ARROW_GRPC_URL}") +else() + set(GRPC_SOURCE_URL "https://github.com/grpc/grpc/archive/v${GRPC_VERSION}.tar.gz") +endif() + +if (DEFINED ENV{ARROW_ORC_URL}) + set(ORC_SOURCE_URL "$ENV{ARROW_ORC_URL}") +else() + set(ORC_SOURCE_URL "https://github.com/apache/orc/archive/rel/release-${ORC_VERSION}.tar.gz") +endif() + +# ---------------------------------------------------------------------- +# ExternalProject options + +string(TOUPPER ${CMAKE_BUILD_TYPE} UPPERCASE_BUILD_TYPE) + +set(EP_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_${UPPERCASE_BUILD_TYPE}}") +set(EP_C_FLAGS "${CMAKE_C_FLAGS} ${CMAKE_C_FLAGS_${UPPERCASE_BUILD_TYPE}}") + +if (NOT ARROW_VERBOSE_THIRDPARTY_BUILD) + set(EP_LOG_OPTIONS + LOG_CONFIGURE 1 + LOG_BUILD 1 + LOG_INSTALL 1 + LOG_DOWNLOAD 1) + set(Boost_DEBUG FALSE) +else() + set(EP_LOG_OPTIONS) + set(Boost_DEBUG TRUE) +endif() + +if (NOT MSVC) + # Set -fPIC on all external projects + set(EP_CXX_FLAGS "${EP_CXX_FLAGS} -fPIC") + set(EP_C_FLAGS "${EP_C_FLAGS} -fPIC") +endif() + # Ensure that a default make is set if ("${MAKE}" STREQUAL "") if (NOT MSVC) @@ -146,10 +250,6 @@ set(Boost_ADDITIONAL_VERSIONS "1.62.0" "1.61" "1.61.0" "1.62" "1.60.0" "1.60") -list(GET Boost_ADDITIONAL_VERSIONS 2 BOOST_LATEST_VERSION) -string(REPLACE "." 
"_" BOOST_LATEST_VERSION_IN_PATH ${BOOST_LATEST_VERSION}) -set(BOOST_LATEST_URL - "https://dl.bintray.com/boostorg/release/${BOOST_LATEST_VERSION}/source/boost_${BOOST_LATEST_VERSION_IN_PATH}.tar.gz") if (ARROW_BOOST_VENDORED) set(BOOST_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/boost_ep-prefix/src/boost_ep") @@ -185,7 +285,7 @@ if (ARROW_BOOST_VENDORED) "cxxflags=-fPIC") endif() ExternalProject_Add(boost_ep - URL ${BOOST_LATEST_URL} + URL ${BOOST_SOURCE_URL} BUILD_BYPRODUCTS ${BOOST_BUILD_PRODUCTS} BUILD_IN_SOURCE 1 CONFIGURE_COMMAND ${BOOST_CONFIGURE_COMMAND} @@ -288,7 +388,7 @@ if(ARROW_BUILD_TESTS OR ARROW_BUILD_BENCHMARKS) endif() ExternalProject_Add(googletest_ep - URL "https://github.com/google/googletest/archive/release-${GTEST_VERSION}.tar.gz" + URL ${GTEST_SOURCE_URL} BUILD_BYPRODUCTS ${GTEST_STATIC_LIB} ${GTEST_MAIN_STATIC_LIB} CMAKE_ARGS ${GTEST_CMAKE_ARGS} ${EP_LOG_OPTIONS}) @@ -314,7 +414,6 @@ if(ARROW_BUILD_TESTS OR ARROW_BUILD_BENCHMARKS) if("${GFLAGS_HOME}" STREQUAL "") set(GFLAGS_CMAKE_CXX_FLAGS ${EP_CXX_FLAGS}) - set(GFLAGS_URL "https://github.com/gflags/gflags/archive/v${GFLAGS_VERSION}.tar.gz") set(GFLAGS_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/gflags_ep-prefix/src/gflags_ep") set(GFLAGS_HOME "${GFLAGS_PREFIX}") set(GFLAGS_INCLUDE_DIR "${GFLAGS_PREFIX}/include") @@ -337,7 +436,7 @@ if(ARROW_BUILD_TESTS OR ARROW_BUILD_BENCHMARKS) -DCMAKE_CXX_FLAGS=${GFLAGS_CMAKE_CXX_FLAGS}) ExternalProject_Add(gflags_ep - URL ${GFLAGS_URL} + URL ${GFLAGS_SOURCE_URL} ${EP_LOG_OPTIONS} BUILD_IN_SOURCE 1 BUILD_BYPRODUCTS "${GFLAGS_STATIC_LIB}" @@ -389,7 +488,7 @@ if(ARROW_BUILD_BENCHMARKS) endif() ExternalProject_Add(gbenchmark_ep - URL "https://github.com/google/benchmark/archive/v${GBENCHMARK_VERSION}.tar.gz" + URL ${GBENCHMARK_SOURCE_URL} BUILD_BYPRODUCTS "${GBENCHMARK_STATIC_LIB}" CMAKE_ARGS ${GBENCHMARK_CMAKE_ARGS} ${EP_LOG_OPTIONS}) @@ -414,8 +513,8 @@ if (ARROW_IPC) if("${RAPIDJSON_HOME}" STREQUAL "") ExternalProject_Add(rapidjson_ep PREFIX "${CMAKE_BINARY_DIR}" - URL 
"https://github.com/miloyip/rapidjson/archive/v1.1.0.tar.gz" - URL_MD5 "badd12c511e081fec6c89c43a7027bce" + URL ${RAPIDJSON_SOURCE_URL} + URL_MD5 ${RAPIDJSON_SOURCE_MD5} CONFIGURE_COMMAND "" BUILD_COMMAND "" BUILD_IN_SOURCE 1 @@ -446,7 +545,7 @@ if (ARROW_IPC) endif() # We always need to do release builds, otherwise flatc will not be installed. ExternalProject_Add(flatbuffers_ep - URL "https://github.com/google/flatbuffers/archive/v${FLATBUFFERS_VERSION}.tar.gz" + URL ${FLATBUFFERS_SOURCE_URL} CMAKE_ARGS "-DCMAKE_CXX_FLAGS=${FLATBUFFERS_CMAKE_CXX_FLAGS}" "-DCMAKE_INSTALL_PREFIX:PATH=${FLATBUFFERS_PREFIX}" @@ -580,7 +679,7 @@ if (ARROW_WITH_ZLIB) -DBUILD_SHARED_LIBS=OFF) ExternalProject_Add(zlib_ep - URL "http://zlib.net/fossils/zlib-1.2.8.tar.gz" + URL ${ZLIB_SOURCE_URL} ${EP_LOG_OPTIONS} BUILD_BYPRODUCTS "${ZLIB_STATIC_LIB}" CMAKE_ARGS ${ZLIB_CMAKE_ARGS}) @@ -613,7 +712,6 @@ if (ARROW_WITH_SNAPPY) set(SNAPPY_STATIC_LIB_NAME snappy) endif() set(SNAPPY_STATIC_LIB "${SNAPPY_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}${SNAPPY_STATIC_LIB_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX}") - set(SNAPPY_SRC_URL "https://github.com/google/snappy/releases/download/${SNAPPY_VERSION}/snappy-${SNAPPY_VERSION}.tar.gz") if (${UPPERCASE_BUILD_TYPE} EQUAL "RELEASE") if (APPLE) @@ -642,7 +740,7 @@ if (ARROW_WITH_SNAPPY) BUILD_IN_SOURCE 1 BUILD_COMMAND ${MAKE} INSTALL_DIR ${SNAPPY_PREFIX} - URL ${SNAPPY_SRC_URL} + URL ${SNAPPY_SOURCE_URL} CMAKE_ARGS ${SNAPPY_CMAKE_ARGS} BUILD_BYPRODUCTS "${SNAPPY_STATIC_LIB}") else() @@ -652,7 +750,7 @@ if (ARROW_WITH_SNAPPY) BUILD_IN_SOURCE 1 BUILD_COMMAND ${MAKE} INSTALL_DIR ${SNAPPY_PREFIX} - URL ${SNAPPY_SRC_URL} + URL ${SNAPPY_SOURCE_URL} BUILD_BYPRODUCTS "${SNAPPY_STATIC_LIB}") endif() set(SNAPPY_VENDORED 1) @@ -696,7 +794,7 @@ if (ARROW_WITH_BROTLI) -DBUILD_SHARED_LIBS=OFF) ExternalProject_Add(brotli_ep - URL "https://github.com/google/brotli/archive/${BROTLI_VERSION}.tar.gz" + URL ${BROTLI_SOURCE_URL} BUILD_BYPRODUCTS "${BROTLI_STATIC_LIBRARY_ENC}" 
"${BROTLI_STATIC_LIBRARY_DEC}" "${BROTLI_STATIC_LIBRARY_COMMON}" ${BROTLI_BUILD_BYPRODUCTS} ${EP_LOG_OPTIONS} @@ -758,7 +856,7 @@ if (ARROW_WITH_LZ4) endif() ExternalProject_Add(lz4_ep - URL "https://github.com/lz4/lz4/archive/v${LZ4_VERSION}.tar.gz" + URL ${LZ4_SOURCE_URL} ${EP_LOG_OPTIONS} UPDATE_COMMAND "" ${LZ4_PATCH_COMMAND} @@ -811,7 +909,7 @@ if (ARROW_WITH_ZSTD) endif() ExternalProject_Add(zstd_ep - URL "https://github.com/facebook/zstd/archive/v${ZSTD_VERSION}.tar.gz" + URL ${ZSTD_SOURCE_URL} ${EP_LOG_OPTIONS} UPDATE_COMMAND "" ${ZSTD_PATCH_COMMAND} @@ -891,12 +989,11 @@ if (ARROW_ORC) set (PROTOBUF_HOME "${PROTOBUF_PREFIX}") set (PROTOBUF_INCLUDE_DIR "${PROTOBUF_PREFIX}/include") set (PROTOBUF_STATIC_LIB "${PROTOBUF_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}protobuf${CMAKE_STATIC_LIBRARY_SUFFIX}") - set (PROTOBUF_SRC_URL "https://github.com/google/protobuf/releases/download/v${PROTOBUF_VERSION}/protobuf-${PROTOBUF_VERSION}.tar.gz") ExternalProject_Add(protobuf_ep CONFIGURE_COMMAND "./configure" "--disable-shared" "--prefix=${PROTOBUF_PREFIX}" "CXXFLAGS=${EP_CXX_FLAGS}" BUILD_IN_SOURCE 1 - URL ${PROTOBUF_SRC_URL} + URL ${PROTOBUF_SOURCE_URL} BUILD_BYPRODUCTS "${PROTOBUF_STATIC_LIB}" ${EP_LOG_OPTIONS}) @@ -952,7 +1049,7 @@ if (ARROW_ORC) -DZLIB_HOME=${ZLIB_HOME}) ExternalProject_Add(orc_ep - URL "https://github.com/apache/orc/archive/${ORC_VERSION}.tar.gz" + URL ${ORC_SOURCE_URL} BUILD_BYPRODUCTS ${ORC_STATIC_LIB} CMAKE_ARGS ${ORC_CMAKE_ARGS} ${EP_LOG_OPTIONS}) diff --git a/cpp/thirdparty/README.md b/cpp/thirdparty/README.md new file mode 100644 index 00000000000..f4f89f57bdd --- /dev/null +++ b/cpp/thirdparty/README.md @@ -0,0 +1,89 @@ + + +# Arrow C++ Thirdparty Dependencies + +The version numbers for our third-party dependencies are listed in +`thirdparty/versions.txt`. This is used by the CMake build system as well as +the dependency downloader script (see below), which can be used to set up +offline builds. 
+ +## Configuring your own build toolchain + +To set up your own specific build toolchain, here are the relevant environment +variables: + +* Boost: `BOOST_ROOT` +* Googletest: `GTEST_HOME` (only required to build the unit tests) +* gflags: `GFLAGS_HOME` (only required to build the unit tests) +* Google Benchmark: `GBENCHMARK_HOME` (only required if building benchmarks) +* Flatbuffers: `FLATBUFFERS_HOME` (only required for `-DARROW_IPC=on`, which is + the default) +* Hadoop: `HADOOP_HOME` (only required for the HDFS I/O extensions) +* jemalloc: `JEMALLOC_HOME` +* brotli: `BROTLI_HOME`, can be disabled with `-DARROW_WITH_BROTLI=off` +* lz4: `LZ4_HOME`, can be disabled with `-DARROW_WITH_LZ4=off` +* snappy: `SNAPPY_HOME`, can be disabled with `-DARROW_WITH_SNAPPY=off` +* zlib: `ZLIB_HOME`, can be disabled with `-DARROW_WITH_ZLIB=off` +* zstd: `ZSTD_HOME`, can be disabled with `-DARROW_WITH_ZSTD=off` + +If you have all of your toolchain libraries installed at the same prefix, you +can use the environment variable `$ARROW_BUILD_TOOLCHAIN` to automatically set +all of these variables. Note that `ARROW_BUILD_TOOLCHAIN` will not set +`BOOST_ROOT`, so if you have a custom Boost installation, you must set this +environment variable separately. + +## Configuring for offline builds + +If you do not use the above variables to direct the Arrow build system to +preinstalled dependencies, they will be built automatically by the build +system. The source archive for each dependency will be downloaded via the +internet, which can cause issues in environments with limited access to the +internet. + +To enable offline builds, you can download the source artifacts yourself and +use environment variables of the form `ARROW_$LIBRARY_URL` to direct the build +system to read from a local file rather than accessing the internet. 
+ +To make this easier for you, we have prepared a script +`thirdparty/download_dependencies.sh` which will download the correct version +of each dependency to a directory of your choosing. It will print a list of +bash-style environment variable statements at the end to use for your build +script: + +```shell +$ ./thirdparty/download_dependencies.sh $HOME/arrow-thirdparty-deps +# some output omitted + +# Environment variables for offline Arrow build +export ARROW_BOOST_URL=$HOME/arrow-thirdparty-deps/boost.tar.gz +export ARROW_GTEST_URL=$HOME/arrow-thirdparty-deps/gtest.tar.gz +export ARROW_GFLAGS_URL=$HOME/arrow-thirdparty-deps/gflags.tar.gz +export ARROW_GBENCHMARK_URL=$HOME/arrow-thirdparty-deps/gbenchmark.tar.gz +export ARROW_FLATBUFFERS_URL=$HOME/arrow-thirdparty-deps/flatbuffers.tar.gz +export ARROW_RAPIDJSON_URL=$HOME/arrow-thirdparty-deps/rapidjson.tar.gz +export ARROW_SNAPPY_URL=$HOME/arrow-thirdparty-deps/snappy.tar.gz +export ARROW_BROTLI_URL=$HOME/arrow-thirdparty-deps/brotli.tar.gz +export ARROW_LZ4_URL=$HOME/arrow-thirdparty-deps/lz4.tar.gz +export ARROW_ZLIB_URL=$HOME/arrow-thirdparty-deps/zlib.tar.gz +export ARROW_ZSTD_URL=$HOME/arrow-thirdparty-deps/zstd.tar.gz +export ARROW_PROTOBUF_URL=$HOME/arrow-thirdparty-deps/protobuf.tar.gz +export ARROW_GRPC_URL=$HOME/arrow-thirdparty-deps/grpc.tar.gz +export ARROW_ORC_URL=$HOME/arrow-thirdparty-deps/orc.tar.gz +``` diff --git a/cpp/thirdparty/download_dependencies.sh b/cpp/thirdparty/download_dependencies.sh new file mode 100755 index 00000000000..2d8bee41aae --- /dev/null +++ b/cpp/thirdparty/download_dependencies.sh @@ -0,0 +1,82 @@ +#!/usr/bin/env bash + +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# This script downloads all the thirdparty dependencies as a series of tarballs +# that can be used for offline builds, etc. + +set -e + +SOURCE_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" + +if [ "$#" -ne 1 ]; then + echo "Usage: $0 <destination directory>" + exit 1 +fi + +_DST=$1 + +# Toolchain versions are read from versions.txt next to this script; edit that file to change them +source "$SOURCE_DIR/versions.txt" + +BOOST_UNDERSCORE_VERSION=`echo $BOOST_VERSION | sed 's/\./_/g'` +wget -c -O $_DST/boost.tar.gz https://dl.bintray.com/boostorg/release/$BOOST_VERSION/source/boost_$BOOST_UNDERSCORE_VERSION.tar.gz + +wget -c -O $_DST/gtest.tar.gz https://github.com/google/googletest/archive/release-$GTEST_VERSION.tar.gz + +wget -c -O $_DST/gflags.tar.gz https://github.com/gflags/gflags/archive/v$GFLAGS_VERSION.tar.gz + +wget -c -O $_DST/gbenchmark.tar.gz https://github.com/google/benchmark/archive/v$GBENCHMARK_VERSION.tar.gz + +wget -c -O $_DST/flatbuffers.tar.gz https://github.com/google/flatbuffers/archive/v$FLATBUFFERS_VERSION.tar.gz + +wget -c -O $_DST/rapidjson.tar.gz https://github.com/miloyip/rapidjson/archive/v$RAPIDJSON_VERSION.tar.gz + +wget -c -O $_DST/snappy.tar.gz https://github.com/google/snappy/releases/download/$SNAPPY_VERSION/snappy-$SNAPPY_VERSION.tar.gz + +wget -c -O $_DST/brotli.tar.gz https://github.com/google/brotli/archive/$BROTLI_VERSION.tar.gz + +wget -c -O $_DST/lz4.tar.gz https://github.com/lz4/lz4/archive/v$LZ4_VERSION.tar.gz + 
+wget -c -O $_DST/zlib.tar.gz http://zlib.net/fossils/zlib-$ZLIB_VERSION.tar.gz + +wget -c -O $_DST/zstd.tar.gz https://github.com/facebook/zstd/archive/v$ZSTD_VERSION.tar.gz + +wget -c -O $_DST/protobuf.tar.gz https://github.com/google/protobuf/releases/download/v$PROTOBUF_VERSION/protobuf-$PROTOBUF_VERSION.tar.gz + +wget -c -O $_DST/grpc.tar.gz https://github.com/grpc/grpc/archive/v$GRPC_VERSION.tar.gz + +wget -c -O $_DST/orc.tar.gz https://github.com/apache/orc/archive/rel/release-$ORC_VERSION.tar.gz + +echo " +# Environment variables for offline Arrow build +export ARROW_BOOST_URL=$_DST/boost.tar.gz +export ARROW_GTEST_URL=$_DST/gtest.tar.gz +export ARROW_GFLAGS_URL=$_DST/gflags.tar.gz +export ARROW_GBENCHMARK_URL=$_DST/gbenchmark.tar.gz +export ARROW_FLATBUFFERS_URL=$_DST/flatbuffers.tar.gz +export ARROW_RAPIDJSON_URL=$_DST/rapidjson.tar.gz +export ARROW_SNAPPY_URL=$_DST/snappy.tar.gz +export ARROW_BROTLI_URL=$_DST/brotli.tar.gz +export ARROW_LZ4_URL=$_DST/lz4.tar.gz +export ARROW_ZLIB_URL=$_DST/zlib.tar.gz +export ARROW_ZSTD_URL=$_DST/zstd.tar.gz +export ARROW_PROTOBUF_URL=$_DST/protobuf.tar.gz +export ARROW_GRPC_URL=$_DST/grpc.tar.gz +export ARROW_ORC_URL=$_DST/orc.tar.gz +" diff --git a/cpp/thirdparty/versions.txt b/cpp/thirdparty/versions.txt new file mode 100644 index 00000000000..554c7196c22 --- /dev/null +++ b/cpp/thirdparty/versions.txt @@ -0,0 +1,34 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# Toolchain library versions + +BOOST_VERSION=1.67.0 +GTEST_VERSION=1.8.0 +GFLAGS_VERSION=2.2.0 +GBENCHMARK_VERSION=1.4.1 +FLATBUFFERS_VERSION=1.9.0 +RAPIDJSON_VERSION=1.1.0 +JEMALLOC_VERSION=17c897976c60b0e6e4f4a365c751027244dada7a +SNAPPY_VERSION=1.1.3 +BROTLI_VERSION=v0.6.0 +LZ4_VERSION=1.7.5 +ZLIB_VERSION=1.2.8 +ZSTD_VERSION=1.2.0 +PROTOBUF_VERSION=2.6.0 +GRPC_VERSION=1.12.1 +ORC_VERSION=1.5.1