diff --git a/ci/appveyor-cpp-build.bat b/ci/appveyor-cpp-build.bat index bce722ffa999d..195941e5b1389 100644 --- a/ci/appveyor-cpp-build.bat +++ b/ci/appveyor-cpp-build.bat @@ -30,6 +30,7 @@ if "%JOB%" == "Static_Crt_Build" ( pushd cpp\build-debug cmake -G "%GENERATOR%" ^ + -DARROW_VERBOSE_THIRDPARTY_BUILD=OFF ^ -DARROW_USE_STATIC_CRT=ON ^ -DARROW_BOOST_USE_SHARED=OFF ^ -DARROW_BUILD_SHARED=OFF ^ @@ -45,6 +46,7 @@ if "%JOB%" == "Static_Crt_Build" ( pushd cpp\build-release cmake -G "%GENERATOR%" ^ + -DARROW_VERBOSE_THIRDPARTY_BUILD=OFF ^ -DARROW_USE_STATIC_CRT=ON ^ -DARROW_BOOST_USE_SHARED=OFF ^ -DARROW_BUILD_SHARED=OFF ^ @@ -70,6 +72,7 @@ if "%JOB%" == "Build_Debug" ( pushd cpp\build-debug cmake -G "%GENERATOR%" ^ + -DARROW_VERBOSE_THIRDPARTY_BUILD=OFF ^ -DARROW_BOOST_USE_SHARED=OFF ^ -DCMAKE_BUILD_TYPE=%CONFIGURATION% ^ -DARROW_BUILD_STATIC=OFF ^ diff --git a/ci/cpp-msvc-build-main.bat b/ci/cpp-msvc-build-main.bat index e5eef1e5d8387..ef961b2e0f26e 100644 --- a/ci/cpp-msvc-build-main.bat +++ b/ci/cpp-msvc-build-main.bat @@ -19,7 +19,7 @@ @rem (i.e. for usual configurations) set ARROW_HOME=%CONDA_PREFIX%\Library -set CMAKE_ARGS= +set CMAKE_ARGS=-DARROW_VERBOSE_THIRDPARTY_BUILD=OFF if "%JOB%" == "Toolchain" ( set CMAKE_ARGS=%CMAKE_ARGS% -DARROW_WITH_BZ2=ON diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake index c35dd4a1dc6a4..bbac55b1fc22f 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -1053,38 +1053,38 @@ if (ARROW_WITH_ZSTD) # ZSTD if("${ZSTD_HOME}" STREQUAL "") - set(ZSTD_BUILD_DIR "${CMAKE_CURRENT_BINARY_DIR}/zstd_ep-prefix/src/zstd_ep") - set(ZSTD_INCLUDE_DIR "${ZSTD_BUILD_DIR}/lib") + set(ZSTD_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/zstd_ep-install") + set(ZSTD_INCLUDE_DIR "${ZSTD_PREFIX}/include") + + set(ZSTD_CMAKE_ARGS + "-DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}" + "-DCMAKE_INSTALL_PREFIX=${ZSTD_PREFIX}" + "-DZSTD_BUILD_PROGRAMS=off" + "-DZSTD_BUILD_SHARED=off" + "-DZSTD_BUILD_STATIC=on" + "-DZSTD_MULTITHREAD_SUPPORT=off") if (MSVC) + set(ZSTD_STATIC_LIB "${ZSTD_PREFIX}/lib/zstd_static.lib") if (ARROW_USE_STATIC_CRT) - if (${UPPERCASE_BUILD_TYPE} STREQUAL "DEBUG") - set(ZSTD_RUNTIME_LIBRARY_LINKAGE "/p:RuntimeLibrary=MultiThreadedDebug") - else() - set(ZSTD_RUNTIME_LIBRARY_LINKAGE "/p:RuntimeLibrary=MultiThreaded") - endif() + set(ZSTD_CMAKE_ARGS ${ZSTD_CMAKE_ARGS} "-DZSTD_USE_STATIC_RUNTIME=on") endif() - set(ZSTD_STATIC_LIB "${ZSTD_BUILD_DIR}/build/VS2010/bin/x64_${CMAKE_BUILD_TYPE}/libzstd_static.lib") - set(ZSTD_BUILD_COMMAND BUILD_COMMAND msbuild ${ZSTD_BUILD_DIR}/build/VS2010/zstd.sln /t:Build /v:minimal /p:Configuration=${CMAKE_BUILD_TYPE} - ${ZSTD_RUNTIME_LIBRARY_LINKAGE} /p:Platform=x64 /p:PlatformToolset=v140 - /p:OutDir=${ZSTD_BUILD_DIR}/build/VS2010/bin/x64_${CMAKE_BUILD_TYPE}/ /p:SolutionDir=${ZSTD_BUILD_DIR}/build/VS2010/ ) - set(ZSTD_PATCH_COMMAND PATCH_COMMAND git --git-dir=. apply --verbose --whitespace=fix ${CMAKE_SOURCE_DIR}/build-support/zstd_msbuild_gl_runtimelibrary_params.patch) else() - set(ZSTD_STATIC_LIB "${ZSTD_BUILD_DIR}/lib/libzstd.a") - set(ZSTD_BUILD_COMMAND BUILD_COMMAND ${CMAKE_SOURCE_DIR}/build-support/build-zstd-lib.sh) + set(ZSTD_STATIC_LIB "${ZSTD_PREFIX}/lib/libzstd.a") + # Only pass our C flags on Unix as on MSVC it leads to a + # "incompatible command-line options" error + set(ZSTD_CMAKE_ARGS ${ZSTD_CMAKE_ARGS} + "-DCMAKE_CXX_FLAGS=${EP_CXX_FLAGS}" + "-DCMAKE_C_FLAGS=${EP_C_FLAGS}") endif() ExternalProject_Add(zstd_ep - URL ${ZSTD_SOURCE_URL} - ${EP_LOG_OPTIONS} - UPDATE_COMMAND "" - ${ZSTD_PATCH_COMMAND} - CONFIGURE_COMMAND "" - INSTALL_COMMAND "" - BINARY_DIR ${ZSTD_BUILD_DIR} - BUILD_BYPRODUCTS ${ZSTD_STATIC_LIB} - ${ZSTD_BUILD_COMMAND} - ) + ${EP_LOG_OPTIONS} + CMAKE_ARGS ${ZSTD_CMAKE_ARGS} + SOURCE_SUBDIR "build/cmake" + INSTALL_DIR ${ZSTD_PREFIX} + URL ${ZSTD_SOURCE_URL} + BUILD_BYPRODUCTS "${ZSTD_STATIC_LIB}") set(ZSTD_VENDORED 1) else() diff --git a/cpp/src/arrow/util/compression_zstd.cc b/cpp/src/arrow/util/compression_zstd.cc index 4064f29c61c7c..8bf02be3f2485 100644 --- a/cpp/src/arrow/util/compression_zstd.cc +++ b/cpp/src/arrow/util/compression_zstd.cc @@ -24,6 +24,7 @@ #include #include "arrow/status.h" +#include "arrow/util/logging.h" #include "arrow/util/macros.h" using std::size_t; @@ -34,6 +35,12 @@ namespace util { // XXX level = 1 probably doesn't compress very much constexpr int kZSTDDefaultCompressionLevel = 1; +static Status ZSTDError(size_t ret, const char* prefix_msg) { + std::stringstream ss; + ss << prefix_msg << ZSTD_getErrorName(ret); + return Status::IOError(ss.str()); +} + // ---------------------------------------------------------------------- // ZSTD decompressor implementation @@ -47,7 +54,7 @@ class ZSTDDecompressor : public Decompressor { finished_ = false; size_t ret = ZSTD_initDStream(stream_); if (ZSTD_isError(ret)) { - return ZSTDError(ret, "zstd init failed: "); + return ZSTDError(ret, "ZSTD init failed: "); } else { return Status::OK(); } @@ -69,7 +76,7 @@ class ZSTDDecompressor : public Decompressor { size_t ret; ret = ZSTD_decompressStream(stream_, &out_buf, &in_buf); if (ZSTD_isError(ret)) { - return ZSTDError(ret, "zstd decompress failed: "); + return ZSTDError(ret, "ZSTD decompress failed: "); } *bytes_read = static_cast(in_buf.pos); *bytes_written = static_cast(out_buf.pos); @@ -81,12 +88,6 @@ class ZSTDDecompressor : public Decompressor { bool IsFinished() override { return finished_; } protected: - Status ZSTDError(size_t ret, const char* prefix_msg) { - std::stringstream ss; - ss << prefix_msg << ZSTD_getErrorName(ret); - return Status::IOError(ss.str()); - } - ZSTD_DStream* stream_; bool finished_; }; @@ -103,7 +104,7 @@ class ZSTDCompressor : public Compressor { Status Init() { size_t ret = ZSTD_initCStream(stream_, kZSTDDefaultCompressionLevel); if (ZSTD_isError(ret)) { - return ZSTDError(ret, "zstd init failed: "); + return ZSTDError(ret, "ZSTD init failed: "); } else { return Status::OK(); } @@ -119,12 +120,6 @@ class ZSTDCompressor : public Compressor { bool* should_retry) override; protected: - Status ZSTDError(size_t ret, const char* prefix_msg) { - std::stringstream ss; - ss << prefix_msg << ZSTD_getErrorName(ret); - return Status::IOError(ss.str()); - } - ZSTD_CStream* stream_; }; @@ -144,7 +139,7 @@ Status ZSTDCompressor::Compress(int64_t input_len, const uint8_t* input, size_t ret; ret = ZSTD_compressStream(stream_, &out_buf, &in_buf); if (ZSTD_isError(ret)) { - return ZSTDError(ret, "zstd compress failed: "); + return ZSTDError(ret, "ZSTD compress failed: "); } *bytes_read = static_cast(in_buf.pos); *bytes_written = static_cast(out_buf.pos); @@ -162,7 +157,7 @@ Status ZSTDCompressor::Flush(int64_t output_len, uint8_t* output, int64_t* bytes size_t ret; ret = ZSTD_flushStream(stream_, &out_buf); if (ZSTD_isError(ret)) { - return ZSTDError(ret, "zstd flush failed: "); + return ZSTDError(ret, "ZSTD flush failed: "); } *bytes_written = static_cast(out_buf.pos); *should_retry = ret > 0; @@ -180,7 +175,7 @@ Status ZSTDCompressor::End(int64_t output_len, uint8_t* output, int64_t* bytes_w size_t ret; ret = ZSTD_endStream(stream_, &out_buf); if (ZSTD_isError(ret)) { - return ZSTDError(ret, "zstd end failed: "); + return ZSTDError(ret, "ZSTD end failed: "); } *bytes_written = static_cast(out_buf.pos); *should_retry = ret > 0; @@ -206,10 +201,20 @@ Status ZSTDCodec::MakeDecompressor(std::shared_ptr* out) { Status ZSTDCodec::Decompress(int64_t input_len, const uint8_t* input, int64_t output_len, uint8_t* output_buffer) { - int64_t decompressed_size = - ZSTD_decompress(output_buffer, static_cast(output_len), input, - static_cast(input_len)); - if (decompressed_size != output_len) { + if (output_buffer == nullptr) { + // We may pass a NULL 0-byte output buffer but some zstd versions demand + // a valid pointer: https://github.com/facebook/zstd/issues/1385 + static uint8_t empty_buffer[1]; + DCHECK_EQ(output_len, 0); + output_buffer = empty_buffer; + } + + size_t ret = ZSTD_decompress(output_buffer, static_cast(output_len), input, + static_cast(input_len)); + if (ZSTD_isError(ret)) { + return ZSTDError(ret, "ZSTD decompression failed: "); + } + if (static_cast(ret) != output_len) { return Status::IOError("Corrupt ZSTD compressed data."); } return Status::OK(); @@ -223,12 +228,13 @@ int64_t ZSTDCodec::MaxCompressedLen(int64_t input_len, Status ZSTDCodec::Compress(int64_t input_len, const uint8_t* input, int64_t output_buffer_len, uint8_t* output_buffer, int64_t* output_length) { - *output_length = + size_t ret = ZSTD_compress(output_buffer, static_cast(output_buffer_len), input, static_cast(input_len), kZSTDDefaultCompressionLevel); - if (ZSTD_isError(*output_length)) { - return Status::IOError("ZSTD compression failure."); + if (ZSTD_isError(ret)) { + return ZSTDError(ret, "ZSTD compression failed: "); } + *output_length = static_cast(ret); return Status::OK(); } diff --git a/cpp/thirdparty/versions.txt b/cpp/thirdparty/versions.txt index 8fbfb388e8b67..51a0c5c59135a 100644 --- a/cpp/thirdparty/versions.txt +++ b/cpp/thirdparty/versions.txt @@ -34,5 +34,5 @@ RAPIDJSON_VERSION=v1.1.0 SNAPPY_VERSION=1.1.3 THRIFT_VERSION=0.11.0 ZLIB_VERSION=1.2.8 -ZSTD_VERSION=v1.2.0 +ZSTD_VERSION=v1.3.7 RE2_VERSION=2018-10-01