diff --git a/ci/appveyor-cpp-build.bat b/ci/appveyor-cpp-build.bat index 4b90e49257c..538caaa5b5f 100644 --- a/ci/appveyor-cpp-build.bat +++ b/ci/appveyor-cpp-build.bat @@ -60,16 +60,19 @@ cmake -G "%GENERATOR%" %CMAKE_ARGS% ^ -DARROW_BUILD_EXAMPLES=ON ^ -DARROW_BUILD_STATIC=OFF ^ -DARROW_BUILD_TESTS=ON ^ + -DARROW_COMPUTE=ON ^ -DARROW_CSV=ON ^ -DARROW_CXXFLAGS="%ARROW_CXXFLAGS%" ^ -DARROW_DATASET=ON ^ -DARROW_ENABLE_TIMING_TESTS=OFF ^ + -DARROW_FILESYSTEM=ON ^ -DARROW_FLIGHT=%ARROW_BUILD_FLIGHT% ^ -DARROW_FLIGHT_SQL=%ARROW_BUILD_FLIGHT_SQL% ^ -DARROW_GANDIVA=%ARROW_BUILD_GANDIVA% ^ + -DARROW_HDFS=ON ^ + -DARROW_JSON=ON ^ -DARROW_MIMALLOC=ON ^ -DARROW_PARQUET=ON ^ - -DARROW_PYTHON=ON ^ -DARROW_S3=%ARROW_S3% ^ -DARROW_SUBSTRAIT=ON ^ -DARROW_VERBOSE_THIRDPARTY_BUILD=OFF ^ diff --git a/ci/docker/conda-python-hdfs.dockerfile b/ci/docker/conda-python-hdfs.dockerfile index 30056ea42cf..94da3e2e094 100644 --- a/ci/docker/conda-python-hdfs.dockerfile +++ b/ci/docker/conda-python-hdfs.dockerfile @@ -42,12 +42,16 @@ COPY ci/etc/hdfs-site.xml $HADOOP_HOME/etc/hadoop/ # build cpp with tests ENV CC=gcc \ CXX=g++ \ + ARROW_BUILD_TESTS=ON \ + ARROW_COMPUTE=ON \ + ARROW_CSV=ON \ + ARROW_DATASET=ON \ + ARROW_FILESYSTEM=ON \ ARROW_FLIGHT=OFF \ ARROW_GANDIVA=OFF \ - ARROW_PLASMA=OFF \ - ARROW_PARQUET=ON \ - PARQUET_REQUIRE_ENCRYPTION=ON \ - ARROW_ORC=OFF \ ARROW_HDFS=ON \ - ARROW_PYTHON=ON \ - ARROW_BUILD_TESTS=ON + ARROW_JSON=ON \ + ARROW_ORC=OFF \ + ARROW_PARQUET=ON \ + ARROW_PLASMA=OFF \ + PARQUET_REQUIRE_ENCRYPTION=ON diff --git a/ci/docker/conda-python-spark.dockerfile b/ci/docker/conda-python-spark.dockerfile index 95f85ef8ee2..861d83fe607 100644 --- a/ci/docker/conda-python-spark.dockerfile +++ b/ci/docker/conda-python-spark.dockerfile @@ -37,7 +37,11 @@ RUN /arrow/ci/scripts/install_spark.sh ${spark} /spark # build cpp with tests ENV CC=gcc \ CXX=g++ \ - ARROW_PYTHON=ON \ - ARROW_HDFS=ON \ ARROW_BUILD_TESTS=OFF \ + ARROW_COMPUTE=ON \ + ARROW_CSV=ON \ + ARROW_DATASET=ON \ + ARROW_FILESYSTEM=ON \ + ARROW_HDFS=ON \ + ARROW_JSON=ON \ SPARK_VERSION=${spark} diff --git a/ci/docker/conda-python.dockerfile b/ci/docker/conda-python.dockerfile index 865a44a9182..85cf5f3a93b 100644 --- a/ci/docker/conda-python.dockerfile +++ b/ci/docker/conda-python.dockerfile @@ -37,10 +37,14 @@ RUN mamba install -q -y \ COPY ci/scripts/install_gcs_testbench.sh /arrow/ci/scripts RUN /arrow/ci/scripts/install_gcs_testbench.sh default -ENV ARROW_PYTHON=ON \ - ARROW_BUILD_STATIC=OFF \ +ENV ARROW_BUILD_STATIC=OFF \ ARROW_BUILD_TESTS=OFF \ ARROW_BUILD_UTILITIES=OFF \ + ARROW_COMPUTE=ON \ + ARROW_CSV=ON \ + ARROW_DATASET=ON \ + ARROW_FILESYSTEM=ON \ + ARROW_HDFS=ON \ + ARROW_JSON=ON \ ARROW_TENSORFLOW=ON \ - ARROW_USE_GLOG=OFF \ - ARROW_HDFS=ON + ARROW_USE_GLOG=OFF diff --git a/ci/docker/linux-apt-docs.dockerfile b/ci/docker/linux-apt-docs.dockerfile index a415f1d5a45..9b27358a69a 100644 --- a/ci/docker/linux-apt-docs.dockerfile +++ b/ci/docker/linux-apt-docs.dockerfile @@ -96,10 +96,15 @@ RUN /arrow/ci/scripts/r_deps.sh /arrow && \ ENV ARROW_BUILD_STATIC=OFF \ ARROW_BUILD_TESTS=OFF \ ARROW_BUILD_UTILITIES=OFF \ + ARROW_COMPUTE=ON \ + ARROW_CSV=ON \ + ARROW_DATASET=ON \ + ARROW_FILESYSTEM=ON \ ARROW_FLIGHT=ON \ ARROW_GCS=ON \ ARROW_GLIB_VAPI=false \ - ARROW_PYTHON=ON \ + ARROW_HDFS=ON \ + ARROW_JSON=ON \ ARROW_S3=ON \ ARROW_USE_GLOG=OFF \ CMAKE_UNITY_BUILD=ON diff --git a/ci/docker/linux-apt-python-3.dockerfile b/ci/docker/linux-apt-python-3.dockerfile index 78f6a12c1c2..19f3666ced4 100644 --- a/ci/docker/linux-apt-python-3.dockerfile +++ b/ci/docker/linux-apt-python-3.dockerfile @@ -45,8 +45,13 @@ RUN if [ "${numba}" != "" ]; then \ /arrow/ci/scripts/install_numba.sh ${numba} \ ; fi -ENV ARROW_PYTHON=ON \ - ARROW_BUILD_STATIC=OFF \ +ENV ARROW_BUILD_STATIC=OFF \ ARROW_BUILD_TESTS=OFF \ ARROW_BUILD_UTILITIES=OFF \ - ARROW_USE_GLOG=OFF \ + ARROW_COMPUTE=ON \ + ARROW_CSV=ON \ + ARROW_DATASET=ON \ + ARROW_FILESYSTEM=ON \ + ARROW_HDFS=ON \ + ARROW_JSON=ON \ + ARROW_USE_GLOG=OFF diff --git a/ci/docker/linux-apt-r.dockerfile b/ci/docker/linux-apt-r.dockerfile index 7083bfa3d95..971078b3601 100644 --- a/ci/docker/linux-apt-r.dockerfile +++ b/ci/docker/linux-apt-r.dockerfile @@ -103,13 +103,18 @@ ENV \ ARROW_BUILD_STATIC=OFF \ ARROW_BUILD_TESTS=OFF \ ARROW_BUILD_UTILITIES=OFF \ + ARROW_COMPUTE=ON \ + ARROW_CSV=ON \ + ARROW_DATASET=ON \ + ARROW_FILESYSTEM=ON \ ARROW_FLIGHT=OFF \ ARROW_GANDIVA=OFF \ + ARROW_HDFS=OFF \ + ARROW_JSON=ON \ ARROW_NO_DEPRECATED_API=ON \ ARROW_ORC=OFF \ ARROW_PARQUET=ON \ ARROW_PLASMA=OFF \ - ARROW_PYTHON=ON \ ARROW_S3=ON \ ARROW_USE_CCACHE=ON \ ARROW_USE_GLOG=OFF \ diff --git a/ci/docker/linux-dnf-python-3.dockerfile b/ci/docker/linux-dnf-python-3.dockerfile index 62dc72899e7..c37febb4e00 100644 --- a/ci/docker/linux-dnf-python-3.dockerfile +++ b/ci/docker/linux-dnf-python-3.dockerfile @@ -36,8 +36,13 @@ RUN pip install \ -r arrow/python/requirements-build.txt \ -r arrow/python/requirements-test.txt -ENV ARROW_PYTHON=ON \ - ARROW_BUILD_STATIC=OFF \ +ENV ARROW_BUILD_STATIC=OFF \ ARROW_BUILD_TESTS=OFF \ ARROW_BUILD_UTILITIES=OFF \ - ARROW_USE_GLOG=OFF \ + ARROW_COMPUTE=ON \ + ARROW_CSV=ON \ + ARROW_DATASET=ON \ + ARROW_FILESYSTEM=ON \ + ARROW_HDFS=ON \ + ARROW_JSON=ON \ + ARROW_USE_GLOG=OFF diff --git a/ci/scripts/cpp_build.sh b/ci/scripts/cpp_build.sh index bb3c2b1bf13..b3d9e0d3ec1 100755 --- a/ci/scripts/cpp_build.sh +++ b/ci/scripts/cpp_build.sh @@ -104,7 +104,6 @@ cmake \ -DARROW_ORC=${ARROW_ORC:-OFF} \ -DARROW_PARQUET=${ARROW_PARQUET:-OFF} \ -DARROW_PLASMA=${ARROW_PLASMA:-OFF} \ - -DARROW_PYTHON=${ARROW_PYTHON:-OFF} \ -DARROW_RUNTIME_SIMD_LEVEL=${ARROW_RUNTIME_SIMD_LEVEL:-MAX} \ -DARROW_S3=${ARROW_S3:-OFF} \ -DARROW_SKYHOOK=${ARROW_SKYHOOK:-OFF} \ diff --git a/ci/scripts/python_wheel_macos_build.sh b/ci/scripts/python_wheel_macos_build.sh index cdd2bd3a400..fd24c58d630 100755 --- a/ci/scripts/python_wheel_macos_build.sh +++ b/ci/scripts/python_wheel_macos_build.sh @@ -96,25 +96,27 @@ cmake \ -DARROW_BUILD_SHARED=ON \ -DARROW_BUILD_STATIC=OFF \ -DARROW_BUILD_TESTS=OFF \ + -DARROW_COMPUTE=ON \ + -DARROW_CSV=ON \ -DARROW_DATASET=${ARROW_DATASET} \ -DARROW_DEPENDENCY_SOURCE="VCPKG" \ -DARROW_DEPENDENCY_USE_SHARED=OFF \ + -DARROW_FILESYSTEM=ON \ -DARROW_FLIGHT=${ARROW_FLIGHT} \ -DARROW_GANDIVA=${ARROW_GANDIVA} \ -DARROW_GCS=${ARROW_GCS} \ -DARROW_HDFS=${ARROW_HDFS} \ -DARROW_JEMALLOC=${ARROW_JEMALLOC} \ + -DARROW_JSON=ON \ -DARROW_MIMALLOC=${ARROW_MIMALLOC} \ -DARROW_ORC=${ARROW_ORC} \ -DARROW_PACKAGE_KIND="python-wheel-macos" \ -DARROW_PARQUET=${ARROW_PARQUET} \ - -DPARQUET_REQUIRE_ENCRYPTION=${PARQUET_REQUIRE_ENCRYPTION} \ -DARROW_PLASMA=${ARROW_PLASMA} \ - -DARROW_PYTHON=ON \ -DARROW_RPATH_ORIGIN=ON \ - -DARROW_SUBSTRAIT=${ARROW_SUBSTRAIT} \ -DARROW_S3=${ARROW_S3} \ -DARROW_SIMD_LEVEL=${ARROW_SIMD_LEVEL} \ + -DARROW_SUBSTRAIT=${ARROW_SUBSTRAIT} \ -DARROW_TENSORFLOW=${ARROW_TENSORFLOW} \ -DARROW_USE_CCACHE=ON \ -DARROW_WITH_BROTLI=${ARROW_WITH_BROTLI} \ @@ -129,9 +131,9 @@ cmake \ -DCMAKE_INSTALL_PREFIX=${build_dir}/install \ -DCMAKE_OSX_ARCHITECTURES=${CMAKE_OSX_ARCHITECTURES} \ -DCMAKE_UNITY_BUILD=${CMAKE_UNITY_BUILD} \ - -DOPENSSL_USE_STATIC_LIBS=ON \ -DORC_PROTOBUF_EXECUTABLE=${VCPKG_ROOT}/installed/${VCPKG_TARGET_TRIPLET}/tools/protobuf/protoc \ -DORC_SOURCE=BUNDLED \ + -DPARQUET_REQUIRE_ENCRYPTION=${PARQUET_REQUIRE_ENCRYPTION} \ -DVCPKG_MANIFEST_MODE=OFF \ -DVCPKG_TARGET_TRIPLET=${VCPKG_TARGET_TRIPLET} \ -G ${CMAKE_GENERATOR} \ diff --git a/ci/scripts/python_wheel_manylinux_build.sh b/ci/scripts/python_wheel_manylinux_build.sh index 4953da57905..2aea55ed70f 100755 --- a/ci/scripts/python_wheel_manylinux_build.sh +++ b/ci/scripts/python_wheel_manylinux_build.sh @@ -89,31 +89,31 @@ pushd /tmp/arrow-build # https://github.com/aws/aws-sdk-cpp/issues/1809 is fixed and vcpkg # ships the fix. cmake \ - -DARROW_BROTLI_USE_SHARED=OFF \ -DARROW_BUILD_SHARED=ON \ -DARROW_BUILD_STATIC=OFF \ -DARROW_BUILD_TESTS=OFF \ + -DARROW_COMPUTE=ON \ + -DARROW_CSV=ON \ -DARROW_DATASET=${ARROW_DATASET} \ -DARROW_DEPENDENCY_SOURCE="VCPKG" \ -DARROW_DEPENDENCY_USE_SHARED=OFF \ + -DARROW_FILESYSTEM=ON \ -DARROW_FLIGHT=${ARROW_FLIGHT} \ -DARROW_GANDIVA=${ARROW_GANDIVA} \ -DARROW_GCS=${ARROW_GCS} \ -DARROW_HDFS=${ARROW_HDFS} \ -DARROW_JEMALLOC=${ARROW_JEMALLOC} \ + -DARROW_JSON=ON \ -DARROW_MIMALLOC=${ARROW_MIMALLOC} \ -DARROW_ORC=${ARROW_ORC} \ -DARROW_PACKAGE_KIND="python-wheel-manylinux${MANYLINUX_VERSION}" \ -DARROW_PARQUET=${ARROW_PARQUET} \ - -DPARQUET_REQUIRE_ENCRYPTION=${PARQUET_REQUIRE_ENCRYPTION} \ -DARROW_PLASMA=${ARROW_PLASMA} \ - -DARROW_PYTHON=ON \ -DARROW_RPATH_ORIGIN=ON \ - -DARROW_SUBSTRAIT=${ARROW_SUBSTRAIT} \ -DARROW_S3=${ARROW_S3} \ + -DARROW_SUBSTRAIT=${ARROW_SUBSTRAIT} \ -DARROW_TENSORFLOW=${ARROW_TENSORFLOW} \ -DARROW_USE_CCACHE=ON \ - -DARROW_UTF8PROC_USE_SHARED=OFF \ -DARROW_WITH_BROTLI=${ARROW_WITH_BROTLI} \ -DARROW_WITH_BZ2=${ARROW_WITH_BZ2} \ -DARROW_WITH_LZ4=${ARROW_WITH_LZ4} \ @@ -125,9 +125,9 @@ cmake \ -DCMAKE_INSTALL_LIBDIR=lib \ -DCMAKE_INSTALL_PREFIX=/tmp/arrow-dist \ -DCMAKE_UNITY_BUILD=${CMAKE_UNITY_BUILD} \ - -DOPENSSL_USE_STATIC_LIBS=ON \ -DORC_PROTOBUF_EXECUTABLE=${VCPKG_ROOT}/installed/${VCPKG_TARGET_TRIPLET}/tools/protobuf/protoc \ -DORC_SOURCE=BUNDLED \ + -DPARQUET_REQUIRE_ENCRYPTION=${PARQUET_REQUIRE_ENCRYPTION} \ -DVCPKG_MANIFEST_MODE=OFF \ -DVCPKG_TARGET_TRIPLET=${VCPKG_TARGET_TRIPLET} \ ${ARROW_EXTRA_CMAKE_FLAGS} \ diff --git a/ci/scripts/python_wheel_windows_build.bat b/ci/scripts/python_wheel_windows_build.bat index 9c1aa1428de..d137cd8a985 100644 --- a/ci/scripts/python_wheel_windows_build.bat +++ b/ci/scripts/python_wheel_windows_build.bat @@ -62,21 +62,23 @@ cmake ^ -DARROW_BUILD_SHARED=ON ^ -DARROW_BUILD_STATIC=OFF ^ -DARROW_BUILD_TESTS=OFF ^ + -DARROW_COMPUTE=ON ^ + -DARROW_CSV=ON ^ -DARROW_CXXFLAGS="/MP" ^ -DARROW_DATASET=%ARROW_DATASET% ^ -DARROW_DEPENDENCY_SOURCE=VCPKG ^ -DARROW_DEPENDENCY_USE_SHARED=OFF ^ + -DARROW_FILESYSTEM=ON ^ -DARROW_FLIGHT=%ARROW_FLIGHT% ^ -DARROW_GANDIVA=%ARROW_GANDIVA% ^ -DARROW_HDFS=%ARROW_HDFS% ^ + -DARROW_JSON=ON ^ -DARROW_MIMALLOC=%ARROW_MIMALLOC% ^ -DARROW_ORC=%ARROW_ORC% ^ -DARROW_PACKAGE_KIND="python-wheel-windows" ^ -DARROW_PARQUET=%ARROW_PARQUET% ^ - -DPARQUET_REQUIRE_ENCRYPTION=%PARQUET_REQUIRE_ENCRYPTION% ^ - -DARROW_PYTHON=ON ^ - -DARROW_SUBSTRAIT=%ARROW_SUBSTRAIT% ^ -DARROW_S3=%ARROW_S3% ^ + -DARROW_SUBSTRAIT=%ARROW_SUBSTRAIT% ^ -DARROW_TENSORFLOW=%ARROW_TENSORFLOW% ^ -DARROW_WITH_BROTLI=%ARROW_WITH_BROTLI% ^ -DARROW_WITH_BZ2=%ARROW_WITH_BZ2% ^ @@ -90,6 +92,7 @@ cmake ^ -DCMAKE_INSTALL_PREFIX=C:\arrow-dist ^ -DCMAKE_UNITY_BUILD=%CMAKE_UNITY_BUILD% ^ -DMSVC_LINK_VERBOSE=ON ^ + -DPARQUET_REQUIRE_ENCRYPTION=%PARQUET_REQUIRE_ENCRYPTION% ^ -DVCPKG_MANIFEST_MODE=OFF ^ -DVCPKG_TARGET_TRIPLET=%VCGPK_TARGET_TRIPLET% ^ -G "%CMAKE_GENERATOR%" ^ diff --git a/cpp/CMakePresets.json b/cpp/CMakePresets.json index a7cdb07f3e2..8adbb53bb86 100644 --- a/cpp/CMakePresets.json +++ b/cpp/CMakePresets.json @@ -117,6 +117,50 @@ "ARROW_GANDIVA": "ON" } }, + { + "name": "features-python-minimal", + "inherits": [ + "features-minimal" + ], + "hidden": true, + "cacheVariables": { + "ARROW_COMPUTE": "ON", + "ARROW_CSV": "ON", + "ARROW_FILESYSTEM": "ON", + "ARROW_JSON": "ON" + } + }, + { + "name": "features-python", + "inherits": [ + "features-main" + ], + "hidden": true, + "cacheVariables": { + "ARROW_COMPUTE": "ON", + "ARROW_CSV": "ON", + "ARROW_DATASET": "ON", + "ARROW_FILESYSTEM": "ON", + "ARROW_JSON": "ON", + "ARROW_ORC": "ON" + } + }, + { + "name": "features-python-maximal", + "inherits": [ + "features-cuda", + "features-filesystems", + "features-flight", + "features-gandiva", + "features-main", + "features-python-minimal" + ], + "hidden": true, + "cacheVariables": { + "ARROW_ORC": "ON", + "PARQUET_REQUIRE_ENCRYPTION": "ON" + } + }, { "name": "features-maximal", "inherits": [ @@ -124,10 +168,10 @@ "features-cuda", "features-filesystems", "features-flight", - "features-gandiva" + "features-gandiva", + "features-python-maximal" ], "hidden": true, - "displayName": "Debug build with everything enabled (except benchmarks and CUDA)", "cacheVariables": { "ARROW_BUILD_EXAMPLES": "ON", "ARROW_BUILD_UTILITIES": "ON", @@ -185,6 +229,24 @@ "displayName": "Debug build with tests and Gandiva", "cacheVariables": {} }, + { + "name": "ninja-debug-python-minimal", + "inherits": ["base-debug", "features-python-minimal"], + "displayName": "Debug build for PyArrow with minimal features", + "cacheVariables": {} + }, + { + "name": "ninja-debug-python", + "inherits": ["base-debug", "features-python"], + "displayName": "Debug build for PyArrow with common features (for backward compatibility)", + "cacheVariables": {} + }, + { + "name": "ninja-debug-python-maximal", + "inherits": ["base-debug", "features-python-maximal"], + "displayName": "Debug build for PyArrow with everything enabled (except CUDA)", + "cacheVariables": {} + }, { "name": "ninja-debug-maximal", "inherits": ["base-debug", "features-maximal"], @@ -228,6 +290,24 @@ "displayName": "Release build with Gandiva", "cacheVariables": {} }, + { + "name": "ninja-release-python-minimal", + "inherits": ["base-release", "features-python-minimal"], + "displayName": "Release build for PyArrow with minimal features", + "cacheVariables": {} + }, + { + "name": "ninja-release-python", + "inherits": ["base-release", "features-python"], + "displayName": "Release build for PyArrow with common features (for backward compatibility)", + "cacheVariables": {} + }, + { + "name": "ninja-release-python-maximal", + "inherits": ["base-release", "features-python-maximal"], + "displayName": "Release build for PyArrow with everything enabled (except CUDA)", + "cacheVariables": {} + }, { "name": "ninja-release-maximal", "inherits": ["base-release", "features-maximal"], diff --git a/cpp/cmake_modules/DefineOptions.cmake b/cpp/cmake_modules/DefineOptions.cmake index d5ebf18d56f..b56918e602c 100644 --- a/cpp/cmake_modules/DefineOptions.cmake +++ b/cpp/cmake_modules/DefineOptions.cmake @@ -122,6 +122,8 @@ endmacro() macro(resolve_option_dependencies) if(MSVC_TOOLCHAIN) + # ARROW-17817: ORC can't be built on Windows. + set(ARROW_ORC OFF) # Plasma using glog is not fully tested on windows. set(ARROW_USE_GLOG OFF) endif() @@ -382,6 +384,18 @@ takes precedence over ccache if a storage backend is configured" ON) define_option(ARROW_PLASMA "Build the plasma object store along with Arrow" OFF) + define_option(ARROW_PYTHON + "Build some components needed by PyArrow.;\ +(This is a deprecated option. Use CMake presets instead.)" + OFF + DEPENDS + ARROW_COMPUTE + ARROW_CSV + ARROW_DATASET + ARROW_FILESYSTEM + ARROW_HDFS + ARROW_JSON) + define_option(ARROW_S3 "Build Arrow with S3 support (requires the AWS SDK for C++)" OFF) define_option(ARROW_SKYHOOK @@ -676,6 +690,9 @@ macro(config_summary_message) endforeach() + if(ARROW_PYTHON) + message(WARNING "ARROW_PYTHON is deprecated. Use CMake presets instead.") + endif() endmacro() macro(config_summary_json) diff --git a/dev/archery/archery/lang/cpp.py b/dev/archery/archery/lang/cpp.py index f113b43ba6e..b83b56971bb 100644 --- a/dev/archery/archery/lang/cpp.py +++ b/dev/archery/archery/lang/cpp.py @@ -134,8 +134,14 @@ def __init__(self, self.with_parquet = coalesce(with_parquet, True) if self.with_python: - self.with_zlib = coalesce(with_zlib, True) + self.with_compute = coalesce(with_compute, True) + self.with_csv = coalesce(with_csv, True) + self.with_dataset = coalesce(with_dataset, True) + self.with_filesystem = coalesce(with_filesystem, True) + self.with_hdfs = coalesce(with_hdfs, True) + self.with_json = coalesce(with_json, True) self.with_lz4 = coalesce(with_lz4, True) + self.with_zlib = coalesce(with_zlib, True) if self.with_dataset: self.with_filesystem = coalesce(with_filesystem, True) @@ -229,7 +235,6 @@ def _gen_defs(self): yield ("ARROW_JEMALLOC", truthifier(self.with_jemalloc)) yield ("ARROW_PARQUET", truthifier(self.with_parquet)) yield ("ARROW_PLASMA", truthifier(self.with_plasma)) - yield ("ARROW_PYTHON", truthifier(self.with_python)) yield ("ARROW_S3", truthifier(self.with_s3)) # Compressions diff --git a/dev/release/verify-release-candidate.bat b/dev/release/verify-release-candidate.bat index 396f0475e29..e7fddd1ff7c 100644 --- a/dev/release/verify-release-candidate.bat +++ b/dev/release/verify-release-candidate.bat @@ -94,13 +94,16 @@ cmake -G "%GENERATOR%" ^ -DARROW_BOOST_USE_SHARED=ON ^ -DARROW_BUILD_STATIC=OFF ^ -DARROW_BUILD_TESTS=ON ^ + -DARROW_COMPUTE=ON ^ + -DARROW_CSV=ON ^ -DARROW_CXXFLAGS="/MP" ^ -DARROW_DATASET=ON ^ + -DARROW_FILESYSTEM=ON ^ -DARROW_FLIGHT=ON ^ + -DARROW_HDFS=ON ^ + -DARROW_JSON=ON ^ -DARROW_MIMALLOC=ON ^ -DARROW_PARQUET=ON ^ - -DPARQUET_REQUIRE_ENCRYPTION=ON ^ - -DARROW_PYTHON=ON ^ -DARROW_WITH_BROTLI=ON ^ -DARROW_WITH_BZ2=ON ^ -DARROW_WITH_LZ4=ON ^ @@ -110,6 +113,7 @@ cmake -G "%GENERATOR%" ^ -DCMAKE_BUILD_TYPE=%CONFIGURATION% ^ -DCMAKE_INSTALL_PREFIX=%ARROW_HOME% ^ -DCMAKE_UNITY_BUILD=ON ^ + -DPARQUET_REQUIRE_ENCRYPTION=ON ^ .. || exit /B cmake --build . --target INSTALL --config Release || exit /B 1 diff --git a/dev/release/verify-release-candidate.sh b/dev/release/verify-release-candidate.sh index 73fdd53b8f2..133cdf90274 100755 --- a/dev/release/verify-release-candidate.sh +++ b/dev/release/verify-release-candidate.sh @@ -600,11 +600,14 @@ test_and_install_cpp() { -DARROW_BUILD_INTEGRATION=ON \ -DARROW_BUILD_TESTS=ON \ -DARROW_BUILD_UTILITIES=ON \ + -DARROW_COMPUTE=ON \ + -DARROW_CSV=ON \ -DARROW_CUDA=${ARROW_CUDA} \ -DARROW_DATASET=ON \ -DARROW_DEPENDENCY_SOURCE=${ARROW_DEPENDENCY_SOURCE:-$DEFAULT_DEPENDENCY_SOURCE} \ - -DARROW_FLIGHT_SQL=${ARROW_FLIGHT_SQL} \ + -DARROW_FILESYSTEM=ON \ -DARROW_FLIGHT=${ARROW_FLIGHT} \ + -DARROW_FLIGHT_SQL=${ARROW_FLIGHT_SQL} \ -DARROW_GANDIVA=${ARROW_GANDIVA} \ -DARROW_GCS=${ARROW_GCS} \ -DARROW_HDFS=ON \ @@ -612,7 +615,6 @@ test_and_install_cpp() { -DARROW_ORC=ON \ -DARROW_PARQUET=ON \ -DARROW_PLASMA=${ARROW_PLASMA} \ - -DARROW_PYTHON=ON \ -DARROW_S3=${ARROW_S3} \ -DARROW_USE_CCACHE=${ARROW_USE_CCACHE:-ON} \ -DARROW_VERBOSE_THIRDPARTY_BUILD=ON \ diff --git a/dev/tasks/conda-recipes/arrow-cpp/bld-arrow.bat b/dev/tasks/conda-recipes/arrow-cpp/bld-arrow.bat index 5ef5d01acc6..02de305eaaa 100644 --- a/dev/tasks/conda-recipes/arrow-cpp/bld-arrow.bat +++ b/dev/tasks/conda-recipes/arrow-cpp/bld-arrow.bat @@ -15,38 +15,41 @@ if "%cuda_compiler_version%"=="None" ( ) cmake -G "Ninja" ^ - -DBUILD_SHARED_LIBS=ON ^ - -DCMAKE_INSTALL_PREFIX="%LIBRARY_PREFIX%" ^ - -DARROW_DEPENDENCY_SOURCE=SYSTEM ^ - -DARROW_PACKAGE_PREFIX="%LIBRARY_PREFIX%" ^ - -DLLVM_TOOLS_BINARY_DIR="%LIBRARY_BIN%" ^ - -DPython3_EXECUTABLE="%PYTHON%" ^ - -DARROW_WITH_BZ2:BOOL=ON ^ - -DARROW_WITH_ZLIB:BOOL=ON ^ - -DARROW_WITH_ZSTD:BOOL=ON ^ - -DARROW_WITH_LZ4:BOOL=ON ^ - -DARROW_WITH_SNAPPY:BOOL=ON ^ - -DARROW_WITH_BROTLI:BOOL=ON ^ -DARROW_BOOST_USE_SHARED:BOOL=ON ^ + -DARROW_BUILD_STATIC:BOOL=OFF ^ -DARROW_BUILD_TESTS:BOOL=OFF ^ -DARROW_BUILD_UTILITIES:BOOL=OFF ^ - -DARROW_BUILD_STATIC:BOOL=OFF ^ - -DCMAKE_BUILD_TYPE=release ^ - -DARROW_SIMD_LEVEL=NONE ^ - -DARROW_PYTHON:BOOL=ON ^ - -DARROW_MIMALLOC:BOOL=ON ^ + -DARROW_COMPUTE:BOOL=ON ^ + -DARROW_CSV:BOOL=ON ^ -DARROW_DATASET:BOOL=ON ^ + -DARROW_DEPENDENCY_SOURCE=SYSTEM ^ + -DARROW_FILESYSTEM:BOOL=ON ^ -DARROW_FLIGHT:BOOL=ON ^ -DARROW_FLIGHT_REQUIRE_TLSCREDENTIALSOPTIONS:BOOL=OFF ^ - -DARROW_HDFS:BOOL=ON ^ - -DARROW_GCS:BOOL=OFF ^ - -DARROW_PARQUET:BOOL=ON ^ - -DPARQUET_REQUIRE_ENCRYPTION:BOOL=ON ^ -DARROW_GANDIVA:BOOL=ON ^ + -DARROW_GCS:BOOL=OFF ^ + -DARROW_HDFS:BOOL=ON ^ + -DARROW_JSON:BOOL=ON ^ + -DARROW_MIMALLOC:BOOL=ON ^ -DARROW_ORC:BOOL=ON ^ + -DARROW_PACKAGE_PREFIX="%LIBRARY_PREFIX%" ^ + -DARROW_PARQUET:BOOL=ON ^ -DARROW_S3:BOOL=ON ^ + -DARROW_SIMD_LEVEL=NONE ^ + -DARROW_WITH_BROTLI:BOOL=ON ^ + -DARROW_WITH_BZ2:BOOL=ON ^ + -DARROW_WITH_LZ4:BOOL=ON ^ + -DARROW_WITH_SNAPPY:BOOL=ON ^ + -DARROW_WITH_ZLIB:BOOL=ON ^ + -DARROW_WITH_ZSTD:BOOL=ON ^ + -DBUILD_SHARED_LIBS=ON ^ -DBoost_NO_BOOST_CMAKE=ON ^ + -DCMAKE_BUILD_TYPE=release ^ + -DCMAKE_INSTALL_PREFIX="%LIBRARY_PREFIX%" ^ -DCMAKE_UNITY_BUILD=ON ^ + -DLLVM_TOOLS_BINARY_DIR="%LIBRARY_BIN%" ^ + -DPARQUET_REQUIRE_ENCRYPTION:BOOL=ON ^ + -DPython3_EXECUTABLE="%PYTHON%" ^ %EXTRA_CMAKE_ARGS% ^ .. if errorlevel 1 exit 1 diff --git a/dev/tasks/conda-recipes/arrow-cpp/build-arrow.sh b/dev/tasks/conda-recipes/arrow-cpp/build-arrow.sh old mode 100644 new mode 100755 index 7b13e92235c..9a45b391617 --- a/dev/tasks/conda-recipes/arrow-cpp/build-arrow.sh +++ b/dev/tasks/conda-recipes/arrow-cpp/build-arrow.sh @@ -60,23 +60,24 @@ cmake \ -DARROW_BUILD_STATIC=OFF \ -DARROW_BUILD_TESTS=OFF \ -DARROW_BUILD_UTILITIES=OFF \ - -DBUILD_SHARED_LIBS=ON \ + -DARROW_COMPUTE=ON \ + -DARROW_CSV=ON \ -DARROW_CXXFLAGS="${CXXFLAGS}" \ - -DARROW_GANDIVA_PC_CXX_FLAGS="${ARROW_GANDIVA_PC_CXX_FLAGS}" \ -DARROW_DATASET=ON \ -DARROW_DEPENDENCY_SOURCE=SYSTEM \ + -DARROW_FILESYSTEM=ON \ -DARROW_FLIGHT=ON \ -DARROW_FLIGHT_REQUIRE_TLSCREDENTIALSOPTIONS=ON \ + -DARROW_GANDIVA_PC_CXX_FLAGS="${ARROW_GANDIVA_PC_CXX_FLAGS}" \ -DARROW_GCS=${ARROW_GCS} \ -DARROW_HDFS=ON \ -DARROW_JEMALLOC=ON \ + -DARROW_JSON=ON \ -DARROW_MIMALLOC=ON \ -DARROW_ORC=ON \ -DARROW_PACKAGE_PREFIX=$PREFIX \ -DARROW_PARQUET=ON \ - -DPARQUET_REQUIRE_ENCRYPTION=ON \ -DARROW_PLASMA=ON \ - -DARROW_PYTHON=ON \ -DARROW_S3=ON \ -DARROW_SIMD_LEVEL=NONE \ -DARROW_USE_LD_GOLD=ON \ @@ -86,13 +87,15 @@ cmake \ -DARROW_WITH_SNAPPY=ON \ -DARROW_WITH_ZLIB=ON \ -DARROW_WITH_ZSTD=ON \ + -DBUILD_SHARED_LIBS=ON \ -DCMAKE_BUILD_TYPE=release \ -DCMAKE_CXX_STANDARD=17 \ -DCMAKE_INSTALL_LIBDIR=lib \ -DCMAKE_INSTALL_PREFIX=$PREFIX \ -DLLVM_TOOLS_BINARY_DIR=$PREFIX/bin \ - -DPython3_EXECUTABLE=${PYTHON} \ + -DPARQUET_REQUIRE_ENCRYPTION=ON \ -DProtobuf_PROTOC_EXECUTABLE=$BUILD_PREFIX/bin/protoc \ + -DPython3_EXECUTABLE=${PYTHON} \ -GNinja \ ${EXTRA_CMAKE_ARGS} \ .. diff --git a/dev/tasks/homebrew-formulae/apache-arrow.rb b/dev/tasks/homebrew-formulae/apache-arrow.rb index 86b08ff6fbe..b91e29cc9e4 100644 --- a/dev/tasks/homebrew-formulae/apache-arrow.rb +++ b/dev/tasks/homebrew-formulae/apache-arrow.rb @@ -72,17 +72,22 @@ def install # link against system libc++ instead of llvm provided libc++ ENV.remove "HOMEBREW_LIBRARY_PATHS", Formula["llvm"].opt_lib args = %W[ + -DARROW_COMPUTE=ON + -DARROW_CSV=ON + -DARROW_DATASET=ON + -DARROW_FILESYSTEM=ON -DARROW_FLIGHT=ON -DARROW_GANDIVA=ON -DARROW_GCS=ON + -DARROW_HDFS=ON -DARROW_INSTALL_NAME_RPATH=OFF -DARROW_JEMALLOC=ON + -DARROW_JSON=ON -DARROW_MIMALLOC=ON -DARROW_ORC=ON -DARROW_PARQUET=ON -DARROW_PLASMA=ON -DARROW_PROTOBUF_USE_SHARED=ON - -DARROW_PYTHON=ON -DARROW_S3=ON -DARROW_WITH_BROTLI=ON -DARROW_WITH_BZ2=ON diff --git a/dev/tasks/vcpkg-tests/cpp-build-vcpkg.bat b/dev/tasks/vcpkg-tests/cpp-build-vcpkg.bat index 6423720c225..8f160ee7c4f 100644 --- a/dev/tasks/vcpkg-tests/cpp-build-vcpkg.bat +++ b/dev/tasks/vcpkg-tests/cpp-build-vcpkg.bat @@ -63,7 +63,6 @@ cmake -G "Visual Studio 16 2019" -A x64 ^ -DARROW_FLIGHT=OFF ^ -DARROW_MIMALLOC=ON ^ -DARROW_PARQUET=ON ^ - -DARROW_PYTHON=OFF ^ -DARROW_WITH_BROTLI=ON ^ -DARROW_WITH_BZ2=ON ^ -DARROW_WITH_LZ4=ON ^ diff --git a/dev/tasks/verify-rc/github.macos.amd64.yml b/dev/tasks/verify-rc/github.macos.amd64.yml index c347bc70cb6..98043636ed9 100644 --- a/dev/tasks/verify-rc/github.macos.amd64.yml +++ b/dev/tasks/verify-rc/github.macos.amd64.yml @@ -71,6 +71,8 @@ jobs: USE_CONDA: 1 {% endif %} run: | + {% if not use_conda %} export CC=$(brew --prefix llvm)/bin/clang export CXX=$(brew --prefix llvm)/bin/clang++ + {% endif %} arrow/dev/release/verify-release-candidate.sh {{ release|default("") }} {{ rc|default("") }} diff --git a/docs/source/developers/cpp/building.rst b/docs/source/developers/cpp/building.rst index 0ef61685ced..595c3bfcb4d 100644 --- a/docs/source/developers/cpp/building.rst +++ b/docs/source/developers/cpp/building.rst @@ -338,11 +338,11 @@ boolean flags to ``cmake``. * ``-DPARQUET_REQUIRE_ENCRYPTION=ON``: Parquet Modular Encryption * ``-DARROW_PLASMA=ON``: Plasma Shared Memory Object Store * ``-DARROW_PLASMA_JAVA_CLIENT=ON``: Build Java client for Plasma -* ``-DARROW_PYTHON=ON``: Arrow Python C++ integration library (required for - building pyarrow). This library must be built against the same Python version - for which you are building pyarrow. NumPy must also be installed. Enabling - this option also enables ``ARROW_COMPUTE``, ``ARROW_CSV``, ``ARROW_DATASET``, - ``ARROW_FILESYSTEM``, ``ARROW_HDFS``, and ``ARROW_JSON``. +* ``-DARROW_PYTHON=ON``: This option is deprecated since 10.0.0. This + will be removed in a future release. Use CMake presets instead. Or + you can enable ``ARROW_COMPUTE``, ``ARROW_CSV``, ``ARROW_DATASET``, + ``ARROW_FILESYSTEM``, ``ARROW_HDFS``, and ``ARROW_JSON`` directly + instead. * ``-DARROW_S3=ON``: Support for Amazon S3-compatible filesystems * ``-DARROW_WITH_RE2=ON`` Build with support for regular expressions using the re2 library, on by default and used when ``ARROW_COMPUTE`` or ``ARROW_GANDIVA`` is ``ON`` diff --git a/docs/source/developers/cpp/development.rst b/docs/source/developers/cpp/development.rst index d9a1b20db2c..48fd5c1a31c 100644 --- a/docs/source/developers/cpp/development.rst +++ b/docs/source/developers/cpp/development.rst @@ -185,22 +185,28 @@ in a new out-of-source CMake build directory like so: mkdir -p $ARROW_ROOT/cpp/iwyu cd $ARROW_ROOT/cpp/iwyu cmake -DCMAKE_EXPORT_COMPILE_COMMANDS=ON \ - -DARROW_PYTHON=ON \ - -DARROW_PARQUET=ON \ - -DARROW_FLIGHT=ON \ - -DARROW_PLASMA=ON \ - -DARROW_GANDIVA=ON \ -DARROW_BUILD_BENCHMARKS=ON \ -DARROW_BUILD_BENCHMARKS_REFERENCE=ON \ -DARROW_BUILD_TESTS=ON \ -DARROW_BUILD_UTILITIES=ON \ + -DARROW_COMPUTE=ON \ + -DARROW_CSV=ON \ + -DARROW_DATASET=ON \ + -DARROW_FILESYSTEM=ON \ + -DARROW_FLIGHT=ON \ + -DARROW_GANDIVA=ON \ + -DARROW_HDFS=ON \ + -DARROW_JSON=ON \ + -DARROW_PARQUET=ON \ + -DARROW_PLASMA=ON \ -DARROW_S3=ON \ -DARROW_WITH_BROTLI=ON \ -DARROW_WITH_BZ2=ON \ -DARROW_WITH_LZ4=ON \ -DARROW_WITH_SNAPPY=ON \ -DARROW_WITH_ZLIB=ON \ - -DARROW_WITH_ZSTD=ON .. + -DARROW_WITH_ZSTD=ON \ + .. In order for IWYU to run on the desired component in the codebase, it must be enabled by the CMake configuration flags. Once this is done, you can run IWYU diff --git a/docs/source/developers/python.rst b/docs/source/developers/python.rst index c30efd2358f..eac08783d5d 100644 --- a/docs/source/developers/python.rst +++ b/docs/source/developers/python.rst @@ -162,8 +162,8 @@ For running the benchmarks, see :ref:`python-benchmarks`. .. _build_pyarrow: -Building on Linux and MacOS -============================= +Building on Linux and macOS +=========================== System Requirements ------------------- @@ -313,21 +313,24 @@ created above (stored in ``$ARROW_HOME``): $ mkdir arrow/cpp/build $ pushd arrow/cpp/build - $ cmake -DCMAKE_INSTALL_PREFIX=$ARROW_HOME \ -DCMAKE_INSTALL_LIBDIR=lib \ -DCMAKE_BUILD_TYPE=Debug \ + -DARROW_BUILD_TESTS=ON \ + -DARROW_COMPUTE=ON \ + -DARROW_CSV=ON \ -DARROW_DATASET=ON \ + -DARROW_FILESYSTEM=ON \ + -DARROW_HDFS=ON \ + -DARROW_JSON=ON \ + -DARROW_PARQUET=ON \ + -DARROW_WITH_BROTLI=ON \ -DARROW_WITH_BZ2=ON \ - -DARROW_WITH_ZLIB=ON \ - -DARROW_WITH_ZSTD=ON \ -DARROW_WITH_LZ4=ON \ -DARROW_WITH_SNAPPY=ON \ - -DARROW_WITH_BROTLI=ON \ - -DARROW_PARQUET=ON \ + -DARROW_WITH_ZLIB=ON \ + -DARROW_WITH_ZSTD=ON \ -DPARQUET_REQUIRE_ENCRYPTION=ON \ - -DARROW_PYTHON=ON \ - -DARROW_BUILD_TESTS=ON \ .. $ make -j4 $ make install @@ -533,13 +536,18 @@ Let's configure, build and install the Arrow C++ libraries: $ cmake -G "%PYARROW_CMAKE_GENERATOR%" ^ -DCMAKE_INSTALL_PREFIX=%ARROW_HOME% ^ -DCMAKE_UNITY_BUILD=ON ^ + -DARROW_COMPUTE=ON ^ + -DARROW_CSV=ON ^ -DARROW_CXXFLAGS="/WX /MP" ^ - -DARROW_WITH_LZ4=on ^ - -DARROW_WITH_SNAPPY=on ^ - -DARROW_WITH_ZLIB=on ^ - -DARROW_WITH_ZSTD=on ^ - -DARROW_PARQUET=on ^ - -DARROW_PYTHON=on ^ + -DARROW_DATASET=ON ^ + -DARROW_FILESYSTEM=ON ^ + -DARROW_HDFS=ON ^ + -DARROW_JSON=ON ^ + -DARROW_PARQUET=ON ^ + -DARROW_WITH_LZ4=ON ^ + -DARROW_WITH_SNAPPY=ON ^ + -DARROW_WITH_ZLIB=ON ^ + -DARROW_WITH_ZSTD=ON ^ .. $ cmake --build . --target INSTALL --config Release $ popd @@ -603,10 +611,15 @@ configuration of the Arrow C++ library build: $ pushd arrow\cpp\build $ cmake -G "%PYARROW_CMAKE_GENERATOR%" ^ -DCMAKE_INSTALL_PREFIX=%ARROW_HOME% ^ - -DARROW_CXXFLAGS="/WX /MP" ^ - -DARROW_PARQUET=on ^ - -DARROW_PYTHON=on ^ -DARROW_BUILD_TESTS=ON ^ + -DARROW_COMPUTE=ON ^ + -DARROW_CSV=ON ^ + -DARROW_CXXFLAGS="/WX /MP" ^ + -DARROW_DATASET=ON ^ + -DARROW_FILESYSTEM=ON ^ + -DARROW_HDFS=ON ^ + -DARROW_JSON=ON ^ + -DARROW_PARQUET=ON ^ .. $ cmake --build . --target INSTALL --config Release $ popd