diff --git a/.azure-pipelines/azure-pipelines-linux.yml b/.azure-pipelines/azure-pipelines-linux.yml
index 3eed8c52f..61587c22d 100755
--- a/.azure-pipelines/azure-pipelines-linux.yml
+++ b/.azure-pipelines/azure-pipelines-linux.yml
@@ -57,6 +57,19 @@ jobs:
         sudo apt-get autoclean -y >& /dev/null
         df -h
       displayName: Manage disk space
+  - script: |
+      SWAPFILE=/swapfile
+      # If there is already a swapfile, disable it and remove it
+      if swapon --show | grep -q $SWAPFILE; then
+        echo "Disabling existing swapfile..."
+        sudo swapoff $SWAPFILE || true
+      fi
+      [ -f "$SWAPFILE" ] && sudo rm -f $SWAPFILE
+      sudo fallocate -l 8GiB $SWAPFILE
+      sudo chmod 600 $SWAPFILE
+      sudo mkswap $SWAPFILE
+      sudo swapon $SWAPFILE
+    displayName: Create swap file
   # configure qemu binfmt-misc running. This allows us to run docker containers
   # embedded qemu-static
   - script: |
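As a sanity check, the effect of the "Create swap file" step can be confirmed on a running agent. A minimal sketch, not part of the patch, assuming a standard Ubuntu image with util-linux available:

```
# verify the swapfile is active after the step above has run
swapon --show    # expect /swapfile, type "file", size 8G
free -h          # the Swap row should report roughly 8.0Gi total
```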
diff --git a/.ci_support/linux_64_c_compiler_version11cuda_compiler_version11.8cxx_compiler_version11.yaml b/.ci_support/linux_64_c_compiler_version11cuda_compiler_version11.8cxx_compiler_version11.yaml
index 86cc5b834..8a7be77ce 100644
--- a/.ci_support/linux_64_c_compiler_version11cuda_compiler_version11.8cxx_compiler_version11.yaml
+++ b/.ci_support/linux_64_c_compiler_version11cuda_compiler_version11.8cxx_compiler_version11.yaml
@@ -64,10 +64,22 @@ libutf8proc:
 - '2.11'
 lz4_c:
 - '1.10'
+numpy:
+- '2'
 openssl:
 - '3.5'
 orc:
 - 2.2.1
+pin_run_as_build:
+  python:
+    min_pin: x.x
+    max_pin: x.x
+python:
+- 3.10.* *_cpython
+- 3.11.* *_cpython
+- 3.12.* *_cpython
+- 3.13.* *_cp313
+- 3.14.* *_cp314
 re2:
 - 2025.08.12
 snappy:
diff --git a/.ci_support/linux_64_c_compiler_version14cuda_compiler_versionNonecxx_compiler_version14.yaml b/.ci_support/linux_64_c_compiler_version14cuda_compiler_versionNonecxx_compiler_version14.yaml
index 23293a6fc..c24ce1f36 100644
--- a/.ci_support/linux_64_c_compiler_version14cuda_compiler_versionNonecxx_compiler_version14.yaml
+++ b/.ci_support/linux_64_c_compiler_version14cuda_compiler_versionNonecxx_compiler_version14.yaml
@@ -64,10 +64,22 @@ libutf8proc:
 - '2.11'
 lz4_c:
 - '1.10'
+numpy:
+- '2'
 openssl:
 - '3.5'
 orc:
 - 2.2.1
+pin_run_as_build:
+  python:
+    min_pin: x.x
+    max_pin: x.x
+python:
+- 3.10.* *_cpython
+- 3.11.* *_cpython
+- 3.12.* *_cpython
+- 3.13.* *_cp313
+- 3.14.* *_cp314
 re2:
 - 2025.08.12
 snappy:
diff --git a/.ci_support/linux_aarch64_c_compiler_version11cuda_compiler_version11.8cxx_compiler_version11.yaml b/.ci_support/linux_aarch64_c_compiler_version11cuda_compiler_version11.8cxx_compiler_version11.yaml
index 2e9510d18..1587d5c19 100644
--- a/.ci_support/linux_aarch64_c_compiler_version11cuda_compiler_version11.8cxx_compiler_version11.yaml
+++ b/.ci_support/linux_aarch64_c_compiler_version11cuda_compiler_version11.8cxx_compiler_version11.yaml
@@ -64,10 +64,22 @@ libutf8proc:
 - '2.11'
 lz4_c:
 - '1.10'
+numpy:
+- '2'
 openssl:
 - '3.5'
 orc:
 - 2.2.1
+pin_run_as_build:
+  python:
+    min_pin: x.x
+    max_pin: x.x
+python:
+- 3.10.* *_cpython
+- 3.11.* *_cpython
+- 3.12.* *_cpython
+- 3.13.* *_cp313
+- 3.14.* *_cp314
 re2:
 - 2025.08.12
 snappy:
diff --git a/.ci_support/linux_aarch64_c_compiler_version14cuda_compiler_versionNonecxx_compiler_version14.yaml b/.ci_support/linux_aarch64_c_compiler_version14cuda_compiler_versionNonecxx_compiler_version14.yaml
index 65ceb1338..73c426fbd 100644
--- a/.ci_support/linux_aarch64_c_compiler_version14cuda_compiler_versionNonecxx_compiler_version14.yaml
+++ b/.ci_support/linux_aarch64_c_compiler_version14cuda_compiler_versionNonecxx_compiler_version14.yaml
@@ -64,10 +64,22 @@ libutf8proc:
 - '2.11'
 lz4_c:
 - '1.10'
+numpy:
+- '2'
 openssl:
 - '3.5'
 orc:
 - 2.2.1
+pin_run_as_build:
+  python:
+    min_pin: x.x
+    max_pin: x.x
+python:
+- 3.10.* *_cpython
+- 3.11.* *_cpython
+- 3.12.* *_cpython
+- 3.13.* *_cp313
+- 3.14.* *_cp314
 re2:
 - 2025.08.12
 snappy:
diff --git a/.ci_support/linux_ppc64le_c_compiler_version11cuda_compiler_version11.8cxx_compiler_version11.yaml b/.ci_support/linux_ppc64le_c_compiler_version11cuda_compiler_version11.8cxx_compiler_version11.yaml
index 7c2dcc454..1fa056b00 100644
--- a/.ci_support/linux_ppc64le_c_compiler_version11cuda_compiler_version11.8cxx_compiler_version11.yaml
+++ b/.ci_support/linux_ppc64le_c_compiler_version11cuda_compiler_version11.8cxx_compiler_version11.yaml
@@ -64,10 +64,22 @@ libutf8proc:
 - '2.11'
 lz4_c:
 - '1.10'
+numpy:
+- '2'
 openssl:
 - '3.5'
 orc:
 - 2.2.1
+pin_run_as_build:
+  python:
+    min_pin: x.x
+    max_pin: x.x
+python:
+- 3.10.* *_cpython
+- 3.11.* *_cpython
+- 3.12.* *_cpython
+- 3.13.* *_cp313
+- 3.14.* *_cp314
 re2:
 - 2025.08.12
 snappy:
diff --git a/.ci_support/linux_ppc64le_c_compiler_version14cuda_compiler_versionNonecxx_compiler_version14.yaml b/.ci_support/linux_ppc64le_c_compiler_version14cuda_compiler_versionNonecxx_compiler_version14.yaml
index 1b2be163a..5aab5758e 100644
--- a/.ci_support/linux_ppc64le_c_compiler_version14cuda_compiler_versionNonecxx_compiler_version14.yaml
+++ b/.ci_support/linux_ppc64le_c_compiler_version14cuda_compiler_versionNonecxx_compiler_version14.yaml
@@ -64,10 +64,22 @@ libutf8proc:
 - '2.11'
 lz4_c:
 - '1.10'
+numpy:
+- '2'
 openssl:
 - '3.5'
 orc:
 - 2.2.1
+pin_run_as_build:
+  python:
+    min_pin: x.x
+    max_pin: x.x
+python:
+- 3.10.* *_cpython
+- 3.11.* *_cpython
+- 3.12.* *_cpython
+- 3.13.* *_cp313
+- 3.14.* *_cp314
 re2:
 - 2025.08.12
 snappy:
diff --git a/.ci_support/migrations/python314.yaml b/.ci_support/migrations/python314.yaml
new file mode 100644
index 000000000..36ec6b413
--- /dev/null
+++ b/.ci_support/migrations/python314.yaml
@@ -0,0 +1,43 @@
+# this is intentionally sorted before the 3.13t migrator, because that determines
+# the order of application of the migrators; otherwise we'd have to add values for
+# is_freethreading and is_abi3 keys here, since that migration extends the zip;
+migrator_ts: 1724712607
+__migrator:
+  commit_message: Rebuild for python 3.14
+  migration_number: 1
+  operation: key_add
+  primary_key: python
+  ordering:
+    python:
+      - 3.9.* *_cpython
+      - 3.10.* *_cpython
+      - 3.11.* *_cpython
+      - 3.12.* *_cpython
+      - 3.13.* *_cp313
+      - 3.13.* *_cp313t
+      - 3.14.* *_cp314  # new entry
+  paused: false
+  longterm: true
+  pr_limit: 5
+  max_solver_attempts: 3  # this will make the bot retry "not solvable" stuff 12 times
+  exclude:
+    # this shouldn't attempt to modify the python feedstocks
+    - python
+    - pypy3.6
+    - pypy-meta
+    - cross-python
+    - python_abi
+  exclude_pinned_pkgs: false
+  ignored_deps_per_node:
+    matplotlib:
+      - pyqt
+  additional_zip_keys:
+    - channel_sources
+
+python:
+- 3.14.* *_cp314
+# additional entries to add for zip_keys
+is_python_min:
+- false
+channel_sources:
+- conda-forge,conda-forge/label/python_rc
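The `.ci_support/*.yaml` changes above are generated, not hand-edited: this migrator's `operation: key_add` appends the `3.14.* *_cp314` build to each config's `python` list and pulls in the matching `numpy` and `pin_run_as_build` keys. A sketch of regenerating them locally, assuming `conda-smithy` is installed:

```
# re-render the feedstock so the migrators under .ci_support/migrations/ are applied
conda smithy rerender
git diff --stat .ci_support/   # the python/numpy/pin_run_as_build keys appear per platform
```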
diff --git a/.ci_support/osx_64_.yaml b/.ci_support/osx_64_.yaml
index 2ccd26fd1..5115011ab 100644
--- a/.ci_support/osx_64_.yaml
+++ b/.ci_support/osx_64_.yaml
@@ -64,10 +64,22 @@ lz4_c:
 - '1.10'
 macos_machine:
 - x86_64-apple-darwin13.4.0
+numpy:
+- '2'
 openssl:
 - '3.5'
 orc:
 - 2.2.1
+pin_run_as_build:
+  python:
+    min_pin: x.x
+    max_pin: x.x
+python:
+- 3.10.* *_cpython
+- 3.11.* *_cpython
+- 3.12.* *_cpython
+- 3.13.* *_cp313
+- 3.14.* *_cp314
 re2:
 - 2025.08.12
 snappy:
diff --git a/.ci_support/osx_arm64_.yaml b/.ci_support/osx_arm64_.yaml
index d16a4963b..daa489bb1 100644
--- a/.ci_support/osx_arm64_.yaml
+++ b/.ci_support/osx_arm64_.yaml
@@ -64,10 +64,22 @@ lz4_c:
 - '1.10'
 macos_machine:
 - arm64-apple-darwin20.0.0
+numpy:
+- '2'
 openssl:
 - '3.5'
 orc:
 - 2.2.1
+pin_run_as_build:
+  python:
+    min_pin: x.x
+    max_pin: x.x
+python:
+- 3.10.* *_cpython
+- 3.11.* *_cpython
+- 3.12.* *_cpython
+- 3.13.* *_cp313
+- 3.14.* *_cp314
 re2:
 - 2025.08.12
 snappy:
diff --git a/.ci_support/win_64_cuda_compiler_version11.8.yaml b/.ci_support/win_64_cuda_compiler_version11.8.yaml
index 60e919f3b..e2b421729 100644
--- a/.ci_support/win_64_cuda_compiler_version11.8.yaml
+++ b/.ci_support/win_64_cuda_compiler_version11.8.yaml
@@ -50,10 +50,22 @@ libutf8proc:
 - '2.11'
 lz4_c:
 - '1.10'
+numpy:
+- '2'
 openssl:
 - '3.5'
 orc:
 - 2.2.1
+pin_run_as_build:
+  python:
+    min_pin: x.x
+    max_pin: x.x
+python:
+- 3.10.* *_cpython
+- 3.11.* *_cpython
+- 3.12.* *_cpython
+- 3.13.* *_cp313
+- 3.14.* *_cp314
 re2:
 - 2025.08.12
 snappy:
diff --git a/.ci_support/win_64_cuda_compiler_versionNone.yaml b/.ci_support/win_64_cuda_compiler_versionNone.yaml
index 03364011b..a7f71868c 100644
--- a/.ci_support/win_64_cuda_compiler_versionNone.yaml
+++ b/.ci_support/win_64_cuda_compiler_versionNone.yaml
@@ -50,10 +50,22 @@ libutf8proc:
 - '2.11'
 lz4_c:
 - '1.10'
+numpy:
+- '2'
 openssl:
 - '3.5'
 orc:
 - 2.2.1
+pin_run_as_build:
+  python:
+    min_pin: x.x
+    max_pin: x.x
+python:
+- 3.10.* *_cpython
+- 3.11.* *_cpython
+- 3.12.* *_cpython
+- 3.13.* *_cp313
+- 3.14.* *_cp314
 re2:
 - 2025.08.12
 snappy:
diff --git a/.scripts/build_steps.sh b/.scripts/build_steps.sh
index 780a358ff..14a2f85b2 100755
--- a/.scripts/build_steps.sh
+++ b/.scripts/build_steps.sh
@@ -80,12 +80,6 @@ else
   command -v inspect_artifacts >/dev/null 2>&1 && inspect_artifacts --recipe-dir "${RECIPE_ROOT}" -m "${CONFIG_FILE}" || echo "inspect_artifacts needs conda-forge-ci-setup >=4.9.4"
   ( endgroup "Inspecting artifacts" ) 2> /dev/null
 
-  ( startgroup "Validating outputs" ) 2> /dev/null
-
-  validate_recipe_outputs "${FEEDSTOCK_NAME}"
-
-  ( endgroup "Validating outputs" ) 2> /dev/null
-
   ( startgroup "Uploading packages" ) 2> /dev/null
 
   if [[ "${UPLOAD_PACKAGES}" != "False" ]] && [[ "${IS_PR_BUILD}" == "False" ]]; then
diff --git a/.scripts/run_osx_build.sh b/.scripts/run_osx_build.sh
index 361edeb2c..1c4888183 100755
--- a/.scripts/run_osx_build.sh
+++ b/.scripts/run_osx_build.sh
@@ -104,12 +104,6 @@ else
  command -v inspect_artifacts >/dev/null 2>&1 && inspect_artifacts --recipe-dir ./recipe -m ./.ci_support/${CONFIG}.yaml || echo "inspect_artifacts needs conda-forge-ci-setup >=4.9.4"
  ( endgroup "Inspecting artifacts" ) 2> /dev/null
 
-  ( startgroup "Validating outputs" ) 2> /dev/null
-
-  validate_recipe_outputs "${FEEDSTOCK_NAME}"
-
-  ( endgroup "Validating outputs" ) 2> /dev/null
-
   ( startgroup "Uploading packages" ) 2> /dev/null
 
   if [[ "${UPLOAD_PACKAGES}" != "False" ]] && [[ "${IS_PR_BUILD}" == "False" ]]; then
diff --git a/.scripts/run_win_build.bat b/.scripts/run_win_build.bat
index 27c552b1a..073c54976 100755
--- a/.scripts/run_win_build.bat
+++ b/.scripts/run_win_build.bat
@@ -106,12 +106,6 @@ if /i "%CI%" == "azure" (
     set "TEMP=%UPLOAD_TEMP%"
 )
 
-:: Validate
-call :start_group "Validating outputs"
-validate_recipe_outputs "%FEEDSTOCK_NAME%"
-if !errorlevel! neq 0 exit /b !errorlevel!
-call :end_group
-
 if /i "%UPLOAD_PACKAGES%" == "true" (
     if /i "%IS_PR_BUILD%" == "false" (
         call :start_group "Uploading packages"
diff --git a/README.md b/README.md
index 02670e3c3..1b727ad7d 100644
--- a/README.md
+++ b/README.md
@@ -121,6 +121,42 @@ Package license: Apache-2.0
 
 Summary: Executables for inspecting Apache Parquet files
 
+About pyarrow
+-------------
+
+Home: http://github.com/apache/arrow
+
+Package license: Apache-2.0
+
+Summary: Python libraries for Apache Arrow with default capabilities
+
+About pyarrow-all
+-----------------
+
+Home: http://github.com/apache/arrow
+
+Package license: Apache-2.0
+
+Summary: Python libraries for Apache Arrow with all capabilities
+
+About pyarrow-core
+------------------
+
+Home: http://github.com/apache/arrow
+
+Package license: Apache-2.0
+
+Summary: Python libraries for Apache Arrow Core
+
+About pyarrow-tests
+-------------------
+
+Home: http://github.com/apache/arrow
+
+Package license: Apache-2.0
+
+Summary: Python test files for Apache Arrow
+
 Current build status
 ====================
 
@@ -234,6 +270,10 @@
 | [![Conda Recipe](https://img.shields.io/badge/recipe-libarrow--substrait-green.svg)](https://anaconda.org/conda-forge/libarrow-substrait) | [![Conda Downloads](https://img.shields.io/conda/dn/conda-forge/libarrow-substrait.svg)](https://anaconda.org/conda-forge/libarrow-substrait) | [![Conda Version](https://img.shields.io/conda/vn/conda-forge/libarrow-substrait.svg)](https://anaconda.org/conda-forge/libarrow-substrait) | [![Conda Platforms](https://img.shields.io/conda/pn/conda-forge/libarrow-substrait.svg)](https://anaconda.org/conda-forge/libarrow-substrait) |
 | [![Conda Recipe](https://img.shields.io/badge/recipe-libparquet-green.svg)](https://anaconda.org/conda-forge/libparquet) | [![Conda Downloads](https://img.shields.io/conda/dn/conda-forge/libparquet.svg)](https://anaconda.org/conda-forge/libparquet) | [![Conda Version](https://img.shields.io/conda/vn/conda-forge/libparquet.svg)](https://anaconda.org/conda-forge/libparquet) | [![Conda Platforms](https://img.shields.io/conda/pn/conda-forge/libparquet.svg)](https://anaconda.org/conda-forge/libparquet) |
 | [![Conda Recipe](https://img.shields.io/badge/recipe-parquet--utils-green.svg)](https://anaconda.org/conda-forge/parquet-utils) | [![Conda Downloads](https://img.shields.io/conda/dn/conda-forge/parquet-utils.svg)](https://anaconda.org/conda-forge/parquet-utils) | [![Conda Version](https://img.shields.io/conda/vn/conda-forge/parquet-utils.svg)](https://anaconda.org/conda-forge/parquet-utils) | [![Conda Platforms](https://img.shields.io/conda/pn/conda-forge/parquet-utils.svg)](https://anaconda.org/conda-forge/parquet-utils) |
+| [![Conda Recipe](https://img.shields.io/badge/recipe-pyarrow-green.svg)](https://anaconda.org/conda-forge/pyarrow) | [![Conda Downloads](https://img.shields.io/conda/dn/conda-forge/pyarrow.svg)](https://anaconda.org/conda-forge/pyarrow) | [![Conda Version](https://img.shields.io/conda/vn/conda-forge/pyarrow.svg)](https://anaconda.org/conda-forge/pyarrow) | [![Conda Platforms](https://img.shields.io/conda/pn/conda-forge/pyarrow.svg)](https://anaconda.org/conda-forge/pyarrow) |
+| [![Conda Recipe](https://img.shields.io/badge/recipe-pyarrow--all-green.svg)](https://anaconda.org/conda-forge/pyarrow-all) | [![Conda Downloads](https://img.shields.io/conda/dn/conda-forge/pyarrow-all.svg)](https://anaconda.org/conda-forge/pyarrow-all) | [![Conda Version](https://img.shields.io/conda/vn/conda-forge/pyarrow-all.svg)](https://anaconda.org/conda-forge/pyarrow-all) | [![Conda Platforms](https://img.shields.io/conda/pn/conda-forge/pyarrow-all.svg)](https://anaconda.org/conda-forge/pyarrow-all) |
+| [![Conda Recipe](https://img.shields.io/badge/recipe-pyarrow--core-green.svg)](https://anaconda.org/conda-forge/pyarrow-core) | [![Conda Downloads](https://img.shields.io/conda/dn/conda-forge/pyarrow-core.svg)](https://anaconda.org/conda-forge/pyarrow-core) | [![Conda Version](https://img.shields.io/conda/vn/conda-forge/pyarrow-core.svg)](https://anaconda.org/conda-forge/pyarrow-core) | [![Conda Platforms](https://img.shields.io/conda/pn/conda-forge/pyarrow-core.svg)](https://anaconda.org/conda-forge/pyarrow-core) |
+| [![Conda Recipe](https://img.shields.io/badge/recipe-pyarrow--tests-green.svg)](https://anaconda.org/conda-forge/pyarrow-tests) | [![Conda Downloads](https://img.shields.io/conda/dn/conda-forge/pyarrow-tests.svg)](https://anaconda.org/conda-forge/pyarrow-tests) | [![Conda Version](https://img.shields.io/conda/vn/conda-forge/pyarrow-tests.svg)](https://anaconda.org/conda-forge/pyarrow-tests) | [![Conda Platforms](https://img.shields.io/conda/pn/conda-forge/pyarrow-tests.svg)](https://anaconda.org/conda-forge/pyarrow-tests) |
 
 Installing arrow-cpp
 ====================
@@ -245,16 +285,16 @@ conda config --add channels conda-forge
 conda config --set channel_priority strict
 ```
 
-Once the `conda-forge` channel has been enabled, `apache-arrow-proc, arrow-utils, libarrow, libarrow-acero, libarrow-all, libarrow-compute, libarrow-dataset, libarrow-flight, libarrow-flight-sql, libarrow-gandiva, libarrow-substrait, libparquet, parquet-utils` can be installed with `conda`:
+Once the `conda-forge` channel has been enabled, `apache-arrow-proc, arrow-utils, libarrow, libarrow-acero, libarrow-all, libarrow-compute, libarrow-dataset, libarrow-flight, libarrow-flight-sql, libarrow-gandiva, libarrow-substrait, libparquet, parquet-utils, pyarrow, pyarrow-all, pyarrow-core, pyarrow-tests` can be installed with `conda`:
 
 ```
-conda install apache-arrow-proc arrow-utils libarrow libarrow-acero libarrow-all libarrow-compute libarrow-dataset libarrow-flight libarrow-flight-sql libarrow-gandiva libarrow-substrait libparquet parquet-utils
+conda install apache-arrow-proc arrow-utils libarrow libarrow-acero libarrow-all libarrow-compute libarrow-dataset libarrow-flight libarrow-flight-sql libarrow-gandiva libarrow-substrait libparquet parquet-utils pyarrow pyarrow-all pyarrow-core pyarrow-tests
 ```
 
 or with `mamba`:
 
 ```
-mamba install apache-arrow-proc arrow-utils libarrow libarrow-acero libarrow-all libarrow-compute libarrow-dataset libarrow-flight libarrow-flight-sql libarrow-gandiva libarrow-substrait libparquet parquet-utils
+mamba install apache-arrow-proc arrow-utils libarrow libarrow-acero libarrow-all libarrow-compute libarrow-dataset libarrow-flight libarrow-flight-sql libarrow-gandiva libarrow-substrait libparquet parquet-utils pyarrow pyarrow-all pyarrow-core pyarrow-tests
 ```
 
 It is possible to list all of the versions of `apache-arrow-proc` available on your platform with `conda`:
diff --git a/conda-forge.yml b/conda-forge.yml
index e72f9ed33..c48dd440a 100644
--- a/conda-forge.yml
+++ b/conda-forge.yml
@@ -1,6 +1,7 @@
 azure:
   free_disk_space: true
-  max_parallel: 20
+  settings_linux:
+    swapfile_size: 8GiB
   settings_win:
     variables:
       CONDA_BLD_PATH: C:\\bld\\
diff --git a/recipe/bld.bat b/recipe/bld.bat
index b574e4555..683428630 100644
--- a/recipe/bld.bat
+++ b/recipe/bld.bat
@@ -41,7 +41,8 @@ cmake -G "Ninja" ^
     -DARROW_PARQUET:BOOL=ON ^
     -DPARQUET_BUILD_EXECUTABLES:BOOL=ON ^
     -DARROW_S3:BOOL=ON ^
-    -DARROW_SIMD_LEVEL:STRING=NONE ^
+    -DARROW_SIMD_LEVEL:STRING=DEFAULT ^
+    -DARROW_RUNTIME_SIMD_LEVEL=MAX ^
    -DARROW_SUBSTRAIT:BOOL=ON ^
     -DARROW_USE_GLOG:BOOL=ON ^
     -DARROW_WITH_BROTLI:BOOL=ON ^
diff --git a/recipe/build-pyarrow.bat b/recipe/build-pyarrow.bat
new file mode 100644
index 000000000..6218eebb9
--- /dev/null
+++ b/recipe/build-pyarrow.bat
@@ -0,0 +1,40 @@
+@echo on
+
+pushd "%SRC_DIR%"\python
+
+SET ARROW_HOME=%LIBRARY_PREFIX%
+SET SETUPTOOLS_SCM_PRETEND_VERSION=%PKG_VERSION%
+SET PYARROW_BUILD_TYPE=release
+SET PYARROW_WITH_ACERO=1
+SET PYARROW_WITH_DATASET=1
+SET PYARROW_WITH_FLIGHT=1
+SET PYARROW_WITH_GANDIVA=1
+SET PYARROW_WITH_GCS=1
+SET PYARROW_WITH_HDFS=1
+SET PYARROW_WITH_ORC=1
+SET PYARROW_WITH_PARQUET=1
+SET PYARROW_WITH_PARQUET_ENCRYPTION=1
+SET PYARROW_WITH_S3=1
+SET PYARROW_WITH_SUBSTRAIT=1
+SET PYARROW_CMAKE_GENERATOR=Ninja
+
+:: Enable CUDA support
+if "%cuda_compiler_version%"=="None" (
+    set "PYARROW_WITH_CUDA=0"
+) else (
+    set "PYARROW_WITH_CUDA=1"
+)
+
+%PYTHON% setup.py ^
+    build_ext ^
+    install --single-version-externally-managed ^
+    --record=record.txt
+if %ERRORLEVEL% neq 0 exit 1
+popd
+
+if [%PKG_NAME%] NEQ [pyarrow-tests] (
+    rd /s /q %SP_DIR%\pyarrow\tests
+)
+
+:: generated by setup.py
+rmdir .\python\build /s /q
diff --git a/recipe/build-pyarrow.sh b/recipe/build-pyarrow.sh
new file mode 100644
index 000000000..8a449b63c
--- /dev/null
+++ b/recipe/build-pyarrow.sh
@@ -0,0 +1,64 @@
+#!/bin/bash
+set -ex
+
+# Build dependencies
+export ARROW_HOME=$PREFIX
+export PARQUET_HOME=$PREFIX
+export SETUPTOOLS_SCM_PRETEND_VERSION=$PKG_VERSION
+export PYARROW_BUILD_TYPE=release
+export PYARROW_WITH_ACERO=1
+export PYARROW_WITH_AZURE=1
+export PYARROW_WITH_DATASET=1
+export PYARROW_WITH_FLIGHT=1
+export PYARROW_WITH_GANDIVA=1
+export PYARROW_WITH_GCS=1
+export PYARROW_WITH_HDFS=1
+export PYARROW_WITH_ORC=1
+export PYARROW_WITH_PARQUET=1
+export PYARROW_WITH_PARQUET_ENCRYPTION=1
+export PYARROW_WITH_S3=1
+export PYARROW_WITH_SUBSTRAIT=1
+export PYARROW_CMAKE_GENERATOR=Ninja
+export PYARROW_CMAKE_OPTIONS="-DARROW_SIMD_LEVEL=NONE"
+BUILD_EXT_FLAGS=""
+
+# Enable CUDA support
+if [[ ! -z "${cuda_compiler_version+x}" && "${cuda_compiler_version}" != "None" ]]; then
+    export PYARROW_WITH_CUDA=1
+    if [[ "${build_platform}" != "${target_platform}" ]]; then
+        export CUDAToolkit_ROOT=${CUDA_HOME}
+        export CMAKE_LIBRARY_PATH=${CONDA_BUILD_SYSROOT}/lib
+    fi
+else
+    export PYARROW_WITH_CUDA=0
+fi
+
+# Resolve: CMake Error at cmake_modules/SetupCxxFlags.cmake:338 (message): Unsupported arch flag: -march=.
+if [[ "${target_platform}" == "linux-aarch64" ]]; then
+    export PYARROW_CMAKE_OPTIONS="-DARROW_ARMV8_ARCH=armv8-a ${PYARROW_CMAKE_OPTIONS}"
+fi
+
+if [[ "${target_platform}" == osx-* ]]; then
+    # See https://conda-forge.org/docs/maintainer/knowledge_base.html#newer-c-features-with-old-sdk
+    CXXFLAGS="${CXXFLAGS} -D_LIBCPP_DISABLE_AVAILABILITY"
+fi
+
+if [[ "${target_platform}" == "linux-aarch64" ]] || [[ "${target_platform}" == "linux-ppc64le" ]]; then
+    # Limit number of threads used to avoid hardware oversubscription
+    export CMAKE_BUILD_PARALLEL_LEVEL=4
+fi
+
+cd python
+
+$PYTHON setup.py \
+    build_ext \
+    install --single-version-externally-managed \
+    --record=record.txt
+
+if [[ "$PKG_NAME" != "pyarrow-tests" ]]; then
+    rm -r ${SP_DIR}/pyarrow/tests
+fi
+
+# generated by setup.py
+rm -rf build
+cd ..
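For debugging outside CI it can help to replay a subset of this script's environment by hand. A minimal sketch, assuming an activated environment that already provides `libarrow-all`, `cmake`, `ninja`, `cython`, `numpy`, and `setuptools-scm`; the version string is a placeholder, not a real release:

```
# illustrative local build of just the pyarrow extension modules
export ARROW_HOME=$CONDA_PREFIX
export SETUPTOOLS_SCM_PRETEND_VERSION=0.0.0   # placeholder
export PYARROW_WITH_PARQUET=1
export PYARROW_CMAKE_GENERATOR=Ninja
(cd python && python setup.py build_ext --inplace)
```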
diff --git a/recipe/build.sh b/recipe/build.sh
index c14c1a9fe..5487c7709 100644
--- a/recipe/build.sh
+++ b/recipe/build.sh
@@ -90,7 +90,8 @@ cmake -GNinja \
     -DPARQUET_BUILD_EXECUTABLES=ON \
     -DPARQUET_REQUIRE_ENCRYPTION=ON \
     -DARROW_S3=ON \
-    -DARROW_SIMD_LEVEL=NONE \
+    -DARROW_SIMD_LEVEL=DEFAULT \
+    -DARROW_RUNTIME_SIMD_LEVEL=MAX \
     -DARROW_SUBSTRAIT=ON \
     -DARROW_USE_GLOG=ON \
     -DARROW_USE_LD_GOLD=ON \
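With `ARROW_SIMD_LEVEL=DEFAULT` the C++ libraries are compiled against a portable baseline (typically SSE4.2 on x86_64), while `ARROW_RUNTIME_SIMD_LEVEL=MAX` lets Arrow's runtime dispatch pick wider instruction sets (e.g. AVX2) on capable hardware. One way to observe both levels from a built package — a hedged check using pyarrow's public `runtime_info()` API:

```
# print compile-time vs. runtime-detected SIMD level
python -c "import pyarrow as pa; i = pa.runtime_info(); print(i.simd_level, i.detected_simd_level)"
```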
diff --git a/recipe/meta.yaml b/recipe/meta.yaml
index 7f18894a1..6cb60be45 100644
--- a/recipe/meta.yaml
+++ b/recipe/meta.yaml
@@ -34,7 +34,7 @@ source:
     folder: cpp/submodules/parquet-testing
 
 build:
-  number: 1
+  number: 2
   # for cuda support, building with one version is enough to be compatible with
   # all later versions, since arrow is only using libcuda, and not libcudart.
   skip: true  # [cuda_compiler_version not in ("None", cuda_compiler_version_min)]
@@ -797,6 +797,300 @@ outputs:
         - LICENSE.txt
       summary: Executables for manipulating Apache arrow files
 
+  - name: pyarrow-core
+    script: build-pyarrow.sh   # [unix]
+    script: build-pyarrow.bat  # [win]
+    version: {{ version }}
+    build:
+      string: py{{ CONDA_PY }}h{{ PKG_HASH }}_{{ PKG_BUILDNUM }}_{{ build_ext }}
+      ignore_run_exports_from:
+        - {{ compiler("cuda") }}    # [cuda_compiler_version != "None"]
+        # we don't need numpy at runtime, just to build
+        - numpy
+      rpaths:
+        - lib/
+        - {{ SP_DIR }}/pyarrow
+      missing_dso_whitelist:
+        # not actually missing, but installed into SP_DIR, see tests
+        - '*/arrow_python.dll'               # [win]
+        - '*/arrow_python_flight.dll'        # [win]
+        # pyarrow-core builds with the capabilities but we do not ship them
+        # to provide the smaller core functionality.
+        - 'lib/libarrow_acero.*'             # [unix]
+        - 'lib/libarrow_dataset.*'           # [unix]
+        - 'lib/libarrow_substrait.*'         # [unix]
+        - 'lib/libarrow_flight.*'            # [unix]
+        - 'lib/libparquet.*'                 # [unix]
+        - 'lib/libgandiva.*'                 # [unix]
+        - 'Library/lib/arrow_acero.dll'      # [win]
+        - 'Library/lib/arrow_dataset.dll'    # [win]
+        - 'Library/lib/arrow_substrait.dll'  # [win]
+        - 'Library/lib/arrow_flight.dll'     # [win]
+        - 'Library/lib/parquet.dll'          # [win]
+        - 'Library/lib/gandiva.dll'          # [win]
+    requirements:
+      build:
+        - {{ compiler("c") }}
+        - {{ stdlib("c") }}
+        - {{ compiler("cxx") }}
+        # pyarrow does not require nvcc but it needs to link against libraries in libarrow=*=*cuda
+        - {{ compiler("cuda") }}              # [cuda_compiler_version != "None"]
+        - python                              # [build_platform != target_platform]
+        - cross-python_{{ target_platform }}  # [build_platform != target_platform]
+        - cython                              # [build_platform != target_platform]
+        - numpy                               # [build_platform != target_platform]
+        - cmake
+        - ninja
+      host:
+        # We add all libarrow package dependencies on host in order
+        # to build pyarrow once with all capabilities.
+        - {{ pin_subpackage("libarrow-all", exact=True) }}
+        - clangdev {{ llvm_version }}
+        - llvmdev {{ llvm_version }}
+        - zlib
+        - cython
+        - numpy
+        - python
+        - setuptools
+        - setuptools-scm
+      run:
+        # We ignore the run-exports from libarrow-all and restrict to only
+        # libarrow, as we don't want the other libraries to be installed when
+        # running for pyarrow-core, where the aim is a low storage footprint.
+        - {{ pin_subpackage("libarrow", exact=True) }}
+        # compute is necessary even for a basic `import pyarrow`
+        - {{ pin_subpackage("libarrow-compute", exact=True) }}
+        - python
+        # this is redundant with libarrow, but we want smithy to pick up that
+        # cuda_compiler_version_min is present, to populate the CI configs
+        - __cuda >={{ cuda_compiler_version_min }}  # [cuda_compiler_version != "None"]
+      run_constrained:
+        - apache-arrow-proc * {{ build_ext }}
+        # keep lower pin aligned with run_exports from numpy
+        # https://github.com/conda-forge/numpy-feedstock/blob/main/recipe/meta.yaml
+        - numpy >=1.21,<3
+
+    test:
+      imports:
+        - pyarrow
+        # Compute can be imported but the underlying libarrow_acero is not present.
+        - pyarrow.compute
+        - pyarrow.orc
+        - pyarrow.fs
+        - pyarrow._s3fs
+        - pyarrow._hdfs
+        # We can only test importing cuda package but cannot run when a
+        # CUDA device is not available, for instance, when building from CI.
+        # On Windows, we cannot even do that due to `nvcuda.dll` not being found, see
+        # https://conda-forge.org/docs/maintainer/knowledge_base.html#nvcuda-dll-cannot-be-found-on-windows
+        # However, we check below for (at least) the presence of a correctly-compiled module
+        - pyarrow.cuda    # [cuda_compiler_version != "None" and not win]
+      commands:
+        # libraries that depend on python (and hence aren't in libarrow itself)
+        - test -f ${SP_DIR}/pyarrow/libarrow_python.so                             # [linux]
+        - test -f ${SP_DIR}/pyarrow/libarrow_python_flight.so                      # [linux]
+        - test -f ${SP_DIR}/pyarrow/libarrow_python_parquet_encryption.so          # [linux]
+        - test -f ${SP_DIR}/pyarrow/libarrow_python.dylib                          # [osx]
+        - test -f ${SP_DIR}/pyarrow/libarrow_python_flight.dylib                   # [osx]
+        - test -f ${SP_DIR}/pyarrow/libarrow_python_parquet_encryption.dylib       # [osx]
+        - if not exist %SP_DIR%\pyarrow\arrow_python.dll exit 1                    # [win]
+        - if not exist %SP_DIR%\pyarrow\arrow_python_flight.dll exit 1             # [win]
+        - if not exist %SP_DIR%\pyarrow\arrow_python_parquet_encryption.dll exit 1 # [win]
+
+        - test -f ${SP_DIR}/pyarrow/include/arrow/python/pyarrow.h                 # [unix]
+        - if not exist %SP_DIR%\pyarrow\include\arrow\python\pyarrow.h exit 1      # [win]
+
+        - test ! -f ${SP_DIR}/pyarrow/tests/test_array.py                          # [unix]
+        - if exist %SP_DIR%/pyarrow/tests/test_array.py exit 1                     # [win]
+        # Need to remove dot from PY_VER; %MYVAR:x=y% replaces "x" in %MYVAR% with "y"
+        - if not exist %SP_DIR%/pyarrow/_cuda.cp%PY_VER:.=%-win_amd64.pyd exit 1   # [win and cuda_compiler_version != "None"]
+
+        # Expected not included libraries
+        - test ! -f $PREFIX/lib/libarrow_acero${SHLIB_EXT}    # [unix]
+        - test ! -f $PREFIX/lib/libarrow_dataset${SHLIB_EXT}  # [unix]
+        - test ! -f $PREFIX/lib/libarrow_flight${SHLIB_EXT}   # [unix]
+        - test ! -f $PREFIX/lib/libgandiva${SHLIB_EXT}        # [unix]
+        - test ! -f $PREFIX/lib/libparquet${SHLIB_EXT}        # [unix]
+
+    about:
+      home: http://github.com/apache/arrow
+      license: Apache-2.0
+      license_file:
+        - LICENSE.txt
+      summary: Python libraries for Apache Arrow Core
+
+  - name: pyarrow
+    version: {{ version }}
+    requirements:
+      host:
+        # only necessary for run-exports
+        - python
+      run:
+        # Default doesn't contain flight, flight-sql and gandiva
+        - {{ pin_subpackage("libarrow-acero", exact=True) }}
+        - {{ pin_subpackage("libarrow-dataset", exact=True) }}
+        - {{ pin_subpackage("libarrow-substrait", exact=True) }}
+        - {{ pin_subpackage("libparquet", exact=True) }}
+        # do not use pin_compatible because pyarrow-core has CUDA/non-CUDA variants
+        - pyarrow-core {{ version }} *_{{ PKG_BUILDNUM }}_*
+        - python
+
+    test:
+      files:
+        - test_read_parquet.py
+      imports:
+        # default pyarrow contains parquet
+        - pyarrow.dataset
+        - pyarrow.parquet
+      commands:
+        # Expected not included libraries
+        - test ! -f $PREFIX/lib/libarrow_flight${SHLIB_EXT}  # [unix]
+        - test ! -f $PREFIX/lib/libgandiva${SHLIB_EXT}       # [unix]
+
+        - python test_read_parquet.py
+
+    about:
+      home: http://github.com/apache/arrow
+      license: Apache-2.0
+      license_file:
+        - LICENSE.txt
+      summary: Python libraries for Apache Arrow with default capabilities
+
+  - name: pyarrow-all
+    version: {{ version }}
+    requirements:
+      host:
+        # only necessary for run-exports
+        - python
+      run:
+        - {{ pin_subpackage("libarrow-flight", exact=True) }}
+        - {{ pin_subpackage("libarrow-flight-sql", exact=True) }}
+        - {{ pin_subpackage("libarrow-gandiva", exact=True) }}
+        - pyarrow {{ version }} *_{{ PKG_BUILDNUM }}
+        - python
+
+    test:
+      imports:
+        - pyarrow.flight
+        - pyarrow.gandiva
+
+    about:
+      home: http://github.com/apache/arrow
+      license: Apache-2.0
+      license_file:
+        - LICENSE.txt
+      summary: Python libraries for Apache Arrow with all capabilities
+
+  - name: pyarrow-tests
+    script: build-pyarrow.sh   # [unix]
+    script: build-pyarrow.bat  # [win]
+    version: {{ version }}
+    build:
+      skip: true  # [cuda_compiler_version != "None"]
+    requirements:
+      build:
+        - {{ compiler("c") }}
+        - {{ stdlib("c") }}
+        - {{ compiler("cxx") }}
+        - python                              # [build_platform != target_platform]
+        - cross-python_{{ target_platform }}  # [build_platform != target_platform]
+        - cython                              # [build_platform != target_platform]
+        - numpy                               # [build_platform != target_platform]
+        - cmake
+        - ninja
+      host:
+        - {{ pin_subpackage("libarrow-all", exact=True) }}
+        - pyarrow-all {{ version }} *_{{ PKG_BUILDNUM }}
+        - clangdev {{ llvm_version }}
+        - llvmdev {{ llvm_version }}
+        - zlib
+        - cython
+        - numpy
+        - python
+        - setuptools
+        - setuptools-scm
+      run:
+        - pyarrow-all {{ version }} *_{{ PKG_BUILDNUM }}
+        - python
+
+    {% if not (aarch64 or ppc64le) or py == 311 %}
+    test:
+      requires:
+        # test_cpp_extension_in_python requires a compiler
+        - {{ compiler("cxx") }}  # [linux]
+        - pytest
+        - boto3
+        - cffi
+        - cloudpickle
+        - cython
+        - fastparquet
+        - fsspec
+        - hypothesis
+        - minio-server
+        - pandas
+        - s3fs >=2023
+        - scipy
+        - sparse  # [py<314]
+        # these are generally (far) behind on migrating abseil/grpc/protobuf,
+        # and using them as test dependencies blocks the migrator unnecessarily
+        # - pytorch
+        # - tensorflow
+        # we're not building java bindings
+        # - jpype1
+        # doesn't get picked up correctly
+        # - libhdfs3
+      source_files:
+        - cpp/submodules/parquet-testing/data
+        - testing/data
+      commands:
+        - cd ${SP_DIR}                                      # [unix]
+        - cd %SP_DIR%                                       # [win]
+        - export ARROW_TEST_DATA="${SRC_DIR}/testing/data"  # [unix]
+        - set "ARROW_TEST_DATA=%SRC_DIR%\testing\data"      # [win]
+        - export PARQUET_TEST_DATA="${SRC_DIR}/cpp/submodules/parquet-testing/data"  # [unix]
+        - set "PARQUET_TEST_DATA=%SRC_DIR%\cpp\submodules\parquet-testing\data"      # [win]
+
+        {% set tests_to_skip = "_not_a_real_test" %}
+        # we do not have GPUs in CI --> cannot test cuda
+        {% set tests_to_skip = tests_to_skip + " or test_cuda" + " or test_dlpack_cuda_not_supported" %}
+        # skip tests that raise SIGINT and crash the test suite
+        {% set tests_to_skip = tests_to_skip + " or (test_csv and test_cancellation)" %}   # [linux]
+        {% set tests_to_skip = tests_to_skip + " or (test_flight and test_interrupt)" %}   # [linux]
+        # skip tests that make invalid(-for-conda) assumptions about the compilers setup
+        {% set tests_to_skip = tests_to_skip + " or test_cython_api" %}                    # [unix]
+        {% set tests_to_skip = tests_to_skip + " or test_visit_strings" %}                 # [unix]
+        # skip tests that cannot succeed in emulation
+        {% set tests_to_skip = tests_to_skip + " or test_debug_memory_pool_disabled" %}    # [aarch64 or ppc64le]
+        {% set tests_to_skip = tests_to_skip + " or test_env_var_io_thread_count" %}       # [aarch64 or ppc64le]
+        # vvvvvvv TESTS THAT SHOULDN'T HAVE TO BE SKIPPED vvvvvvv
+        # flaky test based on s3 connection
+        {% set tests_to_skip = tests_to_skip + " or test_s3_real_aws_region_selection" %}
+        # https://github.com/apache/arrow/issues/45229
+        {% set tests_to_skip = tests_to_skip + " or test_sparse_coo_tensor_scipy_roundtrip" %}
+        # this test is trying to simulate a failure mode using /usr/share/zoneinfo,
+        # but newer orc will ignore that if it finds a tzinfo in a conda environment
+        {% set tests_to_skip = tests_to_skip + " or test_timezone_absent" %}
+        # https://github.com/apache/arrow/issues/43800
+        {% set tests_to_skip = tests_to_skip + " or test_cpp_extension_in_python" %}       # [osx]
+        # https://github.com/apache/arrow/issues/43356
+        {% set tests_to_skip = tests_to_skip + " or (test_compute and test_assume_timezone)" %}  # [aarch64 or ppc64le]
+        {% set tests_to_skip = tests_to_skip + " or (test_compute and test_strftime)" %}         # [aarch64 or ppc64le]
+        {% set tests_to_skip = tests_to_skip + " or (test_compute and test_round_temporal)" %}   # [aarch64 or ppc64le]
+        {% set tests_to_skip = tests_to_skip + " or test_extract_datetime_components" %}         # [aarch64 or ppc64le]
+        # flaky test that fails regularly on aarch
+        {% set tests_to_skip = tests_to_skip + " or test_feather_format[serial]" %}              # [aarch64 or ppc64le]
+        # gandiva tests are segfaulting on ppc
+        {% set tests_to_skip = tests_to_skip + " or test_gandiva" %}                             # [ppc64le]
+        # ^^^^^^^ TESTS THAT SHOULDN'T HAVE TO BE SKIPPED ^^^^^^^
+        - pytest pyarrow/ -rfEs -k "not ({{ tests_to_skip }})"
+    {% endif %}
+
+    about:
+      home: http://github.com/apache/arrow
+      license: Apache-2.0
+      license_file:
+        - LICENSE.txt
+      summary: Python test files for Apache Arrow
+
 about:
   home: http://github.com/apache/arrow
   license: Apache-2.0
diff --git a/recipe/test_read_parquet.py b/recipe/test_read_parquet.py
new file mode 100644
index 000000000..5f76a4e22
--- /dev/null
+++ b/recipe/test_read_parquet.py
@@ -0,0 +1,5 @@
+import pyarrow as pa
+import pyarrow.parquet as pq
+
+table = pa.Table.from_pydict({"a": [1, 2]})
+pq.write_table(table, "test.parquet")
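As written, `test_read_parquet.py` only exercises the write path, even though the `pyarrow` output's test section runs it as `python test_read_parquet.py`. A hedged one-liner, not part of the patch, that would cover the read side its name implies:

```
# read the freshly written file back and check its contents
python -c "import pyarrow.parquet as pq; assert pq.read_table('test.parquet').num_rows == 2"
```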