From 3752109d64e3a02eaaa636a037141a51fae46819 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Sat, 4 Jan 2025 10:55:40 +0100 Subject: [PATCH] GH-44855: [Python][Packaging] Use delvewheel to repair Windows wheels (#35323) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### Rationale for this change We need to ship the C++ standard library with our Windows wheels, as it is not guaranteed that a recent enough version is present on the system. However, some other Python libraries may require an even more recent version than the one we ship. This may incur crashes when PyArrow is imported before such other Python library, as the older version of the C++ standard library would be used by both. ### What changes are included in this PR? Use a [fixed-up version](https://github.com/adang1345/delvewheel/pull/59) of delvewheel that allows us to name-mangle an individual DLL, and name-mangle `msvcp140.dll` to ensure that other Python libraries do not reuse the version we ship. ### Are these changes tested? By regular wheel build tests. * Closes: #44855 * GitHub Issue: #33981 * GitHub Issue: #44855 Lead-authored-by: Antoine Pitrou Co-authored-by: Raúl Cumplido Co-authored-by: Antoine Pitrou Signed-off-by: Antoine Pitrou --- ci/scripts/python_wheel_windows_build.bat | 30 ++++++++++++++++++++-- ci/scripts/python_wheel_windows_test.bat | 4 +-- dev/tasks/python-wheels/github.windows.yml | 8 +++--- 3 files changed, 34 insertions(+), 8 deletions(-) diff --git a/ci/scripts/python_wheel_windows_build.bat b/ci/scripts/python_wheel_windows_build.bat index 1f1d5dca721d9..fe079e1862b06 100644 --- a/ci/scripts/python_wheel_windows_build.bat +++ b/ci/scripts/python_wheel_windows_build.bat @@ -20,6 +20,12 @@ echo "Building windows wheel..." 
call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Auxiliary\Build\vcvars64.bat" +@echo on + +@REM Install a more recent msvcp140.dll in C:\Windows\System32 +choco install -r -y --no-progress vcredist140 +choco upgrade -r -y --no-progress vcredist140 +dir C:\Windows\System32\msvcp140.dll echo "=== (%PYTHON_VERSION%) Clear output directories and leftovers ===" del /s /q C:\arrow-build @@ -121,7 +127,27 @@ set ARROW_HOME=C:\arrow-dist set CMAKE_PREFIX_PATH=C:\arrow-dist pushd C:\arrow\python -@REM bundle the msvc runtime -cp "C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Redist\MSVC\14.28.29325\x64\Microsoft.VC142.CRT\msvcp140.dll" pyarrow\ + +@REM Bundle the C++ runtime +cp C:\Windows\System32\msvcp140.dll pyarrow\ + +@REM Build wheel python setup.py bdist_wheel || exit /B 1 + +@REM Repair the wheel with delvewheel +@REM +@REM Since we bundled the Arrow C++ libraries ourselves, we only need to +@REM mangle msvcp140.dll so as to avoid ABI issues when msvcp140.dll is +@REM required by multiple Python libraries in the same process. 
+@REM +@REM For now this requires a custom version of delvewheel: +@REM https://github.com/adang1345/delvewheel/pull/59 +pip install https://github.com/pitrou/delvewheel/archive/refs/heads/fixes-for-arrow.zip || exit /B 1 + +for /f %%i in ('dir dist\pyarrow-*.whl /B') do (set WHEEL_NAME=%cd%\dist\%%i) || exit /B 1 +echo "Wheel name: %WHEEL_NAME%" + +delvewheel repair -vv --mangle-only=msvcp140.dll --no-patch ^ + -w repaired_wheels %WHEEL_NAME% || exit /B 1 + popd diff --git a/ci/scripts/python_wheel_windows_test.bat b/ci/scripts/python_wheel_windows_test.bat index ae5b7e36ad7ab..12d35216b1ca5 100755 --- a/ci/scripts/python_wheel_windows_test.bat +++ b/ci/scripts/python_wheel_windows_test.bat @@ -48,7 +48,7 @@ set PYTHON_CMD=py -%PYTHON% %PYTHON_CMD% -m pip install -r C:\arrow\python\requirements-wheel-test.txt || exit /B 1 @REM Install the built wheels -%PYTHON_CMD% -m pip install --no-index --find-links=C:\arrow\python\dist\ pyarrow || exit /B 1 +%PYTHON_CMD% -m pip install --no-index --find-links=C:\arrow\python\repaired_wheels pyarrow || exit /B 1 @REM Test that the modules are importable %PYTHON_CMD% -c "import pyarrow" || exit /B 1 @@ -65,7 +65,7 @@ set PYTHON_CMD=py -%PYTHON% %PYTHON_CMD% -c "import pyarrow.substrait" || exit /B 1 @REM Validate wheel contents -%PYTHON_CMD% C:\arrow\ci\scripts\python_wheel_validate_contents.py --path C:\arrow\python\dist || exit /B 1 +%PYTHON_CMD% C:\arrow\ci\scripts\python_wheel_validate_contents.py --path C:\arrow\python\repaired_wheels || exit /B 1 @rem Download IANA Timezone Database for ORC C++ curl https://cygwin.osuosl.org/noarch/release/tzdata/tzdata-2024a-1.tar.xz --output tzdata.tar.xz || exit /B diff --git a/dev/tasks/python-wheels/github.windows.yml b/dev/tasks/python-wheels/github.windows.yml index 41585045f685d..de0efedbff534 100644 --- a/dev/tasks/python-wheels/github.windows.yml +++ b/dev/tasks/python-wheels/github.windows.yml @@ -63,7 +63,7 @@ jobs: - uses: actions/upload-artifact@v4 with: name: wheel - path: 
arrow/python/dist/*.whl + path: arrow/python/repaired_wheels/*.whl - name: Test wheel shell: cmd @@ -71,9 +71,9 @@ jobs: cd arrow archery docker run python-wheel-windows-test - {{ macros.github_upload_releases("arrow/python/dist/*.whl")|indent }} - {{ macros.github_upload_gemfury("arrow/python/dist/*.whl")|indent }} - {{ macros.github_upload_wheel_scientific_python("arrow/python/dist/*.whl")|indent }} + {{ macros.github_upload_releases("arrow/python/repaired_wheels/*.whl")|indent }} + {{ macros.github_upload_gemfury("arrow/python/repaired_wheels/*.whl")|indent }} + {{ macros.github_upload_wheel_scientific_python("arrow/python/repaired_wheels/*.whl")|indent }} {% if arrow.is_default_branch() %} - name: Push Docker Image