Skip to content
59 changes: 59 additions & 0 deletions ci/docker/linux-apt-python-313-freethreading.dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Base image to extend; supplied by the caller as a build argument.
ARG base
FROM ${base}

# Install the free-threaded CPython 3.13 packages from the deadsnakes PPA.
# software-properties-common provides `add-apt-repository`.
# `apt-get` is used throughout because `apt` does not offer a stable CLI
# for use in scripts.
RUN apt-get update -y -q && \
    apt-get install -y -q --no-install-recommends software-properties-common gpg-agent && \
    add-apt-repository -y ppa:deadsnakes/ppa && \
    apt-get update -y -q && \
    apt-get install -y -q --no-install-recommends python3.13-dev python3.13-nogil python3.13-venv && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists*

COPY python/requirements-build.txt \
     python/requirements-test.txt \
     /arrow/python/

# Create a virtual environment based on the free-threaded interpreter
# (`python3.13t`) and install the build and test requirements into it.
ENV ARROW_PYTHON_VENV=/arrow-dev
RUN python3.13t -m venv ${ARROW_PYTHON_VENV}
RUN ${ARROW_PYTHON_VENV}/bin/python -m pip install -U pip setuptools wheel
# The requirement files are referenced by absolute path so that this step does
# not depend on the working directory inherited from the base image.
# Pre-releases and the scientific-python nightly wheel index are enabled.
RUN ${ARROW_PYTHON_VENV}/bin/python -m pip install \
      --pre \
      --prefer-binary \
      --extra-index-url "https://pypi.anaconda.org/scientific-python-nightly-wheels/simple" \
      -r /arrow/python/requirements-build.txt \
      -r /arrow/python/requirements-test.txt

# We want to run the PyArrow test suite with the GIL disabled, but cffi
# (more precisely, the `_cffi_backend` module) currently doesn't declare
# itself safe to run without the GIL, so importing it would re-enable the GIL.
# Therefore force the GIL to stay disabled by setting PYTHON_GIL to 0.
ENV ARROW_ACERO=ON \
    ARROW_BUILD_STATIC=OFF \
    ARROW_BUILD_TESTS=OFF \
    ARROW_BUILD_UTILITIES=OFF \
    ARROW_COMPUTE=ON \
    ARROW_CSV=ON \
    ARROW_DATASET=ON \
    ARROW_FILESYSTEM=ON \
    ARROW_GDB=ON \
    ARROW_HDFS=ON \
    ARROW_JSON=ON \
    ARROW_USE_GLOG=OFF \
    PYTHON_GIL=0
8 changes: 8 additions & 0 deletions dev/tasks/tasks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -1257,6 +1257,14 @@ tasks:
PYTHON: "3.10"
image: conda-python-cython2

# GitHub CI task that runs the `ubuntu-python-313-freethreading` docker image
# on Ubuntu 22.04 through the shared docker-tests Linux template.
test-ubuntu-22.04-python-313-freethreading:
ci: github
template: docker-tests/github.linux.yml
params:
env:
UBUNTU: 22.04
image: ubuntu-python-313-freethreading

test-debian-12-python-3-amd64:
ci: github
template: docker-tests/github.linux.yml
Expand Down
27 changes: 27 additions & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,7 @@ x-hierarchy:
- ubuntu-lint
- ubuntu-python
- ubuntu-python-sdist-test
- ubuntu-python-313-freethreading
- ubuntu-r
- ubuntu-r-only-r
- ubuntu-cpp-bundled
Expand Down Expand Up @@ -1085,6 +1086,32 @@ services:
/arrow/ci/scripts/cpp_build.sh /arrow /build &&
/arrow/ci/scripts/python_sdist_test.sh /arrow"

############################ Python free-threading ##########################

ubuntu-python-313-freethreading:
# Usage:
# docker-compose build ubuntu-cpp
# docker-compose build ubuntu-python-313-freethreading
# docker-compose run --rm ubuntu-python-313-freethreading
# Parameters:
# ARCH: amd64, arm64v8, ...
# UBUNTU: 20.04, 22.04, 24.04
image: ${REPO}:${ARCH}-ubuntu-${UBUNTU}-python-313-freethreading
build:
context: .
dockerfile: ci/docker/linux-apt-python-313-freethreading.dockerfile
cache_from:
- ${REPO}:${ARCH}-ubuntu-${UBUNTU}-python-313-freethreading
# The image is built on top of the ubuntu-cpp image, which is why the
# usage notes above build ubuntu-cpp first.
args:
base: ${REPO}:${ARCH}-ubuntu-${UBUNTU}-cpp
shm_size: *shm-size
environment:
<<: [*common, *ccache]
# The bundled build of OpenTelemetry needs a git client (presumably not
# available in this image), so OpenTelemetry support is turned off.
ARROW_WITH_OPENTELEMETRY: "OFF"
volumes: *ubuntu-volumes
command: *python-command

############################ Python wheels ##################################

# See available versions at:
Expand Down
7 changes: 7 additions & 0 deletions python/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,13 @@
cmake_minimum_required(VERSION 3.16)
project(pyarrow)

# This is needed for Python 3.13 free-threading. By default CMake adds the
# include directories of imported targets (here, the Python ones) with
# `-isystem`. Because `-isystem` directories are searched after `-I`
# directories, some Python-internal includes resolved to the regular 3.13
# headers instead of the 3.13 free-threading ones, which in turn meant that
# Py_GIL_DISABLED was not set. With this variable set, those include
# directories are passed with `-I` instead.
# NOTE: this setting is global, so it affects every imported target used by
# this project, not only Python; compiler warnings coming from those headers
# are no longer silenced as system-header warnings.
set(CMAKE_NO_SYSTEM_FROM_IMPORTED ON)
Copy link
Contributor Author

@lysnikolaou lysnikolaou Aug 15, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Okay, this was a hard one.

CMake used to add Python include directories with -isystem, which led some Python-internal includes to resolve to the normal 3.13 includes (because -isystem includes are searched after system directories) instead of the 3.13 free-threading ones, which in turn meant that Py_GIL_DISABLED was not set.

Setting this flag uses -I instead. I verified manually that the only include directories here are Python-specific (Python & NumPy include directories), so this shouldn't change too much. I couldn't find a way to change this in a more granular way. If someone knows how, help would be really appreciated!

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@kou Does this change look ok to you?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we use the SYSTEM target property https://cmake.org/cmake/help/latest/prop_tgt/SYSTEM.html instead of this to limit the impact?

diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt
index 5d5eeaf815..c19820c074 100644
--- a/python/CMakeLists.txt
+++ b/python/CMakeLists.txt
@@ -258,6 +258,8 @@ set(EXECUTABLE_OUTPUT_PATH "${BUILD_OUTPUT_ROOT_DIRECTORY}")
 find_package(Python3Alt REQUIRED)
 message(STATUS "Found NumPy version: ${Python3_NumPy_VERSION}")
 message(STATUS "NumPy include dir: ${NUMPY_INCLUDE_DIRS}")
+# TODO: Describe why we need this
+set_target_properties(Python3::Python PROPERTIES SYSTEM FALSE)
 
 include(UseCython)
 message(STATUS "Found Cython version: ${CYTHON_VERSION}")

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we use the SYSTEM target property https://cmake.org/cmake/help/latest/prop_tgt/SYSTEM.html instead of this to limit the impact?

What my change does is the exact opposite, though. It signifies not to use SYSTEM anywhere, which I guess is what FindPython does.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we use the SYSTEM target property https://cmake.org/cmake/help/latest/prop_tgt/SYSTEM.html instead of this to limit the impact?

What my change does is the exact opposite, though. It signifies not to use SYSTEM anywhere, which I guess is what FindPython does.

Unless I am mistaken, @kou is suggesting to set SYSTEM to FALSE for Python but leave it as is for the rest of the dependencies, instead of changing it globally.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Like @lysnikolaou I tried several variations on this and I could not make it work.

(such as set_target_properties(Python3::Module PROPERTIES SYSTEM FALSE))

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The CMake docs are actually quite cryptic about this as several properties may be involved: SYSTEM, NO_SYSTEM_FROM_IMPORTED and EXPORT_NO_SYSTEM.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@kou We'll have to live with this, unless you want to diagnose the issue yourself. Understanding CMake's intricacies is no fun.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I can take a look at this. The system behavior is more compiler-defined and exposed by CMake: -I always comes before any -isystem, so if you want to be certain you get it before system headers you need to force it. That is coupled with the other behavior that -isystem silences compiler warnings from system headers. It does seem a shame to change it globally. I see this was already merged, but I will see if I can spot where it falls down when being more surgical about it. In general -isystem is desirable for imported targets, but it gets tricky when you have multiple versions of the same package floating around!

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK. I'll try it later by myself too.


# Full PyArrow version string, including any suffix after the numeric part.
set(PYARROW_VERSION "18.0.0-SNAPSHOT")
# Keep only the leading MAJOR.MINOR.PATCH digits of the version
# (e.g. "18.0.0" for the value above) in PYARROW_BASE_VERSION.
string(REGEX MATCH "^[0-9]+\\.[0-9]+\\.[0-9]+" PYARROW_BASE_VERSION "${PYARROW_VERSION}")

Expand Down