Skip to content

Commit ad9bad9

Browse files
lidavidmkou
andauthored
GH-13: Set up JNI build (dataset, etc.) (#449)
Fixes #13. --------- Co-authored-by: Sutou Kouhei <[email protected]>
1 parent 5a418f9 commit ad9bad9

File tree

11 files changed

+757
-7
lines changed

11 files changed

+757
-7
lines changed

.env

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ ARCH_SHORT=amd64
4040

4141
# Default repository to pull and push images from
4242
REPO=ghcr.io/apache/arrow-java-dev
43+
ARROW_REPO=apache/arrow-dev
4344

4445
# The setup attempts to generate coredumps by default, in order to disable the
4546
# coredump generation set it to 0
@@ -48,3 +49,9 @@ ULIMIT_CORE=-1
4849
# Default versions for various dependencies
4950
JDK=11
5051
MAVEN=3.9.9
52+
53+
# Versions for various dependencies used to build artifacts
54+
# Keep in sync with apache/arrow
55+
ARROW_REPO_ROOT=./arrow
56+
PYTHON=3.9
57+
VCPKG="943c5ef1c8f6b5e6ced092b242c8299caae2ff01" # 2024.04.26 Release

.github/workflows/test_jni.yml

Lines changed: 267 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,267 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one
2+
# or more contributor license agreements. See the NOTICE file
3+
# distributed with this work for additional information
4+
# regarding copyright ownership. The ASF licenses this file
5+
# to you under the Apache License, Version 2.0 (the
6+
# "License"); you may not use this file except in compliance
7+
# with the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing,
12+
# software distributed under the License is distributed on an
13+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
# KIND, either express or implied. See the License for the
15+
# specific language governing permissions and limitations
16+
# under the License.
17+
18+
name: Test (JNI)
19+
20+
on:
21+
push:
22+
branches:
23+
- '**'
24+
- '!dependabot/**'
25+
tags:
26+
- '**'
27+
pull_request:
28+
29+
concurrency:
30+
group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }}
31+
cancel-in-progress: true
32+
33+
permissions:
34+
contents: read
35+
36+
env:
37+
DOCKER_VOLUME_PREFIX: ".docker/"
38+
39+
jobs:
40+
cpp-ubuntu:
41+
name: Build C++ libraries ${{ matrix.platform.runs_on }} ${{ matrix.platform.arch }}
42+
runs-on: ${{ matrix.platform.runs_on }}
43+
strategy:
44+
fail-fast: false
45+
matrix:
46+
platform:
47+
- runs_on: ubuntu-latest
48+
arch: "x86_64"
49+
archery_arch: "amd64"
50+
env:
51+
# architecture name used for archery build
52+
ARCH: ${{ matrix.platform.archery_arch }}
53+
permissions:
54+
contents: read
55+
packages: write
56+
steps:
57+
- name: Checkout apache/arrow-java
58+
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
59+
with:
60+
fetch-depth: 0
61+
submodules: recursive
62+
- name: Checkout apache/arrow
63+
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
64+
with:
65+
repository: apache/arrow
66+
fetch-depth: 0
67+
path: arrow
68+
submodules: recursive
69+
- uses: docker/login-action@9780b0c442fbb1117ed29e0efdff1e18412f7567 # v3.3.0
70+
with:
71+
registry: ghcr.io
72+
username: ${{ github.actor }}
73+
password: ${{ secrets.GITHUB_TOKEN }}
74+
- name: Build C++ libraries
75+
run: |
76+
docker compose run vcpkg-jni
77+
- name: Push Docker image
78+
if: success() && github.event_name == 'push' && github.repository == 'apache/arrow-java' && github.ref_name == 'main'
79+
run: |
80+
docker compose push vcpkg-jni
81+
- name: Compress into single artifact to keep directory structure
82+
run: tar -cvzf arrow-shared-libs-linux-${{ matrix.platform.arch }}.tar.gz dist/
83+
- name: Upload artifacts
84+
uses: actions/upload-artifact@v4
85+
with:
86+
name: ubuntu-shared-lib-${{ matrix.platform.arch }}
87+
path: arrow-shared-libs-linux-${{ matrix.platform.arch }}.tar.gz
88+
89+
cpp-macos:
90+
name: Build C++ libraries macOS ${{ matrix.platform.runs_on }} ${{ matrix.platform.arch }}
91+
runs-on: ${{ matrix.platform.runs_on }}
92+
strategy:
93+
fail-fast: false
94+
matrix:
95+
platform:
96+
- { runs_on: macos-13, arch: "x86_64"}
97+
- { runs_on: macos-14, arch: "aarch_64" }
98+
env:
99+
MACOSX_DEPLOYMENT_TARGET: "14.0"
100+
steps:
101+
- name: Checkout apache/arrow-java
102+
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
103+
with:
104+
fetch-depth: 0
105+
submodules: recursive
106+
- name: Checkout apache/arrow
107+
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
108+
with:
109+
repository: apache/arrow
110+
fetch-depth: 0
111+
path: arrow
112+
submodules: recursive
113+
- name: Set up Python
114+
uses: actions/setup-python@v4
115+
with:
116+
cache: 'pip'
117+
python-version: 3.12
118+
- name: Install Archery
119+
run: pip install -e arrow/dev/archery[all]
120+
- name: Install dependencies
121+
run: |
122+
# We want to use llvm@14 to avoid shared z3
123+
# dependency. llvm@14 doesn't depend on z3 and llvm depends
124+
# on z3. And Homebrew's z3 provides only shared library. It
125+
# doesn't provides static z3 because z3's CMake doesn't accept
126+
# building both shared and static libraries at once.
127+
# See also: Z3_BUILD_LIBZ3_SHARED in
128+
# https://github.com/Z3Prover/z3/blob/master/README-CMake.md
129+
#
130+
# If llvm is installed, Apache Arrow C++ uses llvm rather than
131+
# llvm@14 because llvm is newer than llvm@14.
132+
brew uninstall llvm || :
133+
134+
# Ensure updating python@XXX with the "--overwrite" option.
135+
# If python@XXX is updated without "--overwrite", it causes
136+
# a conflict error. Because Python 3 installed not by
137+
# Homebrew exists in /usr/local on GitHub Actions. If
138+
# Homebrew's python@XXX is updated without "--overwrite", it
139+
# tries to replace /usr/local/bin/2to3 and so on and causes
140+
# a conflict error.
141+
brew update
142+
for python_package in $(brew list | grep python@); do
143+
brew install --overwrite ${python_package}
144+
done
145+
brew install --overwrite python
146+
147+
if [ "$(uname -m)" = "arm64" ]; then
148+
# pkg-config formula is deprecated but it's still installed
149+
# in GitHub Actions runner now. We can remove this once
150+
# pkg-config formula is removed from GitHub Actions runner.
151+
brew uninstall pkg-config || :
152+
brew uninstall [email protected] || :
153+
fi
154+
155+
brew bundle --file=arrow/cpp/Brewfile
156+
# We want to link aws-sdk-cpp statically but Homebrew's
157+
# aws-sdk-cpp provides only shared library. If we have
158+
# Homebrew's aws-sdk-cpp, our build mix Homebrew's
159+
# aws-sdk-cpp and bundled aws-sdk-cpp. We uninstall Homebrew's
160+
# aws-sdk-cpp to ensure using only bundled aws-sdk-cpp.
161+
brew uninstall aws-sdk-cpp
162+
# We want to use bundled RE2 for static linking. If
163+
# Homebrew's RE2 is installed, its header file may be used.
164+
# We uninstall Homebrew's RE2 to ensure using bundled RE2.
165+
brew uninstall grpc || : # gRPC depends on RE2
166+
brew uninstall [email protected] || : # gRPC 1.54 may be installed too
167+
brew uninstall re2
168+
# We want to use bundled Protobuf for static linking. If
169+
# Homebrew's Protobuf is installed, its library file may be
170+
# used on test We uninstall Homebrew's Protobuf to ensure using
171+
# bundled Protobuf.
172+
brew uninstall protobuf
173+
174+
brew bundle --file=Brewfile
175+
- name: Build C++ libraries
176+
run: |
177+
set -e
178+
# make brew Java available to CMake
179+
export JAVA_HOME=$(brew --prefix openjdk@11)/libexec/openjdk.jdk/Contents/Home
180+
./ci/scripts/jni_macos_build.sh \
181+
$GITHUB_WORKSPACE \
182+
$GITHUB_WORKSPACE/arrow \
183+
$GITHUB_WORKSPACE/arrow-java/cpp-build \
184+
$GITHUB_WORKSPACE/dist
185+
- name: Compress into single artifact to keep directory structure
186+
run: tar -cvzf arrow-shared-libs-macos-${{ matrix.platform.arch }}.tar.gz dist/
187+
- name: Upload artifacts
188+
uses: actions/upload-artifact@v4
189+
with:
190+
name: macos-shared-lib-${{ matrix.platform.arch }}
191+
path: arrow-shared-libs-macos-${{ matrix.platform.arch }}.tar.gz
192+
193+
java-jars:
194+
name: Build JAR files
195+
runs-on: ubuntu-latest
196+
needs:
197+
- cpp-ubuntu
198+
- cpp-macos
199+
steps:
200+
- name: Checkout apache/arrow-java
201+
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
202+
with:
203+
fetch-depth: 0
204+
submodules: recursive
205+
- name: Checkout apache/arrow
206+
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
207+
with:
208+
repository: apache/arrow
209+
fetch-depth: 0
210+
path: arrow
211+
submodules: recursive
212+
- name: Download Libraries
213+
uses: actions/download-artifact@v4
214+
with:
215+
path: artifacts
216+
- name: Decompress artifacts
217+
run: |
218+
mv artifacts/*/*.tar.gz .
219+
tar -xvzf arrow-shared-libs-linux-x86_64.tar.gz
220+
# tar -xvzf arrow-shared-libs-linux-aarch_64.tar.gz
221+
tar -xvzf arrow-shared-libs-macos-x86_64.tar.gz
222+
tar -xvzf arrow-shared-libs-macos-aarch_64.tar.gz
223+
# tar -xvzf arrow-shared-libs-windows.tar.gz
224+
- name: Test that shared libraries exist
225+
run: |
226+
set -x
227+
228+
test -f dist/arrow_cdata_jni/x86_64/libarrow_cdata_jni.so
229+
test -f dist/arrow_dataset_jni/x86_64/libarrow_dataset_jni.so
230+
test -f dist/arrow_orc_jni/x86_64/libarrow_orc_jni.so
231+
test -f dist/gandiva_jni/x86_64/libgandiva_jni.so
232+
233+
# test -f dist/arrow_cdata_jni/aarch_64/libarrow_cdata_jni.so
234+
# test -f dist/arrow_dataset_jni/aarch_64/libarrow_dataset_jni.so
235+
# test -f dist/arrow_orc_jni/aarch_64/libarrow_orc_jni.so
236+
# test -f dist/gandiva_jni/aarch_64/libgandiva_jni.so
237+
238+
test -f dist/arrow_cdata_jni/x86_64/libarrow_cdata_jni.dylib
239+
test -f dist/arrow_dataset_jni/x86_64/libarrow_dataset_jni.dylib
240+
test -f dist/arrow_orc_jni/x86_64/libarrow_orc_jni.dylib
241+
test -f dist/gandiva_jni/x86_64/libgandiva_jni.dylib
242+
243+
test -f dist/arrow_cdata_jni/aarch_64/libarrow_cdata_jni.dylib
244+
test -f dist/arrow_dataset_jni/aarch_64/libarrow_dataset_jni.dylib
245+
test -f dist/arrow_orc_jni/aarch_64/libarrow_orc_jni.dylib
246+
test -f dist/gandiva_jni/aarch_64/libgandiva_jni.dylib
247+
248+
# test -f dist/arrow_cdata_jni/x86_64/arrow_cdata_jni.dll
249+
# test -f dist/arrow_dataset_jni/x86_64/arrow_dataset_jni.dll
250+
# test -f dist/arrow_orc_jni/x86_64/arrow_orc_jni.dll
251+
- name: Build bundled jar
252+
env:
253+
MAVEN_ARGS: >-
254+
--no-transfer-progress
255+
run: |
256+
set -e
257+
# mvn versions:set -DnewVersion={{ arrow.no_rc_snapshot_version }}
258+
# mvn versions:set -DnewVersion={{ arrow.no_rc_snapshot_version }} -f bom
259+
./ci/scripts/jni_full_build.sh \
260+
$GITHUB_WORKSPACE \
261+
$GITHUB_WORKSPACE/arrow \
262+
$GITHUB_WORKSPACE/dist
263+
- name: Upload artifacts
264+
uses: actions/upload-artifact@v4
265+
with:
266+
name: java-jars
267+
path: ${{ github.workspace }}/arrow-java/java-dist

adapter/orc/src/test/java/org/apache/arrow/adapter/orc/OrcReaderTest.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
import org.apache.orc.TypeDescription;
3939
import org.apache.orc.Writer;
4040
import org.junit.jupiter.api.BeforeAll;
41+
import org.junit.jupiter.api.Disabled;
4142
import org.junit.jupiter.api.Test;
4243
import org.junit.jupiter.api.io.TempDir;
4344

@@ -53,6 +54,7 @@ public static void beforeClass() {
5354
allocator = new RootAllocator(MAX_ALLOCATION);
5455
}
5556

57+
@Disabled("ORC is flaky: https://github.com/apache/arrow-java/pull/449")
5658
@Test
5759
public void testOrcJniReader() throws Exception {
5860
TypeDescription schema = TypeDescription.fromString("struct<x:int,y:string>");

ci/docker/vcpkg-jni.dockerfile

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one
2+
# or more contributor license agreements. See the NOTICE file
3+
# distributed with this work for additional information
4+
# regarding copyright ownership. The ASF licenses this file
5+
# to you under the Apache License, Version 2.0 (the
6+
# "License"); you may not use this file except in compliance
7+
# with the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing,
12+
# software distributed under the License is distributed on an
13+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
# KIND, either express or implied. See the License for the
15+
# specific language governing permissions and limitations
16+
# under the License.
17+
18+
ARG base
19+
FROM ${base}
20+
21+
# Install the libraries required by Gandiva to run
22+
# Use enable llvm[enable-rtti] in the vcpkg.json to avoid link problems in Gandiva
23+
RUN vcpkg install \
24+
--clean-after-build \
25+
--x-install-root=${VCPKG_ROOT}/installed \
26+
--x-manifest-root=/arrow/ci/vcpkg \
27+
--x-feature=dev \
28+
--x-feature=flight \
29+
--x-feature=gcs \
30+
--x-feature=json \
31+
--x-feature=parquet \
32+
--x-feature=gandiva \
33+
--x-feature=s3
34+
35+
# Install Java
36+
# We need Java for JNI headers, but we don't invoke Maven in this build.
37+
ARG java=11
38+
RUN yum install -y java-$java-openjdk-devel && yum clean all
39+
40+
# For ci/scripts/{cpp,java}_*.sh
41+
ENV ARROW_HOME=/tmp/local \
42+
ARROW_JAVA_CDATA=ON \
43+
ARROW_JAVA_JNI=ON \
44+
ARROW_USE_CCACHE=ON
45+
46+
LABEL org.opencontainers.image.source https://github.com/apache/arrow-java

ci/scripts/java_jni_build.sh renamed to ci/scripts/jni_build.sh

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
# specific language governing permissions and limitations
1717
# under the License.
1818

19-
set -eo pipefail
19+
set -exo pipefail
2020

2121
arrow_dir=${1}
2222
arrow_install_dir=${2}
@@ -47,6 +47,7 @@ esac
4747

4848
: "${ARROW_JAVA_BUILD_TESTS:=${ARROW_BUILD_TESTS:-OFF}}"
4949
: "${CMAKE_BUILD_TYPE:=release}"
50+
read -ra EXTRA_CMAKE_OPTIONS <<<"${JAVA_JNI_CMAKE_ARGS:-}"
5051
cmake \
5152
-DARROW_JAVA_JNI_ENABLE_DATASET="${ARROW_DATASET:-OFF}" \
5253
-DARROW_JAVA_JNI_ENABLE_GANDIVA="${ARROW_GANDIVA:-OFF}" \
@@ -58,7 +59,7 @@ cmake \
5859
-DCMAKE_UNITY_BUILD="${CMAKE_UNITY_BUILD:-OFF}" \
5960
-DProtobuf_USE_STATIC_LIBS=ON \
6061
-GNinja \
61-
"${JAVA_JNI_CMAKE_ARGS:-}" \
62+
"${EXTRA_CMAKE_OPTIONS[@]}" \
6263
"${arrow_dir}"
6364
export CMAKE_BUILD_PARALLEL_LEVEL=${n_jobs}
6465
cmake --build . --config "${CMAKE_BUILD_TYPE}"

0 commit comments

Comments
 (0)