Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
279 changes: 279 additions & 0 deletions .github/workflows/LakekeeperTesting.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,279 @@
name: Local Lakekeeper Testing
on: [push, pull_request,repository_dispatch]
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref || '' }}-${{ github.base_ref || '' }}-${{ github.ref != 'refs/heads/main' || github.sha }}
cancel-in-progress: true
defaults:
run:
shell: bash

env:
BASE_BRANCH: ${{ github.base_ref || (endsWith(github.ref, '_feature') && 'feature' || 'main') }}

jobs:
rest:
name: Test against Lakekeeper Catalog
runs-on: ubuntu-latest
env:
VCPKG_TARGET_TRIPLET: 'x64-linux'
GEN: ninja
VCPKG_TOOLCHAIN_PATH: ${{ github.workspace }}/vcpkg/scripts/buildsystems/vcpkg.cmake
PIP_BREAK_SYSTEM_PACKAGES: 1

steps:
- name: Install required ubuntu packages
run: |
sudo apt-get update -y -qq
sudo apt-get install -y -qq software-properties-common
sudo add-apt-repository ppa:git-core/ppa
sudo apt-get update -y -qq
sudo apt-get install -y -qq \
ninja-build \
make gcc-multilib \
g++-multilib \
libssl-dev \
wget \
openjdk-8-jdk \
zip \
maven \
unixodbc-dev \
libc6-dev-i386 \
lib32readline6-dev \
libssl-dev \
libcurl4-gnutls-dev \
libexpat1-dev \
gettext \
unzip \
build-essential \
checkinstall \
libffi-dev \
curl \
libz-dev \
openssh-client
sudo apt-get install -y -qq tar pkg-config
sudo curl -L "https://github.com/docker/compose/releases/download/1.29.2/docker-compose-$(uname -s)-$(uname -m)" -o /usr/local/bin/docker-compose
sudo chmod +x /usr/local/bin/docker-compose

- uses: actions/checkout@v4
with:
fetch-depth: 0
submodules: 'true'

- name: Setup vcpkg
uses: lukka/run-vcpkg@v11.1
with:
vcpkgGitCommitId: 5e5d0e1cd7785623065e77eff011afdeec1a3574

- name: Setup Ccache
uses: hendrikmuhs/ccache-action@main
continue-on-error: true

- name: Build extension
env:
GEN: ninja
STATIC_LIBCPP: 1
run: |
make release

- name: Set up for Lakekeeper
run: |
# install java
# TODO: need a newer java version maybe?
sudo apt install -y -qq openjdk-21-jre-headless
sudo apt install -y -qq openjdk-21-jdk-headless
sudo apt-get install -y -qq python3-venv
git clone https://github.com/lakekeeper/lakekeeper.git lakekeeper

- name: Start Keycloak
run: |
docker run -d \
--name keycloak \
-p 30080:8080 \
-e KC_BOOTSTRAP_ADMIN_USERNAME=admin \
-e KC_BOOTSTRAP_ADMIN_PASSWORD=admin \
-v $(pwd)/lakekeeper/examples/access-control-simple/keycloak/realm.json:/opt/keycloak/data/import/realm.json \
quay.io/keycloak/keycloak:26.0.7 \
start-dev --metrics-enabled=true --health-enabled=true --import-realm --verbose --log-level=INFO --features=token-exchange

- name: Wait for Keycloak
run: |
max_attempts=30
attempt=1
while ! curl -sf "http://localhost:30080/realms/master/.well-known/openid-configuration"; do
if [ $attempt -gt $max_attempts ]; then
echo "Keycloak failed to initialize after $max_attempts attempts"
exit 1
fi
echo "Waiting for Keycloak to initialize (attempt $attempt/$max_attempts)..."
sleep 5
attempt=$((attempt + 1))
done
echo "Keycloak is healthy"

- name: Start PostgreSQL
run: |
docker run -d \
--name postgres \
-p 5433:5432 \
-e POSTGRESQL_USERNAME=postgres \
-e POSTGRESQL_PASSWORD=postgres \
-e POSTGRESQL_DATABASE=postgres \
bitnami/postgresql:16.3.0

- name: Wait for PostgreSQL
run: |
max_attempts=30
attempt=1
while ! docker exec postgres pg_isready -U postgres -p 5432 -d postgres; do
if [ $attempt -gt $max_attempts ]; then
echo "PostgreSQL failed to initialize after $max_attempts attempts"
exit 1
fi
echo "Waiting for PostgreSQL to initialize (attempt $attempt/$max_attempts)..."
sleep 5
attempt=$((attempt + 1))
done
echo "PostgreSQL is healthy"

- name: Run OpenFGA DB migrations
run: |
docker run --rm \
--network="host" \
-e OPENFGA_DATASTORE_ENGINE=postgres \
-e OPENFGA_DATASTORE_URI="postgres://postgres:postgres@localhost:5433/postgres?sslmode=disable" \
openfga/openfga:v1.8 migrate

- name: Start OpenFGA
run: |
docker run -d \
--name openfga \
-p 8081:8081 \
-e OPENFGA_DATASTORE_ENGINE=postgres \
-e OPENFGA_DATASTORE_URI="postgres://postgres:postgres@localhost:5433/postgres?sslmode=disable" \
-e OPENFGA_AUTHN_METHOD=oidc \
-e OPENFGA_AUTHN_OIDC_ISSUER=http://localhost:30080/realms/iceberg \
-e OPENFGA_AUTHN_OIDC_AUDIENCE=openfga \
-e OPENFGA_HTTP_TLS_ENABLED=false \
--network="host" \
openfga/openfga:v1.8 run --playground-enabled=false

- name: Wait for OpenFGA
run: |
max_attempts=30
attempt=1
while ! curl -sf http://localhost:8080/healthz; do
if [ $attempt -gt $max_attempts ]; then
echo "OpenFGA failed to initialize after $max_attempts attempts"
exit 1
fi
echo "Waiting for OpenFGA to initialize (attempt $attempt/$max_attempts)..."
sleep 5
attempt=$((attempt + 1))
done
echo "OpenFGA is healthy"

- name: Start Minio
run: |
docker run -d \
--name minio \
--network="host" \
-e MINIO_ROOT_USER=minio-root-user \
-e MINIO_ROOT_PASSWORD=minio-root-password \
-e MINIO_API_PORT_NUMBER=9000 \
-e MINIO_CONSOLE_PORT_NUMBER=9001 \
-e MINIO_SCHEME=http \
-e MINIO_DEFAULT_BUCKETS=examples \
--health-cmd="mc ls local | grep examples" \
--health-interval=2s \
--health-timeout=10s \
--health-retries=2 \
--health-start-period=15s \
-p 9000:9000 \
-p 9001:9001 \
bitnami/minio:latest

- name: Wait for Minio
run: |
max_attempts=30
attempt=1
while ! docker exec minio mc ls local | grep examples; do
if [ $attempt -gt $max_attempts ]; then
echo "Minio failed to initialize after $max_attempts attempts"
exit 1
fi
echo "Waiting for Minio to initialize (attempt $attempt/$max_attempts)..."
sleep 5
attempt=$((attempt + 1))
done
echo "Minio is healthy"

- name: Run Lakekeeper DB Migrations
run: |
docker run --rm \
--network="host" \
-e LAKEKEEPER__PG_ENCRYPTION_KEY=This-is-NOT-Secure! \
-e LAKEKEEPER__PG_DATABASE_URL_READ=postgresql://postgres:postgres@localhost:5433/postgres?sslmode=disable \
-e LAKEKEEPER__PG_DATABASE_URL_WRITE=postgresql://postgres:postgres@localhost:5433/postgres?sslmode=disable \
-e LAKEKEEPER__AUTHZ_BACKEND=openfga \
-e LAKEKEEPER__OPENFGA__ENDPOINT=http://localhost:8081 \
-e LAKEKEEPER__OPENFGA__CLIENT_ID=openfga \
-e LAKEKEEPER__OPENFGA__CLIENT_SECRET=xqE1vUrifVDKAZdLuz6JAnDxMYLdGu5z \
-e LAKEKEEPER__OPENFGA__TOKEN_ENDPOINT=http://localhost:30080/realms/iceberg/protocol/openid-connect/token \
-e RUST_LOG=info,iceberg-catalog=info,middle=trace \
quay.io/lakekeeper/catalog:latest-main migrate

- name: Start Lakekeeper
run: |
docker run -d \
--name lakekeeper \
--network="host" \
-e LAKEKEEPER__PG_ENCRYPTION_KEY=This-is-NOT-Secure! \
-e LAKEKEEPER__PG_DATABASE_URL_READ=postgresql://postgres:postgres@localhost:5433/postgres?sslmode=disable \
-e LAKEKEEPER__PG_DATABASE_URL_WRITE=postgresql://postgres:postgres@localhost:5433/postgres?sslmode=disable \
-e LAKEKEEPER__AUTHZ_BACKEND=openfga \
-e LAKEKEEPER__OPENFGA__ENDPOINT=http://localhost:8081 \
-e LAKEKEEPER__OPENID_PROVIDER_URI=http://localhost:30080/realms/iceberg \
-e LAKEKEEPER__OPENID_AUDIENCE=lakekeeper \
-e LAKEKEEPER__OPENFGA__CLIENT_ID=openfga \
-e LAKEKEEPER__OPENFGA__CLIENT_SECRET=xqE1vUrifVDKAZdLuz6JAnDxMYLdGu5z \
-e LAKEKEEPER__OPENFGA__TOKEN_ENDPOINT=http://localhost:30080/realms/iceberg/protocol/openid-connect/token \
-e LAKEKEEPER__METRICS_PORT=9002 \
quay.io/lakekeeper/catalog:latest-main serve

- name: Wait for Lakekeeper
run: |
max_attempts=30
attempt=1
while ! docker exec lakekeeper /home/nonroot/iceberg-catalog healthcheck; do
if [ $attempt -gt $max_attempts ]; then
echo "Lakekeeper failed to initialize after $max_attempts attempts"
exit 1
fi
echo "Waiting for Lakekeeper to initialize (attempt $attempt/$max_attempts)..."
sleep 5
attempt=$((attempt + 1))
done
echo "Lakekeeper is healthy"

- name: Populate Lakekeeper
env:
JAVA_HOME: /usr/lib/jvm/java-21-openjdk-amd64
run: |
sudo apt-get install -y -qq python3-venv
python3 -m venv .
source ./bin/activate
python3 -m pip install poetry
python3 -m pip install pyspark==3.5.0
python3 -m pip install duckdb
python3 -m pip install pandas
python3 -m pip install setuptools
python3 scripts/lakekeeper/setup_lakekeeper_catalog.py
python3 scripts/data_generators/generate_data.py lakekeeper

- name: Test with rest catalog
env:
LAKEKEEPER_SERVER_AVAILABLE: 1
run: |
./build/release/test/unittest "$PWD/test/*" --list-test-names-only || true
./build/release/test/unittest "$PWD/test/*"

18 changes: 9 additions & 9 deletions .github/workflows/PolarisTesting.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ defaults:

env:
BASE_BRANCH: ${{ github.base_ref || (endsWith(github.ref, '_feature') && 'feature' || 'main') }}
CMAKE_POLICY_VERSION_MINIMUM: 3.5

jobs:
rest:
Expand Down Expand Up @@ -55,11 +54,6 @@ jobs:
sudo curl -L "https://github.com/docker/compose/releases/download/1.29.2/docker-compose-$(uname -s)-$(uname -m)" -o /usr/local/bin/docker-compose
sudo chmod +x /usr/local/bin/docker-compose

- name: Install CMake 3.x
run: |
sudo apt-get remove -y cmake cmake-data
sudo apt-get install --allow-downgrades -y -qq 'cmake=3.*' 'cmake-data=3.*'

- uses: actions/checkout@v4
with:
fetch-depth: 0
Expand All @@ -86,14 +80,19 @@ jobs:
# install java
sudo apt install -y -qq openjdk-21-jre-headless
sudo apt install -y -qq openjdk-21-jdk-headless
# install python virtual environment (is this needed?)
sudo apt-get install -y -qq python3-venv

- name: Wait for polaris initialization
env:
JAVA_HOME: /usr/lib/jvm/java-21-openjdk-amd64
run: |
make setup_polaris_ci
mkdir polaris_catalog
git clone https://github.com/apache/polaris.git polaris_catalog
cd polaris_catalog
./gradlew clean :polaris-quarkus-server:assemble -Dquarkus.container-image.build=true --no-build-cache
./gradlew --stop
nohup ./gradlew run > polaris-server.log 2> polaris-error.log &
cd ..
# let polaris initialize
max_attempts=50
attempt=1
Expand Down Expand Up @@ -132,4 +131,5 @@ jobs:
run: |
export POLARIS_CLIENT_ID=$(cat polaris_client_id.txt)
export POLARIS_CLIENT_SECRET=$(cat polaris_client_secret.txt)
make test_release
./build/release/test/unittest "$PWD/test/*" --list-test-names-only || true
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

any specific reason we want to list the test names and then run the tests?

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To verify that the tests we care about are actually being run

./build/release/test/unittest "$PWD/test/*"
8 changes: 0 additions & 8 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -23,14 +23,6 @@ data: data_clean start-rest-catalog
data_large: data data_clean
python3 scripts/data_generators/generate_data.py spark-rest local

# setup polaris server. See PolarisTesting.yml to see instructions for a specific machine.
setup_polaris_ci:
mkdir polaris_catalog
git clone https://github.com/apache/polaris.git polaris_catalog
cd polaris_catalog && ./gradlew clean :polaris-quarkus-server:assemble -Dquarkus.container-image.build=true --no-build-cache
cd polaris_catalog && ./gradlew --stop
cd polaris_catalog && nohup ./gradlew run > polaris-server.log 2> polaris-error.log &

data_clean:
rm -rf data/generated

Expand Down
13 changes: 13 additions & 0 deletions scripts/data_generators/generate_data.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from generate_spark_local.generate_iceberg_spark_local import IcebergSparkLocal
from generate_spark_rest.generate_iceberg_spark_rest import IcebergSparkRest
from generate_polaris_rest.generate_iceberg_polaris_rest import IcebergPolarisRest
from generate_lakekeeper_rest.generate_iceberg_lakekeeper_rest import IcebergLakekeeperRest
import sys


Expand Down Expand Up @@ -31,12 +32,24 @@ def GeneratePolarisData():
del conn


def GenerateLakekeeperData():
db = IcebergLakekeeperRest()
conn = db.GetConnection()
db.GenerateTables(conn)
db.CloseConnection(conn)
del db
del conn


if __name__ == "__main__":
argv = sys.argv
for i in range(1, len(argv)):
if argv[i] == "polaris":
print("generating polaris data")
GeneratePolarisData()
elif argv[i] == "lakekeeper":
print("generating lakekeeper data")
GenerateLakekeeperData()
elif argv[i] == "local":
print("generating local iceberg data")
GenerateSparkLocal()
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@

Loading
Loading