Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
70 changes: 59 additions & 11 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,22 @@

# Version parameters (can be overridden from command line)
# Example: make install SPARK_VERSION=3.5 SCALA_VERSION=2.13
SPARK_VERSION ?= 3.5
SCALA_VERSION ?= 2.12
SPARK_VERSION ?= 4.0
Comment thread
hamersaw marked this conversation as resolved.
Outdated
SCALA_VERSION ?= 2.13

# Derived module names
# Maven module directories/artifact ids are built from the selected versions,
# e.g. SPARK_VERSION=3.5 SCALA_VERSION=2.12 gives MODULE=lance-spark-3.5_2.12.
# BASE_MODULE depends on the Scala version only.
MODULE := lance-spark-$(SPARK_VERSION)_$(SCALA_VERSION)
BUNDLE_MODULE := lance-spark-bundle-$(SPARK_VERSION)_$(SCALA_VERSION)
BASE_MODULE := lance-spark-base_$(SCALA_VERSION)

# Maven profiles for Spark/Scala version-specific settings
# Expands to a single -P flag naming two profiles, e.g. -Pspark-3.5,scala-2.12.
MAVEN_PROFILES := -Pspark-$(SPARK_VERSION),scala-$(SCALA_VERSION)

# Spark download versions for Docker
# The two assignments below use computed variable names: the inner
# $(SPARK_VERSION) is expanded first, so with SPARK_VERSION=3.5 they read
# SPARK_DOWNLOAD_VERSION_3.5 and PY4J_VERSION_3.5.
# NOTE(review): those per-version variables are presumably defined in
# docker/versions.mk (not visible here) -- confirm. If no variable exists for
# the selected SPARK_VERSION, the result is an empty string, not an error.
include docker/versions.mk
SPARK_DOWNLOAD_VERSION := $(SPARK_DOWNLOAD_VERSION_$(SPARK_VERSION))
PY4J_VERSION := $(PY4J_VERSION_$(SPARK_VERSION))

DOCKER_COMPOSE := $(shell \
if docker compose version >/dev/null 2>&1; then \
echo "docker compose"; \
Expand All @@ -35,11 +43,11 @@ DOCKER_COMPOSE := $(shell \

.PHONY: install
install:
./mvnw install -pl $(MODULE) -am -DskipTests
./mvnw install -pl $(MODULE) -am -DskipTests $(MAVEN_PROFILES)

.PHONY: test
test:
./mvnw test -pl $(MODULE)
./mvnw test -pl $(MODULE) $(MAVEN_PROFILES)

.PHONY: build
build: lint install
Expand All @@ -50,11 +58,11 @@ clean-module:

.PHONY: bundle
bundle:
./mvnw install -pl $(BUNDLE_MODULE) -am -DskipTests
./mvnw install -pl $(BUNDLE_MODULE) -am -DskipTests $(MAVEN_PROFILES)

.PHONY: install-base
install-base:
./mvnw install -pl $(BASE_MODULE) -am -DskipTests
./mvnw install -pl $(BASE_MODULE) -am -DskipTests $(MAVEN_PROFILES)

# =============================================================================
# Global commands (all modules)
Expand All @@ -68,13 +76,24 @@ lint:
format:
./mvnw spotless:apply

# All supported Spark/Scala combinations
SPARK_SCALA_COMBOS := 3.4_2.12 3.4_2.13 3.5_2.12 3.5_2.13 4.0_2.13

.PHONY: install-all
install-all:
./mvnw install -DskipTests
@for combo in $(SPARK_SCALA_COMBOS); do \
spark=$${combo%%_*}; scala=$${combo#*_}; \
echo "=== Installing Spark $$spark / Scala $$scala ==="; \
$(MAKE) install SPARK_VERSION=$$spark SCALA_VERSION=$$scala || exit 1; \
done

.PHONY: test-all
test-all:
./mvnw test
@for combo in $(SPARK_SCALA_COMBOS); do \
spark=$${combo%%_*}; scala=$${combo#*_}; \
echo "=== Testing Spark $$spark / Scala $$scala ==="; \
$(MAKE) test SPARK_VERSION=$$spark SCALA_VERSION=$$scala || exit 1; \
done

.PHONY: build-all
build-all: lint install-all
Expand All @@ -95,9 +114,15 @@ clean:

.PHONY: docker-build
docker-build:
$(MAKE) bundle SPARK_VERSION=3.5 SCALA_VERSION=2.12
cp lance-spark-bundle-3.5_2.12/target/lance-spark-bundle-3.5_2.12-*.jar docker/
cd docker && docker compose build --no-cache spark-lance
@ls $(BUNDLE_MODULE)/target/$(BUNDLE_MODULE)-*.jar >/dev/null 2>&1 || \
(echo "Error: Bundle jar not found. Run 'make bundle' first." && exit 1)
rm -f docker/lance-spark-bundle-*.jar
cp $(BUNDLE_MODULE)/target/$(BUNDLE_MODULE)-*.jar docker/
cd docker && $(DOCKER_COMPOSE) build --no-cache \
--build-arg SPARK_DOWNLOAD_VERSION=$(SPARK_DOWNLOAD_VERSION) \
--build-arg SPARK_MAJOR_VERSION=$(SPARK_VERSION) \
--build-arg SCALA_VERSION=$(SCALA_VERSION) \
spark-lance

.PHONY: docker-up
docker-up: check-docker-compose
Expand All @@ -111,6 +136,28 @@ docker-shell:
docker-down: check-docker-compose
cd docker && ${DOCKER_COMPOSE} down

# Build the spark-lance-minimal:latest test image from docker/Dockerfile.minimal.
#
# Preconditions (checked up front so the failure is immediate and readable):
#   * SPARK_DOWNLOAD_VERSION and PY4J_VERSION resolved to non-empty values for
#     the selected SPARK_VERSION. An empty --build-arg would override the
#     Dockerfile default and only fail later with a broken download URL.
#   * The bundle jar for SPARK_VERSION/SCALA_VERSION has already been built.
#
# Any previously staged bundle jar in docker/ is removed first so the image
# never picks up a jar built for a different Spark/Scala combination.
.PHONY: docker-build-minimal
docker-build-minimal:
	@test -n "$(SPARK_DOWNLOAD_VERSION)" && test -n "$(PY4J_VERSION)" || \
		(echo "Error: No Spark download/py4j version defined for SPARK_VERSION=$(SPARK_VERSION). Check docker/versions.mk." && exit 1)
	@ls $(BUNDLE_MODULE)/target/$(BUNDLE_MODULE)-*.jar >/dev/null 2>&1 || \
		(echo "Error: Bundle jar not found. Run 'make bundle' first." && exit 1)
	rm -f docker/lance-spark-bundle-*.jar
	cp $(BUNDLE_MODULE)/target/$(BUNDLE_MODULE)-*.jar docker/
	cd docker && docker build \
		--build-arg SPARK_DOWNLOAD_VERSION=$(SPARK_DOWNLOAD_VERSION) \
		--build-arg SPARK_MAJOR_VERSION=$(SPARK_VERSION) \
		--build-arg SCALA_VERSION=$(SCALA_VERSION) \
		--build-arg PY4J_VERSION=$(PY4J_VERSION) \
		-f Dockerfile.minimal \
		-t spark-lance-minimal:latest \
		.

# Run the integration test suite inside the spark-lance-minimal image.
# Fails early with a hint when the image has not been built yet; otherwise
# starts a throwaway container (--rm) and hands the pytest command line to the
# image's entrypoint as a single argument.
.PHONY: docker-test
docker-test:
	@if ! docker image inspect spark-lance-minimal:latest >/dev/null 2>&1; then \
		echo "Error: Docker image 'spark-lance-minimal:latest' not found. Run 'make docker-build-minimal' first."; \
		exit 1; \
	fi
	docker run --rm --hostname spark-lance spark-lance-minimal:latest \
		"pytest /home/lance/tests/ -v --timeout=120"

# =============================================================================
# Documentation
# =============================================================================
Expand Down Expand Up @@ -151,6 +198,7 @@ help:
@echo " docker-up - Start docker containers"
@echo " docker-shell - Open shell in spark-lance container"
@echo " docker-down - Stop docker containers"
@echo " docker-test - Run integration tests in spark-lance-minimal container"
@echo ""
@echo "Documentation:"
@echo " serve-docs - Serve documentation locally"
15 changes: 12 additions & 3 deletions docker/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -27,14 +27,20 @@ RUN python3 -m venv /opt/venv && \

ENV PATH="/opt/venv/bin:$PATH"

# Build arguments
# Defaults target Spark 4.0.0 / Scala 2.13; each can be overridden at build
# time with --build-arg (the Makefile's docker-build target passes all three).
ARG SPARK_DOWNLOAD_VERSION=4.0.0
ARG SPARK_MAJOR_VERSION=4.0
ARG SCALA_VERSION=2.13

# Optional env variables
# SPARK_HOME falls back to /opt/spark when it is not already set.
ENV SPARK_HOME=${SPARK_HOME:-"/opt/spark"}
# NOTE(review): the py4j zip name is hard-coded to 0.10.9.7 here, while
# Dockerfile.minimal takes it from a PY4J_VERSION build arg (default 0.10.9.9
# alongside Spark 4.0.0). Confirm this file name matches the py4j zip shipped
# in the Spark distribution selected by SPARK_DOWNLOAD_VERSION; a non-existent
# PYTHONPATH entry is silently ignored by Python.
ENV PYTHONPATH=$SPARK_HOME/python:$SPARK_HOME/python/lib/py4j-0.10.9.7-src.zip:$PYTHONPATH

WORKDIR ${SPARK_HOME}

ENV SPARK_VERSION=3.5.8
ENV SPARK_MAJOR_VERSION=3.5
ENV SPARK_VERSION=${SPARK_DOWNLOAD_VERSION}
ENV SPARK_MAJOR_VERSION=${SPARK_MAJOR_VERSION}
ENV SCALA_VERSION=${SCALA_VERSION}
ENV LANCE_SPARK_VERSION=0.1.3-beta.8
Comment thread
hamersaw marked this conversation as resolved.
ENV LANCE_NS_VERSION=0.4.5

Expand All @@ -53,7 +59,7 @@ RUN curl -L https://repo1.maven.org/maven2/com/lancedb/lance-namespace-glue/${LA
-o /opt/spark/jars/lance-namespace-glue-${LANCE_NS_VERSION}.jar

# For local testing, uncomment the lines below and comment out the Maven downloads above:
#COPY lance-spark-bundle-${SPARK_MAJOR_VERSION}_2.12-${LANCE_SPARK_VERSION}.jar /opt/spark/jars/
#COPY lance-spark-bundle-${SPARK_MAJOR_VERSION}_${SCALA_VERSION}-${LANCE_SPARK_VERSION}.jar /opt/spark/jars/
#COPY lance-namespace-glue-${LANCE_NS_VERSION}.jar /opt/spark/jars/

# Download OpenDAL native libraries for Linux architectures
Expand Down Expand Up @@ -82,6 +88,9 @@ RUN mkdir -p /home/lance/warehouse /home/lance/notebooks /home/lance/spark-event
# Copy notebooks if available
COPY notebooks/ /home/lance/notebooks/

# Copy tests
COPY tests/ /home/lance/tests/

# Add a notebook command
RUN echo '#! /bin/sh' >> /bin/notebook \
&& echo 'export PYSPARK_DRIVER_PYTHON=jupyter-lab' >> /bin/notebook \
Expand Down
67 changes: 67 additions & 0 deletions docker/Dockerfile.minimal
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
# syntax=docker/dockerfile:1
#
# Minimal image for running lance-spark integration tests.
#
# Layers are ordered from most to least stable so rebuilds reuse the cache:
#   1. system packages and Python test dependencies
#   2. Spark distribution (re-run only when the version build args change)
#   3. bundle JAR, Spark config, tests and entrypoint (re-run whenever the
#      copied files change)
#
# Build args:
#   SPARK_DOWNLOAD_VERSION  full Spark release to download, e.g. 4.0.0
#   SPARK_MAJOR_VERSION     major.minor used in the bundle JAR name, e.g. 4.0
#   SCALA_VERSION           Scala binary version of the bundle JAR, e.g. 2.13
#   PY4J_VERSION            version in the name of the py4j zip under
#                           ${SPARK_HOME}/python/lib

FROM ubuntu:24.04

# --- Stable layers: system packages and Python test deps ---
# ca-certificates is listed explicitly because curl needs it for HTTPS and
# --no-install-recommends would otherwise leave it to transitive dependencies.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        ca-certificates \
        curl \
        openjdk-17-jdk-headless \
        python3 \
        python3-dev \
        python3-venv \
        python3-pip && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*

# Test tooling is pinned to the same versions as docker/requirements.txt so
# both images run the tests with identical pytest behaviour.
RUN python3 -m venv /opt/venv && \
    . /opt/venv/bin/activate && \
    pip install --upgrade pip && \
    pip install pytest==8.3.5 pytest-timeout==2.3.1

ENV PATH="/opt/venv/bin:$PATH"
ENV SPARK_HOME="/opt/spark"
ENV PYTHONPATH="${SPARK_HOME}/python:${PYTHONPATH}"
ENV PATH="${SPARK_HOME}/sbin:${SPARK_HOME}/bin:${PATH}"

ARG SPARK_DOWNLOAD_VERSION=4.0.0
ARG SPARK_MAJOR_VERSION=4.0
ARG SCALA_VERSION=2.13
ARG PY4J_VERSION=0.10.9.9

ENV PYTHONPATH="${SPARK_HOME}/python/lib/py4j-${PY4J_VERSION}-src.zip:${PYTHONPATH}"

# --- Spark distribution (cached per set of version build args) ---
# * Spark 3.x publishes its Scala 2.13 build as a separate "-scala2.13"
#   tarball; the plain "bin-hadoop3" tarball is Scala 2.12 there, so the
#   download must follow SCALA_VERSION to match the bundle JAR copied below.
# * curl -f makes HTTP errors fail the build instead of saving an error page
#   that only breaks later in tar; -L follows mirror redirects.
# * dlcdn.apache.org only carries current releases, so fall back to the Apache
#   archive for older patch versions.
RUN set -eu; \
    case "${SPARK_MAJOR_VERSION}/${SCALA_VERSION}" in \
        3.*/2.13) spark_pkg="spark-${SPARK_DOWNLOAD_VERSION}-bin-hadoop3-scala2.13" ;; \
        *)        spark_pkg="spark-${SPARK_DOWNLOAD_VERSION}-bin-hadoop3" ;; \
    esac; \
    mkdir -p "${SPARK_HOME}"; \
    curl -fsSL "https://dlcdn.apache.org/spark/spark-${SPARK_DOWNLOAD_VERSION}/${spark_pkg}.tgz" -o spark.tgz \
        || curl -fsSL "https://archive.apache.org/dist/spark/spark-${SPARK_DOWNLOAD_VERSION}/${spark_pkg}.tgz" -o spark.tgz; \
    tar xzf spark.tgz --directory "${SPARK_HOME}" --strip-components 1; \
    rm spark.tgz; \
    chmod u+x "${SPARK_HOME}"/sbin/* "${SPARK_HOME}"/bin/*

# Create directories for Spark events, test data and tests
RUN mkdir -p /home/lance/warehouse /home/lance/spark-events /home/lance/data /home/lance/tests

# --- Frequently changing layers: bundle JAR, config, tests, entrypoint ---
# No explicit cache-buster is needed: a COPY layer (and everything after it)
# is rebuilt automatically whenever the copied files change.
COPY lance-spark-bundle-${SPARK_MAJOR_VERSION}_${SCALA_VERSION}-*.jar ${SPARK_HOME}/jars/

# Spark configuration
COPY spark-defaults.conf ${SPARK_HOME}/conf/

# Copy tests
COPY tests/ /home/lance/tests/

WORKDIR ${SPARK_HOME}
COPY entrypoint.sh .

# The default command keeps the container alive; callers such as
# `make docker-test` replace it with the command to run.
ENTRYPOINT ["./entrypoint.sh"]
CMD ["tail", "-f", "/dev/null"]
8 changes: 2 additions & 6 deletions docker/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,20 +3,16 @@ services:
spark-lance:
image: spark-lance:latest
container_name: spark-lance
build:
build:
context: .
dockerfile: Dockerfile
args:
- SPARK_VERSION=3.5.6
- LANCE_VERSION=0.0.5
networks:
lance_net:
depends_on:
- minio
volumes:
- ./warehouse:/home/lance/warehouse
- ./notebooks:/home/lance/notebooks
- ../lance-spark-bundle-3.5_2.12/target:/lance-jars
environment:
- AWS_ACCESS_KEY_ID=admin
- AWS_SECRET_ACCESS_KEY=password
Expand Down Expand Up @@ -66,4 +62,4 @@ services:
"

networks:
lance_net:
lance_net:
2 changes: 2 additions & 0 deletions docker/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
pytest==8.3.5
pytest-timeout==2.3.1
jupyterlab==3.6.7
jupyter-server==1.24.0
pandas==2.3.1
Expand Down
Loading
Loading