Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 39 additions & 3 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,11 @@ MODULE := lance-spark-$(SPARK_VERSION)_$(SCALA_VERSION)
BUNDLE_MODULE := lance-spark-bundle-$(SPARK_VERSION)_$(SCALA_VERSION)
BASE_MODULE := lance-spark-base_$(SCALA_VERSION)

# Spark download versions for Docker
include docker/versions.mk
SPARK_DOWNLOAD_VERSION := $(SPARK_DOWNLOAD_VERSION_$(SPARK_VERSION))
PY4J_VERSION := $(PY4J_VERSION_$(SPARK_VERSION))

DOCKER_COMPOSE := $(shell \
if docker compose version >/dev/null 2>&1; then \
echo "docker compose"; \
Expand Down Expand Up @@ -95,9 +100,15 @@ clean:

.PHONY: docker-build
# Build the compose 'spark-lance' image from the locally built bundle jar.
# Requires 'make bundle' to have produced $(BUNDLE_MODULE)/target/*.jar;
# SPARK_DOWNLOAD_VERSION comes from docker/versions.mk (included above).
# Note: SPARK_MAJOR_VERSION build-arg is fed from $(SPARK_VERSION), the
# major version (e.g. 3.5), not the full download version.
docker-build:
	@ls $(BUNDLE_MODULE)/target/$(BUNDLE_MODULE)-*.jar >/dev/null 2>&1 || \
		(echo "Error: Bundle jar not found. Run 'make bundle' first." && exit 1)
	rm -f docker/lance-spark-bundle-*.jar
	cp $(BUNDLE_MODULE)/target/$(BUNDLE_MODULE)-*.jar docker/
	cd docker && $(DOCKER_COMPOSE) build --no-cache \
		--build-arg SPARK_DOWNLOAD_VERSION=$(SPARK_DOWNLOAD_VERSION) \
		--build-arg SPARK_MAJOR_VERSION=$(SPARK_VERSION) \
		--build-arg SCALA_VERSION=$(SCALA_VERSION) \
		spark-lance

.PHONY: docker-up
docker-up: check-docker-compose
Expand All @@ -111,6 +122,30 @@ docker-shell:
docker-down: check-docker-compose
cd docker && ${DOCKER_COMPOSE} down

.PHONY: docker-build-minimal
# Build the standalone integration-test image from Dockerfile.minimal,
# tagged spark-lance-minimal:$(SPARK_VERSION)_$(SCALA_VERSION).
# Requires the bundle jar from 'make bundle'; fails fast with a hint if
# it is missing. Stale jars are removed first so the Dockerfile's
# wildcard COPY cannot pick up a jar for a different version. Version
# build-args (SPARK_DOWNLOAD_VERSION, PY4J_VERSION) are resolved from
# docker/versions.mk, included near the top of this file.
docker-build-minimal:
	@ls $(BUNDLE_MODULE)/target/$(BUNDLE_MODULE)-*.jar >/dev/null 2>&1 || \
		(echo "Error: Bundle jar not found. Run 'make bundle' first." && exit 1)
	rm -f docker/lance-spark-bundle-*.jar
	cp $(BUNDLE_MODULE)/target/$(BUNDLE_MODULE)-*.jar docker/
	cd docker && docker build \
		--build-arg SPARK_DOWNLOAD_VERSION=$(SPARK_DOWNLOAD_VERSION) \
		--build-arg SPARK_MAJOR_VERSION=$(SPARK_VERSION) \
		--build-arg SCALA_VERSION=$(SCALA_VERSION) \
		--build-arg PY4J_VERSION=$(PY4J_VERSION) \
		-f Dockerfile.minimal \
		-t spark-lance-minimal:$(SPARK_VERSION)_$(SCALA_VERSION) \
		.

.PHONY: docker-test
# Run the pytest integration suite inside the spark-lance-minimal image
# built by 'docker-build-minimal'; fails fast with a hint if the image
# does not exist. The quoted command string is handed to the image's
# entrypoint. SPARK_VERSION is passed into the container environment —
# presumably so tests can branch on the Spark version; confirm against
# the test suite. --timeout=120 relies on pytest-timeout, which
# Dockerfile.minimal installs into the image's venv.
docker-test:
	@docker image inspect spark-lance-minimal:$(SPARK_VERSION)_$(SCALA_VERSION) >/dev/null 2>&1 || \
		(echo "Error: Docker image 'spark-lance-minimal:$(SPARK_VERSION)_$(SCALA_VERSION)' not found. Run 'make docker-build-minimal' first." && exit 1)
	docker run --rm --hostname spark-lance \
		-e SPARK_VERSION=$(SPARK_VERSION) \
		spark-lance-minimal:$(SPARK_VERSION)_$(SCALA_VERSION) \
		"pytest /home/lance/tests/ -v --timeout=120"

# =============================================================================
# Documentation
# =============================================================================
Expand Down Expand Up @@ -151,6 +186,7 @@ help:
@echo " docker-up - Start docker containers"
@echo " docker-shell - Open shell in spark-lance container"
@echo " docker-down - Stop docker containers"
@echo " docker-test - Run integration tests in spark-lance-minimal container"
@echo ""
@echo "Documentation:"
@echo " serve-docs - Serve documentation locally"
15 changes: 12 additions & 3 deletions docker/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -27,14 +27,20 @@ RUN python3 -m venv /opt/venv && \

ENV PATH="/opt/venv/bin:$PATH"

# Build arguments
ARG SPARK_DOWNLOAD_VERSION=4.0.0
ARG SPARK_MAJOR_VERSION=4.0
ARG SCALA_VERSION=2.13

# Optional env variables
ENV SPARK_HOME=${SPARK_HOME:-"/opt/spark"}
ENV PYTHONPATH=$SPARK_HOME/python:$SPARK_HOME/python/lib/py4j-0.10.9.7-src.zip:$PYTHONPATH

WORKDIR ${SPARK_HOME}

ENV SPARK_VERSION=3.5.8
ENV SPARK_MAJOR_VERSION=3.5
ENV SPARK_VERSION=${SPARK_DOWNLOAD_VERSION}
ENV SPARK_MAJOR_VERSION=${SPARK_MAJOR_VERSION}
ENV SCALA_VERSION=${SCALA_VERSION}
ENV LANCE_SPARK_VERSION=0.1.3-beta.8
Comment thread
hamersaw marked this conversation as resolved.
ENV LANCE_NS_VERSION=0.4.5

Expand All @@ -53,7 +59,7 @@ RUN curl -L https://repo1.maven.org/maven2/com/lancedb/lance-namespace-glue/${LA
-o /opt/spark/jars/lance-namespace-glue-${LANCE_NS_VERSION}.jar

# For local testing, uncomment the lines below and comment out the Maven downloads above:
#COPY lance-spark-bundle-${SPARK_MAJOR_VERSION}_2.12-${LANCE_SPARK_VERSION}.jar /opt/spark/jars/
#COPY lance-spark-bundle-${SPARK_MAJOR_VERSION}_${SCALA_VERSION}-${LANCE_SPARK_VERSION}.jar /opt/spark/jars/
#COPY lance-namespace-glue-${LANCE_NS_VERSION}.jar /opt/spark/jars/

# Download OpenDAL native libraries for Linux architectures
Expand Down Expand Up @@ -82,6 +88,9 @@ RUN mkdir -p /home/lance/warehouse /home/lance/notebooks /home/lance/spark-event
# Copy notebooks if available
COPY notebooks/ /home/lance/notebooks/

# Copy tests
COPY tests/ /home/lance/tests/

# Add a notebook command
RUN echo '#! /bin/sh' >> /bin/notebook \
&& echo 'export PYSPARK_DRIVER_PYTHON=jupyter-lab' >> /bin/notebook \
Expand Down
67 changes: 67 additions & 0 deletions docker/Dockerfile.minimal
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
# syntax=docker/dockerfile:1
#
# Minimal image for running lance-spark integration tests.
# Layers are ordered so that package installation and the Spark download
# are cached; only the bundle JAR copy and later layers run on each rebuild.

FROM ubuntu:24.04

# --- Cached layers: system packages, Python test deps, Spark download ---
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        curl \
        openjdk-17-jdk-headless \
        python3 \
        python3-dev \
        python3-venv \
        python3-pip && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*

RUN python3 -m venv /opt/venv && \
    . /opt/venv/bin/activate && \
    pip install --upgrade pip && \
    pip install pytest pytest-timeout packaging

ENV PATH="/opt/venv/bin:$PATH"
ENV SPARK_HOME="/opt/spark"
ENV PYTHONPATH="${SPARK_HOME}/python:${PYTHONPATH}"
ENV PATH="${SPARK_HOME}/sbin:${SPARK_HOME}/bin:${PATH}"

# Build-time knobs; the Makefile overrides these via --build-arg from
# docker/versions.mk.
ARG SPARK_DOWNLOAD_VERSION=4.0.0
ARG SPARK_MAJOR_VERSION=4.0
ARG SCALA_VERSION=2.13
ARG PY4J_VERSION=0.10.9.9

ENV PYTHONPATH="${SPARK_HOME}/python/lib/py4j-${PY4J_VERSION}-src.zip:${PYTHONPATH}"

# -f: fail the build on an HTTP error instead of saving the error page as
#     spark.tgz (which would only surface later as a tar failure);
# -L: follow redirects, matching the curl usage in the main Dockerfile.
RUN mkdir -p ${SPARK_HOME} \
    && curl -fL https://archive.apache.org/dist/spark/spark-${SPARK_DOWNLOAD_VERSION}/spark-${SPARK_DOWNLOAD_VERSION}-bin-hadoop3.tgz \
        -o spark.tgz \
    && tar xzf spark.tgz --directory ${SPARK_HOME} --strip-components 1 \
    && rm spark.tgz

# --- Uncached layers: bundle JAR copy and everything below ---

# Random bytes as a cache buster so the JAR copy always re-runs.
# NOTE(review): this makes every build depend on www.random.org being
# reachable (offline builds fail here) — confirm this trade-off is intended.
ADD "https://www.random.org/cgi-bin/randbyte?nbytes=10&format=h" skipcache

COPY lance-spark-bundle-${SPARK_MAJOR_VERSION}_${SCALA_VERSION}-*.jar ${SPARK_HOME}/jars/

# Spark configuration and entrypoint
COPY spark-defaults.conf ${SPARK_HOME}/conf/
RUN chmod u+x ${SPARK_HOME}/sbin/* && \
    chmod u+x ${SPARK_HOME}/bin/*

# Create directories for Spark events and test data
RUN mkdir -p /home/lance/warehouse /home/lance/spark-events /home/lance/data

# Copy tests
RUN mkdir -p /home/lance/tests
COPY tests/ /home/lance/tests/

WORKDIR ${SPARK_HOME}
COPY entrypoint.sh .

# Default: run the entrypoint, then idle so the container stays up for exec.
ENTRYPOINT ["./entrypoint.sh"]
CMD ["tail", "-f", "/dev/null"]
8 changes: 2 additions & 6 deletions docker/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,20 +3,16 @@ services:
spark-lance:
image: spark-lance:latest
container_name: spark-lance
build:
build:
context: .
dockerfile: Dockerfile
args:
- SPARK_VERSION=3.5.6
- LANCE_VERSION=0.0.5
networks:
lance_net:
depends_on:
- minio
volumes:
- ./warehouse:/home/lance/warehouse
- ./notebooks:/home/lance/notebooks
- ../lance-spark-bundle-3.5_2.12/target:/lance-jars
environment:
- AWS_ACCESS_KEY_ID=admin
- AWS_SECRET_ACCESS_KEY=password
Expand Down Expand Up @@ -66,4 +62,4 @@ services:
"

networks:
lance_net:
lance_net:
2 changes: 2 additions & 0 deletions docker/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
pytest==8.3.5
pytest-timeout==2.3.1
jupyterlab==3.6.7
jupyter-server==1.24.0
pandas==2.3.1
Expand Down
Loading