diff --git a/bin/docker-image-tool.sh b/bin/docker-image-tool.sh
index 83b13b83341d..ad31bd1e7b7a 100755
--- a/bin/docker-image-tool.sh
+++ b/bin/docker-image-tool.sh
@@ -232,7 +232,8 @@ Commands:
   push        Push a pre-built image to a registry. Requires a repository address to be provided.
 
 Options:
-  -f file               Dockerfile to build for JVM based Jobs. By default builds the Dockerfile shipped with Spark.
+  -f file               (Optional) Dockerfile to build for JVM based Jobs. By default builds the Dockerfile shipped with Spark.
+                        For Java 17, use `-f kubernetes/dockerfiles/spark/Dockerfile.java17`
   -p file               (Optional) Dockerfile to build for PySpark Jobs. Builds Python dependencies and ships with Spark.
                         Skips building PySpark docker image if not specified.
   -R file               (Optional) Dockerfile to build for SparkR Jobs. Builds R dependencies and ships with Spark.
@@ -267,15 +268,19 @@ Examples:
     $0 -r docker.io/myrepo -t v2.3.0 build
     $0 -r docker.io/myrepo -t v2.3.0 push
 
-  - Build and push JDK11-based image with tag "v3.0.0" to docker.io/myrepo
+  - Build and push Java11-based image with tag "v3.0.0" to docker.io/myrepo
     $0 -r docker.io/myrepo -t v3.0.0 -b java_image_tag=11-jre-slim build
     $0 -r docker.io/myrepo -t v3.0.0 push
 
-  - Build and push JDK11-based image for multiple archs to docker.io/myrepo
+  - Build and push Java11-based image for multiple archs to docker.io/myrepo
     $0 -r docker.io/myrepo -t v3.0.0 -X -b java_image_tag=11-jre-slim build
     # Note: buildx, which does cross building, needs to do the push during build
     # So there is no separate push step with -X
 
+  - Build and push Java17-based image with tag "v3.3.0" to docker.io/myrepo
+    $0 -r docker.io/myrepo -t v3.3.0 -f kubernetes/dockerfiles/spark/Dockerfile.java17 build
+    $0 -r docker.io/myrepo -t v3.3.0 push
+
 EOF
 }
diff --git a/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/Dockerfile.java17 b/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/Dockerfile.java17
new file mode 100644
index 000000000000..f9ab64e94a54
--- /dev/null
+++ b/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/Dockerfile.java17
@@ -0,0 +1,62 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# We need to build from debian:bullseye-slim because openjdk switches its underlying OS
+# from debian to oraclelinux from openjdk:12
+FROM debian:bullseye-slim
+
+ARG spark_uid=185
+
+# Before building the docker image, first build and make a Spark distribution following
+# the instructions in http://spark.apache.org/docs/latest/building-spark.html.
+# If this docker file is being used in the context of building your images from a Spark
+# distribution, the docker build command should be invoked from the top level directory
+# of the Spark distribution. E.g.:
+# docker build -t spark:latest -f kubernetes/dockerfiles/spark/Dockerfile .
+
+RUN set -ex && \
+    apt-get update && \
+    ln -s /lib /lib64 && \
+    apt install -y bash tini libc6 libpam-modules krb5-user libnss3 procps openjdk-17-jre && \
+    mkdir -p /opt/spark && \
+    mkdir -p /opt/spark/examples && \
+    mkdir -p /opt/spark/work-dir && \
+    touch /opt/spark/RELEASE && \
+    rm /bin/sh && \
+    ln -sv /bin/bash /bin/sh && \
+    echo "auth required pam_wheel.so use_uid" >> /etc/pam.d/su && \
+    chgrp root /etc/passwd && chmod ug+rw /etc/passwd && \
+    rm -rf /var/cache/apt/*
+
+COPY jars /opt/spark/jars
+COPY bin /opt/spark/bin
+COPY sbin /opt/spark/sbin
+COPY kubernetes/dockerfiles/spark/entrypoint.sh /opt/
+COPY kubernetes/dockerfiles/spark/decom.sh /opt/
+COPY examples /opt/spark/examples
+COPY kubernetes/tests /opt/spark/tests
+COPY data /opt/spark/data
+
+ENV SPARK_HOME /opt/spark
+
+WORKDIR /opt/spark/work-dir
+RUN chmod g+w /opt/spark/work-dir
+RUN chmod a+x /opt/decom.sh
+
+ENTRYPOINT [ "/opt/entrypoint.sh" ]
+
+# Specify the User that the actual main process will run as
+USER ${spark_uid}
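
Not part of the patch above, just a quick sanity check of the result. This sketch assumes the
image was built with the new Java 17 Dockerfile using the tool's own example invocation, and that
docker-image-tool.sh tags the JVM image as <repo>/spark:<tag> (its usual naming); adjust the
repository and tag to your own.

    ./bin/docker-image-tool.sh -r docker.io/myrepo -t v3.3.0 \
      -f kubernetes/dockerfiles/spark/Dockerfile.java17 build
    docker run --rm docker.io/myrepo/spark:v3.3.0 java -version
    # Expect an OpenJDK 17 runtime to be reported, since the image installs openjdk-17-jre.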
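
Likewise not part of the patch: a minimal sketch of pointing a Kubernetes submission at the image
built above. The API server address is a placeholder for your cluster, and the examples jar name
depends on the Spark/Scala versions of your distribution (a Scala 2.12 build of 3.3.0 is assumed here).

    ./bin/spark-submit \
      --master k8s://https://<k8s-apiserver-host>:6443 \
      --deploy-mode cluster \
      --name spark-pi-java17 \
      --class org.apache.spark.examples.SparkPi \
      --conf spark.kubernetes.container.image=docker.io/myrepo/spark:v3.3.0 \
      local:///opt/spark/examples/jars/spark-examples_2.12-3.3.0.jar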