[SPARK-22960][k8s] Make build-push-docker-images.sh more dev-friendly. #20154
Changes from 1 commit
First file (the Kubernetes docs page):

```diff
@@ -16,6 +16,8 @@ Kubernetes scheduler that has been added to Spark.
 you may setup a test cluster on your local machine using
 [minikube](https://kubernetes.io/docs/getting-started-guides/minikube/).
 * We recommend using the latest release of minikube with the DNS addon enabled.
+* Be aware that the default minikube configuration is not enough for running Spark applications.
+  You will need to increase the available memory and number of CPUs.
 * You must have appropriate permissions to list, create, edit and delete
 [pods](https://kubernetes.io/docs/user-guide/pods/) in your cluster. You can verify that you can list these resources
 by running `kubectl auth can-i <list|create|edit|delete> pods`.
```
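The added lines advise bumping minikube's resources without saying how. As a concrete starting point, this is one way to do it (the resource values come from the review thread at the bottom of this page, not from the patch itself):

```sh
# Give minikube enough room for a driver plus one executor
# (reviewers suggest ~4 CPUs / 4g of memory; 6g for two executors).
minikube start --cpus 4 --memory 4096

# Then verify the pod permissions the docs call out, e.g.:
kubectl auth can-i create pods
```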
```diff
@@ -197,7 +199,7 @@ kubectl port-forward <driver-pod-name> 4040:4040
 
 Then, the Spark driver UI can be accessed on `http://localhost:4040`.
 
-### Debugging
+### Debugging
 
 There may be several kinds of failures. If the Kubernetes API server rejects the request made from spark-submit, or the
 connection is refused for a different reason, the submission logic should indicate the error encountered. However, if there
```
````diff
@@ -215,8 +217,8 @@ If the pod has encountered a runtime error, the status can be probed further usi
 kubectl logs <spark-driver-pod>
 ```
 
-Status and logs of failed executor pods can be checked in similar ways. Finally, deleting the driver pod will clean up the entire spark
-application, includling all executors, associated service, etc. The driver pod can be thought of as the Kubernetes representation of
+Status and logs of failed executor pods can be checked in similar ways. Finally, deleting the driver pod will clean up the entire spark
+application, includling all executors, associated service, etc. The driver pod can be thought of as the Kubernetes representation of
 the Spark application.
 
 ## Kubernetes Features
````
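For context, the inspect-and-clean-up flow this passage describes looks roughly like the following (`kubectl describe` is standard kubectl usage, not something this hunk adds):

```sh
# Probe a failed driver pod's status and events, then read its logs.
kubectl describe pod <spark-driver-pod>
kubectl logs <spark-driver-pod>

# Deleting the driver pod tears down the whole Spark application:
# all executors, the associated service, etc.
kubectl delete pod <spark-driver-pod>
```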
Second file (sbin/build-push-docker-images.sh):
```diff
@@ -19,51 +19,118 @@
 # This script builds and pushes docker images when run from a release of Spark
 # with Kubernetes support.
 
-declare -A path=( [spark-driver]=kubernetes/dockerfiles/driver/Dockerfile \
-  [spark-executor]=kubernetes/dockerfiles/executor/Dockerfile \
-  [spark-init]=kubernetes/dockerfiles/init-container/Dockerfile )
+function error {
+  echo "$@" 1>&2
+  exit 1
+}
+
+# Detect whether this is a git clone or a Spark distribution and adjust paths
+# accordingly.
+if [ -z "${SPARK_HOME}" ]; then
+  SPARK_HOME="$(cd "`dirname "$0"`"/..; pwd)"
+fi
+. "${SPARK_HOME}/bin/load-spark-env.sh"
+
+if [ -f "$SPARK_HOME/RELEASE" ]; then
+  IMG_PATH="kubernetes/dockerfiles"
+  SPARK_JARS="jars"
+else
+  IMG_PATH="resource-managers/kubernetes/docker/src/main/dockerfiles"
+  SPARK_JARS="assembly/target/scala-$SPARK_SCALA_VERSION/jars"
+fi
+
+if [ ! -d "$IMG_PATH" ]; then
+  error "Cannot find docker images. This script must be run from a runnable distribution of Apache Spark."
```
Contributor: Update this comment? I presume now it should say runnable distribution, or from source.

Contributor (author): The source directory is sort of a "runnable distribution" if Spark is built. I'd rather keep the message simple since it's mostly targeted at end users (not devs).

Contributor: SGTM
```diff
+fi
+
+declare -A path=( [spark-driver]="$IMG_PATH/driver/Dockerfile" \
+  [spark-executor]="$IMG_PATH/executor/Dockerfile" \
+  [spark-init]="$IMG_PATH/init-container/Dockerfile" )
+
+function image_ref {
+  local image="$1"
+  local add_repo="${2:-1}"
+  if [ $add_repo = 1 ] && [ -n "$REPO" ]; then
+    image="$REPO/$image"
+  fi
+  if [ -n "$TAG" ]; then
+    image="$image:$TAG"
+  fi
+  echo "$image"
+}
```
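To make the new helper concrete, here is how `image_ref` expands for a couple of inputs (my own worked examples, not part of the patch):

```sh
REPO=docker.io/myrepo TAG=v2.3.0
image_ref spark-driver    # -> docker.io/myrepo/spark-driver:v2.3.0
image_ref spark-base 0    # -> spark-base:v2.3.0 (second arg suppresses the repo)

REPO= TAG=
image_ref spark-driver    # -> spark-driver (bare name when repo and tag are unset)
```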
```diff
 
 function build {
-  docker build -t spark-base -f kubernetes/dockerfiles/spark-base/Dockerfile .
+  local base_image="$(image_ref spark-base 0)"
+  docker build --build-arg "spark_jars=$SPARK_JARS" \
+    --build-arg "img_path=$IMG_PATH" \
+    -t "$base_image" \
+    -f "$IMG_PATH/spark-base/Dockerfile" .
   for image in "${!path[@]}"; do
-    docker build -t ${REPO}/$image:${TAG} -f ${path[$image]} .
+    docker build --build-arg "base_image=$base_image" -t "$(image_ref $image)" -f ${path[$image]} .
   done
 }
 
 function push {
   for image in "${!path[@]}"; do
-    docker push ${REPO}/$image:${TAG}
+    docker push "$(image_ref $image)"
   done
 }
```
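Expanded by hand with `-r docker.io/myrepo -t v2.3.0` on a distribution layout, the build step boils down to commands like these (my expansion, assuming the Dockerfiles sit at the computed paths):

```sh
# Base image carries no repo prefix (image_ref spark-base 0).
docker build --build-arg "spark_jars=jars" \
  --build-arg "img_path=kubernetes/dockerfiles" \
  -t spark-base:v2.3.0 \
  -f kubernetes/dockerfiles/spark-base/Dockerfile .

# Each derived image builds on top of it.
docker build --build-arg "base_image=spark-base:v2.3.0" \
  -t docker.io/myrepo/spark-driver:v2.3.0 \
  -f kubernetes/dockerfiles/driver/Dockerfile .
# ...and likewise for spark-executor and spark-init.
```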
```diff
 
 function usage {
-  echo "This script must be run from a runnable distribution of Apache Spark."
-  echo "Usage: ./sbin/build-push-docker-images.sh -r <repo> -t <tag> build"
-  echo "       ./sbin/build-push-docker-images.sh -r <repo> -t <tag> push"
-  echo "for example: ./sbin/build-push-docker-images.sh -r docker.io/myrepo -t v2.3.0 push"
+  cat <<EOF
+Usage: $0 [options] [command]
+
+Builds or pushes the built-in Spark docker images.
+
+Commands:
+  build       Build docker images.
+  push        Push images to a registry. Requires a repository address to be provided, both
+              when building and when pushing the images.
+
+Options:
+  -r repo     Repository address.
+  -t tag      Tag to apply to built images, or to identify images to be pushed.
+  -m          Use minikube environment when invoking docker.
+
+Example:
+  $0 -r docker.io/myrepo -t v2.3.0 push
+EOF
 }
 
+if [[ "$@" = *--help ]] || [[ "$@" = *-h ]]; then
+  usage
+  exit 0
+fi
 
-while getopts r:t: option
+REPO=
+TAG=
+while getopts mr:t: option
 do
   case "${option}"
   in
   r) REPO=${OPTARG};;
   t) TAG=${OPTARG};;
+  m)
+    if ! which minikube 1>/dev/null; then
+      error "Cannot find minikube."
+    fi
+    eval $(minikube docker-env)
```
Contributor: I think building docker images right into the minikube VM's docker daemon is uncommon and not something we'd want to recommend. Users on minikube should also use a proper registry (for example, there is a registry addon that could be used). While this might be good to document as a local developer workflow, I'm apprehensive about adding a new flag just for this particular mode. Also, one could invoke `eval $(minikube docker-env)` separately before running the script.

Contributor (author): I started calling that command separately, but it's really annoying. This option is useful not just for Spark devs, but for people who want to try their own apps on minikube before trying them on a larger cluster, for example. What's the alternative? Deploying your own registry? I struggled with that for hours, and it's nearly impossible to get docker to talk to an insecure registry (or one with a self-signed cert like minikube's). This approach just worked (tm).

Contributor: I see your point - this is considerably easier. I spoke with a minikube maintainer, and it seems this is not as uncommon as I initially thought. So this change looks good, but I'd prefer that we add some more explanation to the usage section, noting that this will build images within the minikube environment, and also link to https://kubernetes.io/docs/getting-started-guides/minikube/#reusing-the-docker-daemon. cc/ @aaron-prindle
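The manual alternative discussed above would look roughly like this (a sketch of the daemon-reuse workflow from the linked minikube docs, not part of the patch):

```sh
# Point the local docker client at minikube's daemon, then build as usual;
# the images become visible to pods in the minikube cluster without a push.
eval $(minikube docker-env)
./sbin/build-push-docker-images.sh -t testing build

# Revert to the host daemon afterwards.
eval $(minikube docker-env --unset)
```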
```diff
+    ;;
 esac
 done
 
-if [ -z "$REPO" ] || [ -z "$TAG" ]; then
-  usage
-else
-  case "${@: -1}" in
-    build) build;;
-    push) push;;
-    *) usage;;
-  esac
-fi
+case "${@: -1}" in
+  build)
+    build
+    ;;
+  push)
+    if [ -z "$REPO" ]; then
+      usage
+      exit 1
+    fi
+    push
+    ;;
+  *)
+    usage
+    exit 1
+    ;;
+esac
```
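With the reworked argument handling, invocations like the following should work (my own examples, built from the usage text above):

```sh
# Build images locally; no repository needed when only building:
./sbin/build-push-docker-images.sh -t v2.3.0 build

# Build and push to a registry (push refuses to run without -r):
./sbin/build-push-docker-images.sh -r docker.io/myrepo -t v2.3.0 build
./sbin/build-push-docker-images.sh -r docker.io/myrepo -t v2.3.0 push

# Build straight into minikube's docker daemon:
./sbin/build-push-docker-images.sh -m -t testing build
```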
(Review thread on the docs note about increasing minikube memory and CPUs:)

Contributor: I think if we're going into detail, we should specify a certain config here. 6G of memory and at least 2 CPUs? @liyinan926, do you recall what we typically need for SparkPi?

Contributor: Driver + default minikube overhead uses 1.25 CPUs from what I remember seeing in the dashboard. Don't remember the memory usage. So I'd say 4 CPUs + 4g of memory (to allow driver + a single executor), 6g if you want two executors.

Contributor: I remember 3 CPUs being the minimum, considering that kube-system pods will use some CPU cores. For me, 4G of memory worked fine.