diff --git a/examples/runSparkPi.sh b/examples/runSparkPi.sh
new file mode 100755
index 0000000000000..bef732a0fb319
--- /dev/null
+++ b/examples/runSparkPi.sh
@@ -0,0 +1,32 @@
+#!/bin/bash
+
+# Start up the driver, get its IP address, then start the executor with it.
+set -euo pipefail
+
+echo
+echo "starting SparkPi driver"
+armadactl submit examples/spark-pi-driver.yaml > /tmp/jobid.txt
+# armadactl's submit output carries the job id as the fifth whitespace field.
+JOB_ID=$(awk '{print $5}' /tmp/jobid.txt)
+cat /tmp/jobid.txt
+echo
+
+echo "waiting for SparkPi driver to start"
+sleep 20
+
+echo
+echo "SparkPi driver ip addr:"
+IP_ADDR=$(kubectl get pod "armada-$JOB_ID-0" -o jsonpath='{.status.podIP}')
+if [[ -z "$IP_ADDR" ]]; then
+  echo "error: pod armada-$JOB_ID-0 has no IP yet; driver may still be starting" >&2
+  exit 1
+fi
+echo "$IP_ADDR"
+echo
+
+echo "passing driver's ip addr to executor and starting it"
+IP_ADDR="$IP_ADDR" envsubst < examples/spark-pi-executor.yaml > /tmp/ex.yaml
+armadactl submit /tmp/ex.yaml
+echo
+
+echo "SparkPi driver/executor started"
diff --git a/examples/spark-pi-driver.yaml b/examples/spark-pi-driver.yaml
new file mode 100644
index 0000000000000..7d7772b907543
--- /dev/null
+++ b/examples/spark-pi-driver.yaml
@@ -0,0 +1,49 @@
+queue: test
+jobSetId: job-set-1
+jobs:
+  - namespace: default
+    priority: 0
+    podSpec:
+      terminationGracePeriodSeconds: 0
+      restartPolicy: Never
+      containers:
+        - name: spark-driver
+          image: spark:testing
+          env:
+            - name: SPARK_DRIVER_BIND_ADDRESS
+              valueFrom:
+                fieldRef:
+                  apiVersion: v1
+                  fieldPath: status.podIP
+          command:
+            - /opt/entrypoint.sh
+          args:
+            - driver
+            - --verbose
+            - --class
+            - org.apache.spark.examples.SparkPi
+            - --master
+            - armada://192.168.1.167:50051
+            - --conf
+            - "spark.driver.port=7078"
+            - --conf
+            - "spark.driver.extraJavaOptions=-agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=0.0.0.0:5005"
+            - local:///opt/spark/examples/jars/spark-examples.jar
+            - "100"
+          resources:
+            limits:
+              memory: 1Gi
+              cpu: 1
+            requests:
+              memory: 1Gi
+              cpu: 1
+          ports:
+            - containerPort: 7078
+              name: driver-rpc-port
+              protocol: TCP
+            - containerPort: 7079
+              name: blockmanager
+              protocol: TCP
+            - containerPort: 4040
+              name: spark-ui
+              protocol: TCP
diff --git a/examples/spark-pi-executor.yaml b/examples/spark-pi-executor.yaml
new file mode 100644
index 0000000000000..995bd1900b725
--- /dev/null
+++ b/examples/spark-pi-executor.yaml
@@ -0,0 +1,44 @@
+queue: test
+jobSetId: job-set-1
+jobs:
+  - namespace: default
+    priority: 0
+    podSpec:
+      terminationGracePeriodSeconds: 0
+      restartPolicy: Never
+      containers:
+        - name: spark-executor
+          image: spark:testing
+          env:
+            - name: SPARK_EXECUTOR_MEMORY
+              value: "512m"
+            - name: SPARK_DRIVER_URL
+              value: "spark://CoarseGrainedScheduler@${IP_ADDR}:7078"
+            - name: SPARK_EXECUTOR_ID
+              value: "1"
+            - name: SPARK_EXECUTOR_CORES
+              value: "1"
+            - name: SPARK_APPLICATION_ID
+              value: "test_spark_app_id"
+            - name: SPARK_EXECUTOR_POD_IP
+              valueFrom:
+                fieldRef:
+                  apiVersion: v1
+                  fieldPath: status.podIP
+            - name: SPARK_RESOURCE_PROFILE_ID
+              value: "0"
+            - name: SPARK_EXECUTOR_POD_NAME
+              value: "test-pod-name"
+            - name: SPARK_JAVA_OPT_0
+              value: -agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=5005
+          command:
+            - /opt/entrypoint.sh
+          args:
+            - executor
+          resources:
+            limits:
+              memory: 1Gi
+              cpu: 1
+            requests:
+              memory: 1Gi
+              cpu: 1
diff --git a/resource-managers/armada/core/src/main/resources/META-INF/services/io.armadaproject.spark.deploy.SparkSubmitOperation b/resource-managers/armada/core/src/main/resources/META-INF/services/org.apache.spark.deploy.SparkSubmitOperation
similarity index 100%
rename from resource-managers/armada/core/src/main/resources/META-INF/services/io.armadaproject.spark.deploy.SparkSubmitOperation
rename to resource-managers/armada/core/src/main/resources/META-INF/services/org.apache.spark.deploy.SparkSubmitOperation
diff --git a/resource-managers/armada/core/src/main/resources/META-INF/services/io.armadaproject.spark.scheduler.ExternalClusterManager b/resource-managers/armada/core/src/main/resources/META-INF/services/org.apache.spark.scheduler.ExternalClusterManager
similarity index 92%
rename from resource-managers/armada/core/src/main/resources/META-INF/services/io.armadaproject.spark.scheduler.ExternalClusterManager
rename to resource-managers/armada/core/src/main/resources/META-INF/services/org.apache.spark.scheduler.ExternalClusterManager
index 72cb48ec46478..cdf3b501b3eb9 100644
--- a/resource-managers/armada/core/src/main/resources/META-INF/services/io.armadaproject.spark.scheduler.ExternalClusterManager
+++ b/resource-managers/armada/core/src/main/resources/META-INF/services/org.apache.spark.scheduler.ExternalClusterManager
@@ -15,4 +15,4 @@
 # limitations under the License.
 #
 
-org.apache.spark.scheduler.cluster.k8s.KubernetesClusterManager
+org.apache.spark.scheduler.cluster.armada.ArmadaClusterManager
\ No newline at end of file
diff --git a/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/entrypoint.sh b/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/entrypoint.sh
index f9561b9aa4ed5..09b7c52b5b889 100755
--- a/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/entrypoint.sh
+++ b/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/entrypoint.sh
@@ -85,6 +85,7 @@ case "$1" in
       "$SPARK_HOME/bin/spark-submit"
       --conf "spark.driver.bindAddress=$SPARK_DRIVER_BIND_ADDRESS"
       --conf "spark.executorEnv.SPARK_DRIVER_POD_IP=$SPARK_DRIVER_BIND_ADDRESS"
+      --conf "spark.driver.host=$SPARK_DRIVER_BIND_ADDRESS"
       --deploy-mode client
       "$@"
     )