Skip to content
This repository was archived by the owner on Oct 23, 2024. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
05b06b5
Upgrading Konvoy to 1.5, upgrading fabric8 K8s client to 4.9.2
akirillov Aug 18, 2020
834cd0a
Switching to a dedicated branch containing required changes
akirillov Aug 19, 2020
8bac859
Bumping Spark to 3.0.0
akirillov Aug 20, 2020
ff17056
Updating test applications to Spark 3
akirillov Aug 20, 2020
200fa93
Updating operator Dockerfile after switching to go modules
akirillov Aug 20, 2020
628c406
Adding missing apt dependencies
akirillov Aug 20, 2020
7982d02
Updating examples jar name in test manifests
akirillov Aug 20, 2020
304e1c4
Increase the waiting timeout for Spark UI
alembiewski Aug 20, 2020
5a12655
Increasing instance size to accommodate the workloads
akirillov Aug 20, 2020
5b4ee6a
Merge branch 'update-fabric-client-for-k8s-1.17' of github.com:mesosp…
akirillov Aug 20, 2020
ac8b1a8
Increase number of nodes
akirillov Aug 20, 2020
2732b24
Adding cluster.yaml template for explicit configuration
akirillov Aug 20, 2020
dfd9f63
Updating Makefile to not fail when AWS_SESSION_TOKEN is not set
akirillov Aug 20, 2020
164478f
Disabling Istio and DNS addons
akirillov Aug 20, 2020
1aa2796
Updating jsonpath spec to capture instance status in KUDO 0.15.0
akirillov Aug 20, 2020
0d9995f
Replacing match string in R test, lowering down timeout to 3 minutes
akirillov Aug 20, 2020
30738fa
Bumping the number of workers to 5
akirillov Aug 21, 2020
c933136
Print driver logs in case of error
alembiewski Aug 21, 2020
1d4c7ef
Remove redundant code for handling RPC authentication
alembiewski Aug 21, 2020
1e630d9
Bump kudo version in builder image
alembiewski Aug 21, 2020
8da0f90
Removing broken linear regression application, easing metrics test cr…
akirillov Aug 21, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ konvoy-*
license.*

admin.conf
cluster.*.yaml
cluster.yaml
inventory.yaml
license.txt

Expand Down
7 changes: 4 additions & 3 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,9 @@ SCRIPTS_DIR := $(ROOT_DIR)/scripts
KUDO_TOOLS_DIR := $(ROOT_DIR)/shared
SPARK_OPERATOR_DIR := $(ROOT_DIR)/spark-on-k8s-operator

export KONVOY_VERSION ?= v1.3.0
export WORKER_NODE_INSTANCE_TYPE ?= m5.xlarge
export KONVOY_VERSION ?= v1.5.0
export CLUSTER_CONFIG_YAML ?= $(ROOT_DIR)/cluster.template.yaml
export WORKER_NODE_INSTANCE_TYPE ?= m5.2xlarge
export WORKER_NODE_COUNT ?= 5

export NAMESPACE ?= spark
Expand Down Expand Up @@ -160,7 +161,7 @@ clean-all:

# function for extracting the value of an AWS property passed as an argument
define get_aws_credential
$(if $(AWS_PROFILE),$(shell cat ~/.aws/credentials | grep ${AWS_PROFILE} -A3 | tail -n3 | grep $1 | xargs | cut -d' ' -f3),$(error AWS_PROFILE is not set))
$(if $(AWS_PROFILE),$(shell cat ~/.aws/credentials | grep ${AWS_PROFILE} -A3 | tail -n3 | grep $1 | xargs | cut -d ' ' -f3),$(warning unable to update $1 from AWS credentials file: AWS_PROFILE is not provided. $1 will not be set))
endef

# function for calculating global checksum of directories and files passed as arguments.
Expand Down
184 changes: 184 additions & 0 deletions cluster.template.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,184 @@
kind: ClusterProvisioner
apiVersion: konvoy.mesosphere.io/v1beta2
metadata:
name: kudo-spark
creationTimestamp: "2020-01-28T23:15:38Z"
spec:
provider: aws
aws:
region: us-west-2
vpc:
overrideDefaultRouteTable: true
enableInternetGateway: true
enableVPCEndpoints: true
availabilityZones:
- us-west-2c
elb:
apiServerPort: 6443
tags:
owner: default
nodePools:
- name: worker
count: 4
machine:
rootVolumeSize: 80
rootVolumeType: gp2
imagefsVolumeEnabled: true
imagefsVolumeSize: 160
imagefsVolumeType: gp2
imagefsVolumeDevice: xvdb
type: m5.2xlarge
- name: control-plane
controlPlane: true
count: 1
machine:
rootVolumeSize: 80
rootVolumeType: io1
rootVolumeIOPS: 1000
imagefsVolumeEnabled: true
imagefsVolumeSize: 160
imagefsVolumeType: gp2
imagefsVolumeDevice: xvdb
type: m5.xlarge
- name: bastion
bastion: true
count: 0
machine:
rootVolumeSize: 10
rootVolumeType: gp2
imagefsVolumeEnabled: false
type: m5.large
sshCredentials:
user: centos
publicKeyFile: kudo-spark-ssh.pub
privateKeyFile: kudo-spark-ssh.pem
version: v1.5.0
---
kind: ClusterConfiguration
apiVersion: konvoy.mesosphere.io/v1beta2
metadata:
name: kudo-spark
creationTimestamp: "2020-01-28T23:15:38Z"
spec:
kubernetes:
version: 1.17.8
controlPlane:
controlPlaneEndpointOverride: ""
certificate: {}
networking:
podSubnet: 192.168.0.0/16
serviceSubnet: 10.0.0.0/18
iptables:
addDefaultRules: false
httpProxy: ""
httpsProxy: ""
cloudProvider:
provider: aws
admissionPlugins:
enabled:
- AlwaysPullImages
- NodeRestriction
containerNetworking:
calico:
version: v3.13.4
encapsulation: ipip
mtu: 1480
containerRuntime:
containerd:
version: 1.3.4
osPackages:
enableAdditionalRepositories: true
nodePools:
- name: worker
addons:
- configRepository: https://github.com/mesosphere/kubernetes-base-addons
configVersion: stable-1.17-2.0.2
addonsList:
- name: awsebscsiprovisioner
enabled: true
- name: awsebsprovisioner
enabled: false
values: |
storageclass:
isDefault: false
- name: cert-manager
enabled: true
- name: dashboard
enabled: true
- name: defaultstorageclass-protection
enabled: true
- name: dex
enabled: true
- name: dex-k8s-authenticator
enabled: true
- name: elasticsearch
enabled: false
- name: elasticsearchexporter
enabled: false
- name: flagger
enabled: false
- name: fluentbit
enabled: false
- name: gatekeeper
enabled: true
- name: external-dns
enabled: false
- name: istio
enabled: false
- name: kibana
enabled: false
- name: konvoyconfig
enabled: true
- name: kube-oidc-proxy
enabled: true
- name: localvolumeprovisioner
enabled: false
values: |
# Multiple storage classes can be defined here. This allows to, e.g.,
# distinguish between different disk types.
# For each entry a storage class '$name' and
# a host folder '/mnt/$dirName' will be created. Volumes mounted to this
# folder are made available in the storage class.
storageclasses:
- name: localvolumeprovisioner
dirName: disks
isDefault: false
reclaimPolicy: Delete
volumeBindingMode: WaitForFirstConsumer
- name: nvidia
enabled: false
- name: opsportal
enabled: true
- name: prometheus
enabled: true
- name: prometheusadapter
enabled: true
- name: reloader
enabled: false
- name: traefik
enabled: true
- name: traefik-forward-auth
enabled: true
values: |
traefikForwardAuth:
allowedUser:
valueFrom:
secretKeyRef: null
- name: velero
enabled: false
- configRepository: https://github.com/mesosphere/kubeaddons-conductor
configVersion: stable-1.17-1.0.0
addonsList:
- name: conductor
enabled: false
- configRepository: https://github.com/mesosphere/kubeaddons-dispatch
configVersion: stable-1.17-1.2.2
addonsList:
- name: dispatch
enabled: false
- configRepository: https://github.com/mesosphere/kubeaddons-kommander
configVersion: stable-1.17-1.1.0
addonsList:
- name: kommander
enabled: false
version: v1.5.0
4 changes: 2 additions & 2 deletions images/builder/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
FROM golang:1.13.0@sha256:de697ce5ae02f3d9a57b0603fbb648efadfa212727e702ad3a807b43eba7f6d6

ARG KUDO_DOWNLOAD_URL=https://github.com/kudobuilder/kudo/releases/download/v0.13.0/kubectl-kudo_0.13.0_linux_x86_64
ARG KUBECTL_DOWNLOAD_URL=https://storage.googleapis.com/kubernetes-release/release/v1.16.0/bin/linux/amd64/kubectl
ARG KUDO_DOWNLOAD_URL=https://github.com/kudobuilder/kudo/releases/download/v0.15.0/kubectl-kudo_0.15.0_linux_x86_64
ARG KUBECTL_DOWNLOAD_URL=https://storage.googleapis.com/kubernetes-release/release/v1.17.8/bin/linux/amd64/kubectl
ARG JAVA_URL=https://downloads.mesosphere.com/java/openjdk-8u212b03-hotspot-linux-x64.tar.gz
ENV JAVA_HOME=/usr/local/java
ENV PATH=$PATH:${JAVA_HOME}/bin
Expand Down
28 changes: 17 additions & 11 deletions images/operator/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,19 +1,25 @@
ARG SPARK_IMAGE=mesosphere/spark:spark-2.4.5-hadoop-2.9-k8s

FROM golang:1.12.5-alpine as builder
LABEL stage=spark-operator-builder
ARG DEP_VERSION="0.5.3"
RUN apk add --no-cache bash git
ADD https://github.com/golang/dep/releases/download/v${DEP_VERSION}/dep-linux-amd64 /usr/bin/dep
RUN chmod +x /usr/bin/dep
FROM golang:1.14.1-alpine as builder

COPY . ${GOPATH}/src/github.com/GoogleCloudPlatform/spark-on-k8s-operator
WORKDIR ${GOPATH}/src/github.com/GoogleCloudPlatform/spark-on-k8s-operator
RUN dep ensure -vendor-only \
&& go generate \
&& CGO_ENABLED=0 GOOS=linux go build -o /usr/bin/spark-operator
WORKDIR /workspace

# Copy the Go Modules manifests
COPY go.mod go.mod
COPY go.sum go.sum
# Cache deps before building and copying source so that we don't need to re-download as much
# and so that source changes don't invalidate our downloaded layer
RUN go mod download

# Copy the go source code
COPY main.go main.go
COPY pkg/ pkg/

# Build
RUN CGO_ENABLED=0 GOOS=linux GOARCH=amd64 GO111MODULE=on go build -a -o /usr/bin/spark-operator main.go

FROM ${SPARK_IMAGE}
USER root
COPY --from=builder /usr/bin/spark-operator /usr/bin/
COPY hack/gencerts.sh /usr/bin/

Expand Down
6 changes: 3 additions & 3 deletions images/spark/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@ FROM ubuntu:18.04
ARG DEBIAN_FRONTEND=noninteractive
ARG TINI_VERSION=v0.18.0
ARG SPARK_REPO="apache/spark"
ARG SPARK_TAG="v2.4.5"
ARG SCALA_VERSION="2.11"
ARG SPARK_TAG="v3.0.0"
ARG SCALA_VERSION="2.12"
ARG HADOOP_VERSION="2.9.2"

ARG SPARK_BUILD_ARGS="\
Expand All @@ -30,7 +30,7 @@ ENV PATH $JAVA_HOME/bin:$PATH
RUN set -ex && \
apt-get update && \
ln -s /lib /lib64 && \
apt-get install --no-install-recommends -y bash libc6 libpam-modules krb5-user libnss3 git curl openjdk-8-jdk \
apt-get install --no-install-recommends -y bash libc6 libpam-modules krb5-user libnss3 git openssl curl openjdk-8-jdk \
r-base r-base-dev python python-pip python3 python3-pip ngrep && \
curl -L https://github.com/krallin/tini/releases/download/${TINI_VERSION}/tini -o /usr/bin/tini && \
chmod +x /usr/bin/tini && \
Expand Down
7 changes: 0 additions & 7 deletions images/spark/conf/entrypoint.sh
Original file line number Diff line number Diff line change
Expand Up @@ -94,20 +94,13 @@ function add_spark_conf_if_non_empty() {
# This method is required for passing secret data to spark-submit via environment variables,
# injected by K8s via Secrets.
function configure_spark_properties() {
add_spark_conf_if_non_empty "spark.authenticate.secret" "${SPARK_AUTHENTICATE_SECRET}"
add_spark_conf_if_non_empty "spark.ssl.keyPassword" "${SPARK_SSL_KEYPASSWORD}"
add_spark_conf_if_non_empty "spark.ssl.keyStorePassword" "${SPARK_SSL_KEYSTOREPASSWORD}"
add_spark_conf_if_non_empty "spark.ssl.trustStorePassword" "${SPARK_SSL_TRUSTSTOREPASSWORD}"
}

configure_spark_properties

# if SPARK_AUTHENTICATE_SECRET is set, enable RPC authentication for executors
# and provide the auth secret via _SPARK_AUTH_SECRET variable
if [[ -n "${SPARK_AUTHENTICATE_SECRET}" ]]; then
SPARK_EXECUTOR_JAVA_OPTS+=("-Dspark.authenticate=true")
export _SPARK_AUTH_SECRET="${SPARK_AUTHENTICATE_SECRET}"
fi

case "$SPARK_K8S_CMD" in
driver)
Expand Down
2 changes: 1 addition & 1 deletion operators
Submodule operators updated 106 files
2 changes: 1 addition & 1 deletion scale-tests/templates/gensort-application.tmpl
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ spec:
image: mesosphere/spark:spark-2.4.5-hadoop-2.9-k8s
imagePullPolicy: Always
mainClass: sorting.DatasetGenerator
mainApplicationFile: "https://kudo-spark.s3-us-west-2.amazonaws.com/spark-scala-tests-2.4.5-20200225.jar"
mainApplicationFile: "https://kudo-spark.s3-us-west-2.amazonaws.com/spark-scala-tests-3.0.0-20200819.jar"
arguments:
- "--num-files"
- "1000"
Expand Down
2 changes: 1 addition & 1 deletion scale-tests/templates/scale-test-application.tmpl
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ spec:
image: mesosphere/spark:spark-2.4.5-hadoop-2.9-k8s
imagePullPolicy: Always
mainClass: MockTaskRunner
mainApplicationFile: "https://kudo-spark.s3-us-west-2.amazonaws.com/spark-scala-tests-2.4.5-20200225.jar"
mainApplicationFile: "https://kudo-spark.s3-us-west-2.amazonaws.com/spark-scala-tests-3.0.0-20200819.jar"
arguments:
- "NUM_TASKS"
- "TASK_DURATION_SEC"
Expand Down
2 changes: 1 addition & 1 deletion scale-tests/templates/sort-application.tmpl
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ spec:
image: mesosphere/spark:spark-2.4.5-hadoop-2.9-k8s
imagePullPolicy: Always
mainClass: sorting.SortingApp
mainApplicationFile: "https://kudo-spark.s3-us-west-2.amazonaws.com/spark-scala-tests-2.4.5-20200225.jar"
mainApplicationFile: "https://kudo-spark.s3-us-west-2.amazonaws.com/spark-scala-tests-3.0.0-20200819.jar"
arguments:
- "SOURCE_PATH"
- "TARGET_PATH"
Expand Down
2 changes: 1 addition & 1 deletion spark-on-k8s-operator
Submodule spark-on-k8s-operator updated 53 files
+1 −13 .travis.yml
+18 −11 Dockerfile
+17 −11 Dockerfile.rh
+0 −1,261 Gopkg.lock
+0 −114 Gopkg.toml
+1 −4 Makefile
+3 −3 README.md
+274 −10 docs/api-docs.md
+4 −5 docs/design.md
+3 −24 docs/developer-guide.md
+5 −2 docs/quick-start-guide.md
+122 −50 docs/user-guide.md
+2 −0 docs/who-is-using.md
+64 −0 go.mod
+836 −0 go.sum
+91 −0 hack/generate-groups.sh
+2 −2 hack/update-codegen.sh
+41 −18 main.go
+636 −19 manifest/crds/sparkoperator.k8s.io_scheduledsparkapplications.yaml
+636 −19 manifest/crds/sparkoperator.k8s.io_sparkapplications.yaml
+3 −0 manifest/spark-operator-rbac.yaml
+2 −2 pkg/apis/sparkoperator.k8s.io/v1beta1/zz_generated.deepcopy.go
+91 −7 pkg/apis/sparkoperator.k8s.io/v1beta2/types.go
+122 −6 pkg/apis/sparkoperator.k8s.io/v1beta2/zz_generated.deepcopy.go
+1 −1 pkg/batchscheduler/interface/interface.go
+40 −42 pkg/batchscheduler/volcano/volcano_scheduler.go
+7 −8 pkg/client/clientset/versioned/clientset.go
+6 −6 pkg/client/clientset/versioned/fake/clientset_generated.go
+1 −2 pkg/client/clientset/versioned/typed/sparkoperator.k8s.io/v1beta1/sparkoperator.k8s.io_client.go
+1 −2 pkg/client/clientset/versioned/typed/sparkoperator.k8s.io/v1beta2/sparkoperator.k8s.io_client.go
+30 −1 pkg/config/constants.go
+5 −2 pkg/controller/scheduledsparkapplication/controller.go
+31 −6 pkg/controller/scheduledsparkapplication/controller_test.go
+98 −88 pkg/controller/sparkapplication/controller.go
+245 −8 pkg/controller/sparkapplication/controller_test.go
+39 −24 pkg/controller/sparkapplication/monitoring_config.go
+127 −16 pkg/controller/sparkapplication/monitoring_config_test.go
+90 −19 pkg/controller/sparkapplication/sparkapp_metrics.go
+12 −2 pkg/controller/sparkapplication/sparkapp_metrics_test.go
+75 −15 pkg/controller/sparkapplication/sparkapp_util.go
+59 −0 pkg/controller/sparkapplication/sparkapp_util_test.go
+45 −11 pkg/controller/sparkapplication/sparkui.go
+242 −58 pkg/controller/sparkapplication/sparkui_test.go
+69 −7 pkg/controller/sparkapplication/submission.go
+191 −2 pkg/controller/sparkapplication/submission_test.go
+43 −0 pkg/util/histogram_buckets.go
+8 −7 pkg/util/metrics.go
+175 −56 pkg/webhook/patch.go
+141 −5 pkg/webhook/patch_test.go
+4 −4 pkg/webhook/webhook.go
+1 −1 spark-docker/Dockerfile
+1 −1 sparkctl/README.md
+3 −3 test/e2e/volume_mount_test.go
2 changes: 1 addition & 1 deletion specs/spark-application.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ spec:
image: mesosphere/spark:spark-2.4.5-hadoop-2.9-k8s
imagePullPolicy: Always
mainClass: MockTaskRunner
mainApplicationFile: "https://kudo-spark.s3-us-west-2.amazonaws.com/spark-scala-tests-2.4.5-20200225.jar"
mainApplicationFile: "https://kudo-spark.s3-us-west-2.amazonaws.com/spark-scala-tests-3.0.0-20200819.jar"
arguments:
- "1"
- "120"
Expand Down
10 changes: 5 additions & 5 deletions tests/basic_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,8 +66,8 @@ func TestJobSubmission(t *testing.T) {
}

job := utils.SparkJob{
Name: "linear-regression",
Template: "spark-linear-regression-job.yaml",
Name: "spark-pi",
Template: "spark-pi.yaml",
}

err = spark.SubmitJob(&job)
Expand Down Expand Up @@ -181,9 +181,9 @@ func TestSparkHistoryServerInstallation(t *testing.T) {
}

job := utils.SparkJob{
Name: "history-server-linear-regression",
Name: "spark-pi-history-server",
Params: sparkAppParams,
Template: "spark-linear-regression-history-server-job.yaml",
Template: "spark-pi-history-server-job.yaml",
}

// Submit a SparkApplication
Expand Down Expand Up @@ -347,7 +347,7 @@ func TestRSupport(t *testing.T) {
t.Fatal(err)
}

if err := spark.WaitForOutput(job, "3 2.997274"); err != nil {
if err := spark.WaitForOutput(job, "userId movieId rating prediction"); err != nil {
t.Fatal(err)
}

Expand Down
Loading