
Commit c145ca4

Issue/142 (Azure#143)
* Add cluster name support to dbricks run
1 parent df6898a commit c145ca4

20 files changed: 446 additions & 66 deletions

.devcontainer/Dockerfile

Lines changed: 1 addition & 1 deletion
@@ -110,4 +110,4 @@ RUN git clone https://github.com/ahmetb/kubectx.git /root/.kubectx \
 RUN git clone https://github.com/magicmonty/bash-git-prompt.git ~/.bash-git-prompt --depth=1 \
     && echo "if [ -f \"$HOME/.bash-git-prompt/gitprompt.sh\" ]; then GIT_PROMPT_ONLY_IN_REPO=1 && source $HOME/.bash-git-prompt/gitprompt.sh; fi" >> "/root/.bashrc"

-ENV PATH="/root/.kubectx:${PATH}"
+ENV PATH="/root/.kubectx:${PATH}"

.gitignore

Lines changed: 1 addition & 0 deletions
@@ -21,3 +21,4 @@ cmd/manager/__debug_bin
 .env
 __debug_bin
 settings.json
+operatorsetup.yaml
File renamed without changes.

Makefile

Lines changed: 39 additions & 27 deletions
@@ -1,8 +1,9 @@
-
 # Image URL to use all building/pushing image targets
 IMG ?= controller:latest
+
 # Produce CRDs that work back to Kubernetes 1.11 (no version conversion)
 CRD_OPTIONS ?= "crd:trivialVersions=true"
+
 # Prometheus helm installation name
 PROMETHEUS_NAME ?= "prom-azure-databricks-operator"

@@ -13,10 +14,11 @@ else
 GOBIN=$(shell go env GOBIN)
 endif

+timestamp := $(shell /bin/date "+%Y%m%d-%H%M%S")
 all: manager

 # Run tests
-test: generate fmt lint vet manifests
+test: generate fmt vet manifests lint
     rm -rf cover.* cover
     mkdir -p cover

@@ -29,7 +31,7 @@ test: generate fmt lint vet manifests
     rm -f cover.out cover.out.tmp cover.json

 # Run tests with existing cluster
-test-existing: generate fmt lint vet manifests
+test-existing: generate fmt vet manifests lint
     rm -rf cover.* cover
     mkdir -p cover

@@ -52,37 +54,35 @@ run: generate fmt lint vet manifests
 # Install CRDs into a cluster
 install: manifests
     kustomize build config/crd | kubectl apply -f -
+
 # Uninstall CRDs from a cluster
 uninstall: manifests
     kustomize build config/crd | kubectl delete -f -

 # Deploy controller in the configured Kubernetes cluster in ~/.kube/config
 deploy: manifests
-    cd config/manager && kustomize edit set image controller=${IMG}
-    kustomize build config/default | kubectl apply -f -
-
-deploy-controller:
+ifeq (,$(shell kubectl get namespace azure-databricks-operator-system))
+    @echo "creating azure-databricks-operator-system namespace"
     kubectl create namespace azure-databricks-operator-system
-    kubectl --namespace azure-databricks-operator-system \
+else
+    @echo "azure-databricks-operator-system namespace exists"
+endif
+ifeq (,$(shell kubectl get secret dbrickssettings --namespace azure-databricks-operator-system))
+    @echo "creating dbrickssettings secret"
+    kubectl --namespace azure-databricks-operator-system \
         create secret generic dbrickssettings \
         --from-literal=DatabricksHost="${DATABRICKS_HOST}" \
         --from-literal=DatabricksToken="${DATABRICKS_TOKEN}"
+else
+    @echo "dbrickssettings secret exists"
+endif
+    cd config/manager && kustomize edit set image controller=${IMG}
+    kustomize build config/default | kubectl apply -f -
+    kustomize build config/default > operatorsetup.yaml
+

-#create image and load it into cluster
-    IMG="docker.io/controllertest:1" make docker-build
-    kind load docker-image docker.io/controllertest:1 --loglevel "trace"
-    make install
-    make deploy
-    sed -i'' -e 's@image: .*@image: '"IMAGE_URL"'@' ./config/default/manager_image_patch.yaml

-timestamp := $(shell /bin/date "+%Y%m%d-%H%M%S")

-update-deployed-controller:
-    IMG="docker.io/controllertest:$(timestamp)" make ARGS="${ARGS}" docker-build
-    kind load docker-image docker.io/controllertest:$(timestamp) --loglevel "trace"
-    make install
-    make deploy
-    sed -i'' -e 's@image: .*@image: '"IMAGE_URL"'@' ./config/default/manager_image_patch.yaml

 # Generate manifests e.g. CRD, RBAC etc.
 manifests: controller-gen
@@ -105,11 +105,10 @@ generate: controller-gen
     $(CONTROLLER_GEN) object:headerFile=./hack/boilerplate.go.txt paths="./..."

 # Build the docker image
-docker-build: test
+docker-build:
     docker build . -t ${IMG} ${ARGS}
     @echo "updating kustomize image patch file for manager resource"
-    sed -i'' -e 's@image: .*@image: '"${IMG}"'@' ./config/default/manager_image_patch.yaml
-
+    cd config/manager && kustomize edit set image controller=${IMG}
 # Push the docker image
 docker-push:
     docker push ${IMG}
@@ -130,6 +129,7 @@ CONTROLLER_GEN=$(GOBIN)/controller-gen
 else
 CONTROLLER_GEN=$(shell which controller-gen)
 endif
+
 create-kindcluster:
 ifeq (,$(shell kind get clusters))
     @echo "no kind cluster"
@@ -156,10 +156,22 @@ endif

     kubectl cluster-info

-    make install-prometheus
-
     @echo "deploying controller to cluster"
-    make deploy-controller
+    make deploy-kindcluster
+    make install
+    make install-prometheus
+
+# Deploy controller
+deploy-kindcluster:
+    #create image and load it into cluster
+    $(eval newimage := "docker.io/controllertest:$(timestamp)")
+    IMG=$(newimage) make docker-build
+    kind load docker-image $(newimage) --loglevel "debug"
+
+    #deploy operator
+    IMG=$(newimage) make deploy
+    #change image name back to orignal image name
+    cd config/manager && kustomize edit set image controller="IMAGE_URL"

 install-kind:
 ifeq (,$(shell which kind))
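For orientation, here is a minimal sketch of how the reworked `deploy` flow above might be driven. The workspace URL, token, and image tag are placeholders, and it assumes `kubectl`, `kustomize`, and `docker` are available locally; only the make targets and variable names come from this commit.

```sh
# Placeholder values throughout; only the make targets and variable names are from this commit.
export DATABRICKS_HOST="https://xxxx.azuredatabricks.net"
export DATABRICKS_TOKEN="dapixxxxxxxx"
export IMG="docker.io/controllertest:1"

make docker-build   # builds ${IMG} and points the kustomize manager config at it
make install        # applies the CRDs
make deploy         # creates the namespace/secret only if missing, applies config/default, writes operatorsetup.yaml
```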

README.md

Lines changed: 74 additions & 6 deletions
@@ -7,12 +7,26 @@

 ## Introduction

-Kubernetes offers the facility of extending its API through the concept of 'Operators' ([Introducing Operators: Putting Operational Knowledge into Software](https://coreos.com/blog/introducing-operators.html)). This repository contains the resources and code to deploy an Azure Databricks Operator for Kubernetes.
+Kubernetes offers the facility of extending its API through the concept of [Operators](https://kubernetes.io/docs/concepts/extend-kubernetes/operator/). This repository contains the resources and code to deploy an Azure Databricks Operator for Kubernetes.
+
+The Databricks operator is useful in situations where Kubernetes-hosted applications wish to launch and use Databricks data engineering and machine learning tasks.
+
+### Key benefits of using the Azure Databricks operator
+
+1. Easy to use: Azure Databricks operations can be done by using kubectl; there is no need to learn or install the Databricks utils command line and its Python dependency.
+
+2. Security: No need to distribute and use the Databricks token; the token is used only by the operator.
+
+3. Version control: All the YAML manifests or Helm charts that describe Azure Databricks operations (clusters, jobs, ...) can be tracked.
+
+4. Automation: Replicate Azure Databricks operations on any Databricks workspace by applying the same manifests or Helm charts.
+
+

 ![alt text](docs/images/azure-databricks-operator-highlevel.jpg "high level architecture")


-The Databricks operator is useful in situations where Kubernetes hosted applications wish to launch and use Databricks data engineering and machine learning tasks.
+

 ![alt text](docs/images/azure-databricks-operator.jpg "high level architecture")

@@ -22,12 +36,66 @@ The project was built using
 1. [Kubebuilder](https://book.kubebuilder.io/)
 2. [Golang SDK for DataBricks](https://github.com/xinsnake/databricks-sdk-golang)

-## Quick start
+## How to use Azure Databricks operator
+
+1. Download [the latest release manifests](https://github.com/microsoft/azure-databricks-operator/releases):
+
+    ```sh
+    wget https://github.com/microsoft/azure-databricks-operator/releases/latest/download/release.zip
+    unzip release.zip
+    ```
+
+2. Create the `azure-databricks-operator-system` namespace:
+
+    ```sh
+    kubectl create namespace azure-databricks-operator-system
+    ```
+
+3. Create a Kubernetes secret with values for `DATABRICKS_HOST` and `DATABRICKS_TOKEN`:
+
+    ```shell
+    kubectl --namespace azure-databricks-operator-system \
+        create secret generic dbrickssettings \
+        --from-literal=DatabricksHost="https://xxxx.azuredatabricks.net" \
+        --from-literal=DatabricksToken="xxxxx"
+    ```
+
+4. Apply the manifests for the Operator and CRDs in `release/config`:
+
+    ```sh
+    kubectl apply -f release/config
+    ```
+
+
+For detailed deployment guides please see [deploy.md](https://github.com/microsoft/azure-databricks-operator/blob/master/docs/deploy.md)
+
+## Samples
+
+1. Create a Spark cluster on demand and run a Databricks notebook.
+
+    ![alt text](docs/images/sample1.gif "sample1")
+
+2. Create an interactive Spark cluster and run a Databricks job on an existing cluster.
+
+    ![alt text](docs/images/sample2.gif "sample2")
+
+3. Create an Azure Databricks secret scope from Kubernetes secrets.
+
+    ![alt text](docs/images/sample3.gif "sample3")

-For deployment guides please see [deploy.md](https://github.com/microsoft/azure-databricks-operator/blob/master/docs/deploy.md)

 For samples and simple use cases on how to use the operator please see [samples.md](docs/samples.md)

+## Quick start
+
+One-click start using [VS Code](https://code.visualstudio.com/):
+
+![alt text](docs/images/devcontainer.gif "devcontainer")
+
+For more details please see
+[contributing.md](https://github.com/microsoft/azure-databricks-operator/blob/master/docs/contributing.md)
+
 ## Roadmap

 Check [roadmap.md](https://github.com/microsoft/azure-databricks-operator/blob/master/docs/roadmap.md) for what has been supported and what's coming.
@@ -36,10 +104,10 @@ Check [roadmap.md](https://github.com/microsoft/azure-databricks-operator/blob/m

 Few topics are discussed in the [resources.md](https://github.com/microsoft/azure-databricks-operator/blob/master/docs/resources.md)

-- Kubernetes on WSL
-- Build pipelines
 - Dev container
+- Build pipelines
 - Operator metrics
+- Kubernetes on WSL

 ## Contributing

api/v1alpha1/run_types.go

Lines changed: 4 additions & 4 deletions
@@ -31,10 +31,10 @@ type RunSpec struct {
 	JobName string `json:"job_name,omitempty"`
 	*dbmodels.RunParameters `json:",inline"`
 	// dedicated for direct run
-	RunName string `json:"run_name,omitempty"`
-	*dbmodels.ClusterSpec `json:",inline"`
-	*dbmodels.JobTask `json:",inline"`
-	TimeoutSeconds int32 `json:"timeout_seconds,omitempty"`
+	RunName string `json:"run_name,omitempty"`
+	ClusterSpec `json:",inline"`
+	*dbmodels.JobTask `json:",inline"`
+	TimeoutSeconds int32 `json:"timeout_seconds,omitempty"`
 }

 // +kubebuilder:object:root=true

api/v1alpha1/run_types_extra.go

Lines changed: 36 additions & 0 deletions
@@ -0,0 +1,36 @@
+package v1alpha1
+
+import (
+	dbmodels "github.com/xinsnake/databricks-sdk-golang/azure/models"
+)
+
+// ClusterSpec is similar to dbmodels.ClusterSpec; it exists because
+// dbmodels.ClusterSpec doesn't support ExistingClusterName.
+// ExistingClusterName allows discovering Databricks clusters by their Kubernetes object name.
+type ClusterSpec struct {
+	ExistingClusterID   string               `json:"existing_cluster_id,omitempty" url:"existing_cluster_id,omitempty"`
+	ExistingClusterName string               `json:"existing_cluster_name,omitempty" url:"existing_cluster_name,omitempty"`
+	NewCluster          *dbmodels.NewCluster `json:"new_cluster,omitempty" url:"new_cluster,omitempty"`
+	Libraries           []dbmodels.Library   `json:"libraries,omitempty" url:"libraries,omitempty"`
+}
+
+// ToK8sClusterSpec converts a Databricks ClusterSpec object to a k8s ClusterSpec object.
+// It is needed to add ExistingClusterName and follow the k8s camelCase naming convention.
+func ToK8sClusterSpec(dbjs *dbmodels.ClusterSpec) ClusterSpec {
+	var k8sjs ClusterSpec
+	k8sjs.ExistingClusterID = dbjs.ExistingClusterID
+	k8sjs.NewCluster = dbjs.NewCluster
+	k8sjs.Libraries = dbjs.Libraries
+	return k8sjs
+}
+
+// ToDatabricksClusterSpec converts a k8s ClusterSpec object to a Databricks ClusterSpec object.
+// It is needed to add ExistingClusterName and follow the k8s camelCase naming convention.
+func ToDatabricksClusterSpec(k8sjs *ClusterSpec) dbmodels.ClusterSpec {
+	var dbjs dbmodels.ClusterSpec
+	dbjs.ExistingClusterID = k8sjs.ExistingClusterID
+	dbjs.NewCluster = k8sjs.NewCluster
+	dbjs.Libraries = k8sjs.Libraries
+	return dbjs
+}

api/v1alpha1/zz_generated.deepcopy.go

Lines changed: 29 additions & 6 deletions
Some generated files are not rendered by default.

config/crd/bases/databricks.microsoft.com_runs.yaml

Lines changed: 2 additions & 0 deletions
@@ -47,6 +47,8 @@ spec:
         properties:
           existing_cluster_id:
             type: string
+          existing_cluster_name:
+            type: string
          jar_params:
            items:
              type: string
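To illustrate where the new `existing_cluster_name` schema field would surface for users, here is a hypothetical `Run` manifest applied with `kubectl`. The resource name, cluster name, and the `notebook_task` payload are illustrative assumptions and are not part of this commit; only `run_name` and `existing_cluster_name` come from this change.

```sh
# Hypothetical manifest: only existing_cluster_name (and run_name) come from this change;
# the notebook_task block and all names are illustrative assumptions.
cat <<EOF | kubectl apply -f -
apiVersion: databricks.microsoft.com/v1alpha1
kind: Run
metadata:
  name: sample-run-on-named-cluster
spec:
  run_name: sample-run-on-named-cluster
  existing_cluster_name: my-interactive-cluster
  notebook_task:
    notebook_path: /Users/someone@example.com/sample-notebook
EOF
```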

config/default/manager_image_patch.yaml

Lines changed: 1 addition & 2 deletions
@@ -8,8 +8,7 @@ spec:
     spec:
       containers:
         # Change the value of image field below to your controller image URL
-        - image: IMAGE_URL
-          name: manager
+        - name: manager
           env:
           - name: DATABRICKS_HOST
             valueFrom:
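The hard-coded `image: IMAGE_URL` line can be dropped because the image is now injected at build/deploy time via kustomize. As a small sketch with a placeholder tag, the step the Makefile now runs amounts to:

```sh
# Placeholder image tag; mirrors the kustomize step used instead of rewriting this patch file with sed.
cd config/manager
kustomize edit set image controller="docker.io/controllertest:1"
```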
