From d5a2af0e431f6154db294c864536c1ba53ac766a Mon Sep 17 00:00:00 2001 From: TobiasPyttel Date: Sat, 2 Nov 2024 18:58:52 +0100 Subject: [PATCH 01/17] HDDS-11618. Enable HA modes for OM and SCM --- charts/ozone/templates/_helpers.tpl | 48 +++++++++++++++++++ .../templates/om/om-service-headless.yaml | 4 ++ charts/ozone/templates/om/om-statefulset.yaml | 1 + .../templates/scm/scm-service-headless.yaml | 4 ++ .../ozone/templates/scm/scm-statefulset.yaml | 19 ++++++++ charts/ozone/values.yaml | 4 ++ 6 files changed, 80 insertions(+) diff --git a/charts/ozone/templates/_helpers.tpl b/charts/ozone/templates/_helpers.tpl index e5f56a8..de3e086 100644 --- a/charts/ozone/templates/_helpers.tpl +++ b/charts/ozone/templates/_helpers.tpl @@ -51,6 +51,26 @@ app.kubernetes.io/instance: {{ .Release.Name }} {{- $pods | join "," }} {{- end }} +{{/* List of comma separated om ids */}} +{{- define "ozone.om.cluster.ids" -}} + {{- $pods := list }} + {{- $replicas := .Values.om.replicas | int }} + {{- range $i := until $replicas }} + {{- $pods = append $pods (printf "ozone-om-%d" $i) }} + {{- end }} + {{- $pods | join "," }} +{{- end }} + +{{/* List of comma separated om ids */}} +{{- define "ozone.scm.cluster.ids" -}} + {{- $pods := list }} + {{- $replicas := .Values.scm.replicas | int }} + {{- range $i := until $replicas }} + {{- $pods = append $pods (printf "ozone-scm-%d" $i) }} + {{- end }} + {{- $pods | join "," }} +{{- end }} + {{/* Common configuration environment variables */}} {{- define "ozone.configuration.env" -}} - name: OZONE-SITE.XML_hdds.datanode.dir @@ -59,14 +79,42 @@ app.kubernetes.io/instance: {{ .Release.Name }} value: /data - name: OZONE-SITE.XML_ozone.metadata.dirs value: /data/metadata +{{- if gt (int .Values.scm.replicas) 1 }} +- name: OZONE-SITE.XML_ozone.scm.ratis.enable + value: "true" +- name: OZONE-SITE.XML_ozone.scm.service.ids + value: cluster1 +- name: OZONE-SITE.XML_ozone.scm.nodes.cluster1 + value: {{ include "ozone.scm.cluster.ids" . 
}} +{{- range $i, $val := until ( .Values.scm.replicas | int ) }} +- name: {{ printf "OZONE-SITE.XML_ozone.scm.address.cluster1.ozone-scm-%d" $i }} + value: {{ printf "%s-scm-%d.%s-scm-headless.%s.svc.cluster.local" $.Release.Name $i $.Release.Name $.Values.namespace }} +{{- end }} +- name: OZONE-SITE.XML_ozone.scm.primordial.node.id + value: "ozone-scm-0" +{{- else }} - name: OZONE-SITE.XML_ozone.scm.block.client.address value: {{ include "ozone.scm.pods" . }} - name: OZONE-SITE.XML_ozone.scm.client.address value: {{ include "ozone.scm.pods" . }} - name: OZONE-SITE.XML_ozone.scm.names value: {{ include "ozone.scm.pods" . }} +{{- end}} +{{- if gt (int .Values.om.replicas) 1 }} +- name: OZONE-SITE.XML_ozone.om.ratis.enable + value: "true" +- name: OZONE-SITE.XML_ozone.om.service.ids + value: cluster1 +- name: OZONE-SITE.XML_ozone.om.nodes.cluster1 + value: {{ include "ozone.om.cluster.ids" . }} +{{- range $i, $val := until ( .Values.om.replicas | int ) }} +- name: {{ printf "OZONE-SITE.XML_ozone.om.address.cluster1.ozone-om-%d" $i }} + value: {{ printf "%s-om-%d.%s-om-headless.%s.svc.cluster.local" $.Release.Name $i $.Release.Name $.Values.namespace }} +{{- end }} +{{- else }} - name: OZONE-SITE.XML_ozone.om.address value: {{ include "ozone.om.pods" . }} +{{- end}} - name: OZONE-SITE.XML_hdds.scm.safemode.min.datanode value: "3" - name: OZONE-SITE.XML_ozone.datanode.pipeline.limit diff --git a/charts/ozone/templates/om/om-service-headless.yaml b/charts/ozone/templates/om/om-service-headless.yaml index d16659b..0279555 100644 --- a/charts/ozone/templates/om/om-service-headless.yaml +++ b/charts/ozone/templates/om/om-service-headless.yaml @@ -28,6 +28,10 @@ spec: ports: - name: ui port: {{ .Values.om.service.port }} + {{- if gt (int .Values.om.replicas) 1 }} + - name: ratis + port: {{ .Values.om.service.ratisPort }} + {{- end }} selector: {{- include "ozone.selectorLabels" . 
| nindent 4 }} app.kubernetes.io/component: om diff --git a/charts/ozone/templates/om/om-statefulset.yaml b/charts/ozone/templates/om/om-statefulset.yaml index b71c58c..c3719c4 100644 --- a/charts/ozone/templates/om/om-statefulset.yaml +++ b/charts/ozone/templates/om/om-statefulset.yaml @@ -31,6 +31,7 @@ metadata: app.kubernetes.io/component: om spec: replicas: {{ .Values.om.replicas }} + podManagementPolicy: Parallel serviceName: {{ .Release.Name }}-om-headless selector: matchLabels: diff --git a/charts/ozone/templates/scm/scm-service-headless.yaml b/charts/ozone/templates/scm/scm-service-headless.yaml index dce5857..05a804d 100644 --- a/charts/ozone/templates/scm/scm-service-headless.yaml +++ b/charts/ozone/templates/scm/scm-service-headless.yaml @@ -28,6 +28,10 @@ spec: ports: - name: ui port: {{ .Values.scm.service.port }} + {{- if gt (int .Values.scm.replicas) 1 }} + - name: ratis + port: {{ .Values.om.service.ratisPort }} + {{- end }} selector: {{- include "ozone.selectorLabels" . | nindent 4 }} app.kubernetes.io/component: scm diff --git a/charts/ozone/templates/scm/scm-statefulset.yaml b/charts/ozone/templates/scm/scm-statefulset.yaml index 457ba26..f0544f3 100644 --- a/charts/ozone/templates/scm/scm-statefulset.yaml +++ b/charts/ozone/templates/scm/scm-statefulset.yaml @@ -31,6 +31,7 @@ metadata: app.kubernetes.io/component: scm spec: replicas: {{ .Values.scm.replicas }} + podManagementPolicy: Parallel serviceName: {{ .Release.Name }}-scm-headless selector: matchLabels: @@ -61,6 +62,24 @@ spec: mountPath: {{ .Values.configuration.dir }} - name: {{ .Release.Name }}-scm mountPath: {{ .Values.scm.persistence.path }} + {{- if gt (int .Values.scm.replicas) 1 }} + - name: bootstrap + image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" + args: ["ozone", "scm", "--bootstrap"] + env: + {{- include "ozone.configuration.env" . | nindent 12 }} + {{- with $env }} + {{- tpl (toYaml .) 
$ | nindent 12 }} + {{- end }} + {{- with $envFrom }} + envFrom: {{- tpl (toYaml .) $ | nindent 12 }} + {{- end }} + volumeMounts: + - name: config + mountPath: {{ .Values.configuration.dir }} + - name: {{ .Release.Name }}-scm + mountPath: {{ .Values.scm.persistence.path }} + {{- end }} containers: - name: scm image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" diff --git a/charts/ozone/values.yaml b/charts/ozone/values.yaml index 9c2dc4a..581af21 100644 --- a/charts/ozone/values.yaml +++ b/charts/ozone/values.yaml @@ -19,6 +19,8 @@ image: pullPolicy: IfNotPresent tag: ~ +namespace: default + imagePullSecrets: [] # Common environment variables (templated) @@ -123,6 +125,7 @@ om: type: ClusterIP port: 9874 nodePort: ~ + ratisPort: 9894 # Ozone Manager persistence persistence: # Enable persistence @@ -201,6 +204,7 @@ scm: type: ClusterIP port: 9876 nodePort: ~ + ratisPort: 9894 # Storage Container Manager persistence persistence: # Enable persistence From 8c2606c3e3a2ab098613968b600e215e3a0314ed Mon Sep 17 00:00:00 2001 From: TobiasPyttel Date: Tue, 5 Nov 2024 10:11:52 +0100 Subject: [PATCH 02/17] fixed namespace issue --- charts/ozone/templates/_helpers.tpl | 4 ++-- charts/ozone/values.yaml | 2 -- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/charts/ozone/templates/_helpers.tpl b/charts/ozone/templates/_helpers.tpl index de3e086..24a54d6 100644 --- a/charts/ozone/templates/_helpers.tpl +++ b/charts/ozone/templates/_helpers.tpl @@ -88,7 +88,7 @@ app.kubernetes.io/instance: {{ .Release.Name }} value: {{ include "ozone.scm.cluster.ids" . 
}} {{- range $i, $val := until ( .Values.scm.replicas | int ) }} - name: {{ printf "OZONE-SITE.XML_ozone.scm.address.cluster1.ozone-scm-%d" $i }} - value: {{ printf "%s-scm-%d.%s-scm-headless.%s.svc.cluster.local" $.Release.Name $i $.Release.Name $.Values.namespace }} + value: {{ printf "%s-scm-%d.%s-scm-headless.%s.svc.cluster.local" $.Release.Name $i $.Release.Name $.Release.Namespace }} {{- end }} - name: OZONE-SITE.XML_ozone.scm.primordial.node.id value: "ozone-scm-0" @@ -109,7 +109,7 @@ app.kubernetes.io/instance: {{ .Release.Name }} value: {{ include "ozone.om.cluster.ids" . }} {{- range $i, $val := until ( .Values.om.replicas | int ) }} - name: {{ printf "OZONE-SITE.XML_ozone.om.address.cluster1.ozone-om-%d" $i }} - value: {{ printf "%s-om-%d.%s-om-headless.%s.svc.cluster.local" $.Release.Name $i $.Release.Name $.Values.namespace }} + value: {{ printf "%s-om-%d.%s-om-headless.%s.svc.cluster.local" $.Release.Name $i $.Release.Name $.Release.Namespace }} {{- end }} {{- else }} - name: OZONE-SITE.XML_ozone.om.address diff --git a/charts/ozone/values.yaml b/charts/ozone/values.yaml index 581af21..b569c03 100644 --- a/charts/ozone/values.yaml +++ b/charts/ozone/values.yaml @@ -19,8 +19,6 @@ image: pullPolicy: IfNotPresent tag: ~ -namespace: default - imagePullSecrets: [] # Common environment variables (templated) From d7e4322189f5866b3876acc2e840fabd34f87758 Mon Sep 17 00:00:00 2001 From: TobiasPyttel Date: Wed, 6 Nov 2024 18:42:09 +0100 Subject: [PATCH 03/17] fixed namespace issue --- .../templates/datanode/datanode-service-headless.yaml | 4 ++++ .../ozone/templates/datanode/datanode-statefulset.yaml | 4 ++++ charts/ozone/templates/om/om-service-headless.yaml | 2 +- charts/ozone/templates/om/om-statefulset.yaml | 4 ++++ charts/ozone/templates/scm/scm-service-headless.yaml | 10 +++++++++- charts/ozone/templates/scm/scm-statefulset.yaml | 8 ++++++++ charts/ozone/values.yaml | 2 -- 7 files changed, 30 insertions(+), 4 deletions(-) diff --git 
a/charts/ozone/templates/datanode/datanode-service-headless.yaml b/charts/ozone/templates/datanode/datanode-service-headless.yaml index 375abb1..1f023ca 100644 --- a/charts/ozone/templates/datanode/datanode-service-headless.yaml +++ b/charts/ozone/templates/datanode/datanode-service-headless.yaml @@ -28,6 +28,10 @@ spec: ports: - name: ui port: {{ .Values.datanode.service.port }} + - name: ratis-ipc + port: 9858 + - name: ipc + port: 9859 selector: {{- include "ozone.selectorLabels" . | nindent 4 }} app.kubernetes.io/component: datanode diff --git a/charts/ozone/templates/datanode/datanode-statefulset.yaml b/charts/ozone/templates/datanode/datanode-statefulset.yaml index a9320d1..3ea6b6a 100644 --- a/charts/ozone/templates/datanode/datanode-statefulset.yaml +++ b/charts/ozone/templates/datanode/datanode-statefulset.yaml @@ -65,6 +65,10 @@ spec: ports: - name: ui containerPort: {{ .Values.datanode.service.port }} + - name: ratis-ipc + containerPort: 9858 + - name: ipc + containerPort: 9859 livenessProbe: httpGet: path: / diff --git a/charts/ozone/templates/om/om-service-headless.yaml b/charts/ozone/templates/om/om-service-headless.yaml index 0279555..3d50b70 100644 --- a/charts/ozone/templates/om/om-service-headless.yaml +++ b/charts/ozone/templates/om/om-service-headless.yaml @@ -30,7 +30,7 @@ spec: port: {{ .Values.om.service.port }} {{- if gt (int .Values.om.replicas) 1 }} - name: ratis - port: {{ .Values.om.service.ratisPort }} + port: 9872 {{- end }} selector: {{- include "ozone.selectorLabels" . 
| nindent 4 }} diff --git a/charts/ozone/templates/om/om-statefulset.yaml b/charts/ozone/templates/om/om-statefulset.yaml index c3719c4..978a212 100644 --- a/charts/ozone/templates/om/om-statefulset.yaml +++ b/charts/ozone/templates/om/om-statefulset.yaml @@ -72,6 +72,10 @@ spec: containerPort: 9862 - name: ui containerPort: {{ .Values.om.service.port }} + {{- if gt (int .Values.om.replicas) 1 }} + - name: ratis + containerPort: 9872 + {{- end }} livenessProbe: httpGet: path: / diff --git a/charts/ozone/templates/scm/scm-service-headless.yaml b/charts/ozone/templates/scm/scm-service-headless.yaml index 05a804d..8890ffd 100644 --- a/charts/ozone/templates/scm/scm-service-headless.yaml +++ b/charts/ozone/templates/scm/scm-service-headless.yaml @@ -28,9 +28,17 @@ spec: ports: - name: ui port: {{ .Values.scm.service.port }} + - name: rpc-datanode + port: 9861 + - name: block-client + port: 9863 + - name: rpc-client + port: 9860 {{- if gt (int .Values.scm.replicas) 1 }} - name: ratis - port: {{ .Values.om.service.ratisPort }} + port: 9894 + - name: grpc + port: 9895 {{- end }} selector: {{- include "ozone.selectorLabels" . 
| nindent 4 }} diff --git a/charts/ozone/templates/scm/scm-statefulset.yaml b/charts/ozone/templates/scm/scm-statefulset.yaml index f0544f3..6f10f5e 100644 --- a/charts/ozone/templates/scm/scm-statefulset.yaml +++ b/charts/ozone/templates/scm/scm-statefulset.yaml @@ -101,10 +101,18 @@ spec: ports: - name: rpc-client containerPort: 9860 + - name: block-client + containerPort: 9863 - name: rpc-datanode containerPort: 9861 - name: ui containerPort: {{ .Values.scm.service.port }} + {{- if gt (int .Values.scm.replicas) 1 }} + - name: ratis + containerPort: 9894 + - name: grpc + containerPort: 9895 + {{- end }} livenessProbe: httpGet: path: / diff --git a/charts/ozone/values.yaml b/charts/ozone/values.yaml index b569c03..9c2dc4a 100644 --- a/charts/ozone/values.yaml +++ b/charts/ozone/values.yaml @@ -123,7 +123,6 @@ om: type: ClusterIP port: 9874 nodePort: ~ - ratisPort: 9894 # Ozone Manager persistence persistence: # Enable persistence @@ -202,7 +201,6 @@ scm: type: ClusterIP port: 9876 nodePort: ~ - ratisPort: 9894 # Storage Container Manager persistence persistence: # Enable persistence From 4c27e2ded8e88eb57e65e71914034eee18865e28 Mon Sep 17 00:00:00 2001 From: TobiasPyttel Date: Mon, 2 Dec 2024 13:06:44 +0100 Subject: [PATCH 04/17] HDDS-11618. 
First running version of Helm managed OM HA --- charts/ozone/templates/_helpers.tpl | 146 +++++++++-- charts/ozone/templates/helm/om-manager.yaml | 239 ++++++++++++++++++ .../templates/om/om-service-headless.yaml | 2 - charts/ozone/templates/om/om-statefulset.yaml | 72 +++++- charts/ozone/templates/ozone-configmap.yaml | 4 + charts/ozone/values.yaml | 32 +++ 6 files changed, 458 insertions(+), 37 deletions(-) create mode 100644 charts/ozone/templates/helm/om-manager.yaml diff --git a/charts/ozone/templates/_helpers.tpl b/charts/ozone/templates/_helpers.tpl index 24a54d6..c457004 100644 --- a/charts/ozone/templates/_helpers.tpl +++ b/charts/ozone/templates/_helpers.tpl @@ -56,65 +56,111 @@ app.kubernetes.io/instance: {{ .Release.Name }} {{- $pods := list }} {{- $replicas := .Values.om.replicas | int }} {{- range $i := until $replicas }} - {{- $pods = append $pods (printf "ozone-om-%d" $i) }} + {{- $pods = append $pods (printf "%s-om-%d" $.Release.Name $i) }} {{- end }} {{- $pods | join "," }} {{- end }} -{{/* List of comma separated om ids */}} +{{/* List of comma separated scm ids */}} {{- define "ozone.scm.cluster.ids" -}} {{- $pods := list }} {{- $replicas := .Values.scm.replicas | int }} {{- range $i := until $replicas }} - {{- $pods = append $pods (printf "ozone-scm-%d" $i) }} + {{- $pods = append $pods (printf "%s-scm-%d" $.Release.Name $i) }} {{- end }} {{- $pods | join "," }} {{- end }} -{{/* Common configuration environment variables */}} -{{- define "ozone.configuration.env" -}} +{{/* List of decommission om nodes */}} +{{- define "ozone.om.decommissioned.nodes" -}} + {{- $nodes := list }} + {{- $statefulset := lookup "apps/v1" "StatefulSet" $.Release.Namespace (printf "%s-om" $.Release.Name) -}} + {{- if $statefulset }} + {{- $oldCount := $statefulset.spec.replicas | int -}} + {{- $newCount := .Values.om.replicas | int }} + {{- range $i := until $oldCount }} + {{- $minCount := max $newCount 1 -}} + {{- if ge $i $minCount }} + {{- $nodes = append $nodes 
(printf "%s-om-%d" $.Release.Name $i) }} + {{- end }} + {{- end }} + {{- end }} + {{- $nodes | join "," }} +{{- end }} + +{{/* List of bootstrap om nodes */}} +{{- define "ozone.om.bootstrap.nodes" -}} + {{- $nodes := list }} + {{- $statefulset := lookup "apps/v1" "StatefulSet" $.Release.Namespace (printf "%s-om" $.Release.Name) -}} + {{- if $statefulset }} + {{- $oldCount := $statefulset.spec.replicas | int -}} + {{- $newCount := .Values.om.replicas | int }} + {{- range $i := until $newCount }} + {{- if ge $i $oldCount }} + {{- $nodes = append $nodes (printf "%s-om-%d" $.Release.Name $i) }} + {{- end }} + {{- end }} + {{- end }} + {{- $nodes | join ","}} +{{- end }} + +{{/* List of decommission scm nodes */}} +{{- define "ozone.scm.decommissioned.nodes" -}} + {{- $nodes := list }} + {{- $statefulset := lookup "apps/v1" "StatefulSet" $.Release.Namespace (printf "%s-scm" $.Release.Name) -}} + {{- if $statefulset }} + {{- $oldCount := $statefulset.spec.replicas | int -}} + {{- $newCount := .Values.scm.replicas | int }} + {{- range $i := until $oldCount }} + {{- if ge $i $newCount }} + {{- $nodes = append $nodes (printf "%s-scm-%d" $.Release.Name $i) }} + {{- end }} + {{- end }} + {{- end }} + {{- $nodes | join "," -}} +{{- end }} + +{{/* List of decommission data nodes */}} +{{- define "ozone.data.decommissioned.hosts" -}} + {{- $hosts := list }} + {{- $statefulset := lookup "apps/v1" "StatefulSet" $.Release.Namespace (printf "%s-datanode" $.Release.Name) -}} + {{- if $statefulset }} + {{- $oldCount := $statefulset.spec.replicas | int -}} + {{- $newCount := .Values.datanode.replicas | int }} + {{- range $i := until $oldCount }} + {{- if ge $i $newCount }} + {{- $hosts = append $hosts (printf "%s-datanode-%d.%s-datanode-headless.%s.svc.cluster.local" $.Release.Name $i $.Release.Name $.Release.Namespace) }} + {{- end }} + {{- end }} + {{- end }} + {{- $hosts | join "," -}} +{{- end }} + +{{- define "ozone.configuration.env.common" -}} - name: 
OZONE-SITE.XML_hdds.datanode.dir value: /data/storage - name: OZONE-SITE.XML_ozone.scm.datanode.id.dir value: /data - name: OZONE-SITE.XML_ozone.metadata.dirs value: /data/metadata -{{- if gt (int .Values.scm.replicas) 1 }} - name: OZONE-SITE.XML_ozone.scm.ratis.enable value: "true" - name: OZONE-SITE.XML_ozone.scm.service.ids value: cluster1 - name: OZONE-SITE.XML_ozone.scm.nodes.cluster1 value: {{ include "ozone.scm.cluster.ids" . }} + {{/*- name: OZONE-SITE.XML_ozone.scm.skip.bootstrap.validation*/}} + {{/* value: {{ quote .Values.scm.skipBootstrapValidation }}*/}} {{- range $i, $val := until ( .Values.scm.replicas | int ) }} -- name: {{ printf "OZONE-SITE.XML_ozone.scm.address.cluster1.ozone-scm-%d" $i }} +- name: {{ printf "OZONE-SITE.XML_ozone.scm.address.cluster1.%s-scm-%d" $.Release.Name $i }} value: {{ printf "%s-scm-%d.%s-scm-headless.%s.svc.cluster.local" $.Release.Name $i $.Release.Name $.Release.Namespace }} {{- end }} - name: OZONE-SITE.XML_ozone.scm.primordial.node.id - value: "ozone-scm-0" -{{- else }} -- name: OZONE-SITE.XML_ozone.scm.block.client.address - value: {{ include "ozone.scm.pods" . }} -- name: OZONE-SITE.XML_ozone.scm.client.address - value: {{ include "ozone.scm.pods" . }} -- name: OZONE-SITE.XML_ozone.scm.names - value: {{ include "ozone.scm.pods" . }} -{{- end}} -{{- if gt (int .Values.om.replicas) 1 }} + value: {{ printf "%s-scm-0" $.Release.Name }} - name: OZONE-SITE.XML_ozone.om.ratis.enable value: "true" - name: OZONE-SITE.XML_ozone.om.service.ids value: cluster1 -- name: OZONE-SITE.XML_ozone.om.nodes.cluster1 - value: {{ include "ozone.om.cluster.ids" . }} -{{- range $i, $val := until ( .Values.om.replicas | int ) }} -- name: {{ printf "OZONE-SITE.XML_ozone.om.address.cluster1.ozone-om-%d" $i }} - value: {{ printf "%s-om-%d.%s-om-headless.%s.svc.cluster.local" $.Release.Name $i $.Release.Name $.Release.Namespace }} -{{- end }} -{{- else }} -- name: OZONE-SITE.XML_ozone.om.address - value: {{ include "ozone.om.pods" . 
}} -{{- end}} - name: OZONE-SITE.XML_hdds.scm.safemode.min.datanode value: "3" - name: OZONE-SITE.XML_ozone.datanode.pipeline.limit @@ -122,3 +168,49 @@ app.kubernetes.io/instance: {{ .Release.Name }} - name: OZONE-SITE.XML_dfs.datanode.use.datanode.hostname value: "true" {{- end }} + +{{/* Common configuration environment variables */}} +{{- define "ozone.configuration.env" -}} +{{- $bOmNodes := ternary (splitList "," (include "ozone.om.bootstrap.nodes" .)) (list) (ne "" (include "ozone.om.bootstrap.nodes" .)) }} +{{- $dOmNodes := ternary (splitList "," (include "ozone.om.decommissioned.nodes" .)) (list) (ne "" (include "ozone.om.decommissioned.nodes" .)) }} +{{- $activeOmNodes := ternary (splitList "," (include "ozone.om.cluster.ids" .)) (list) (ne "" (include "ozone.om.cluster.ids" .)) }} +{{ include "ozone.configuration.env.common" . }} +{{- if gt (len $dOmNodes) 0 }} +{{- $decomIds := $dOmNodes | join "," }} +- name: OZONE-SITE.XML_ozone.om.decommissioned.nodes.cluster1 + value: {{ $decomIds }} +{{- else}} +- name: OZONE-SITE.XML_ozone.om.decommissioned.nodes.cluster1 + value: "" +{{- end }} +- name: OZONE-SITE.XML_ozone.om.nodes.cluster1 + value: {{ $activeOmNodes | join "," }} +{{- range $tempId := $activeOmNodes }} +- name: {{ printf "OZONE-SITE.XML_ozone.om.address.cluster1.%s" $tempId }} + value: {{ printf "%s.%s-om-headless.%s.svc.cluster.local" $tempId $.Release.Name $.Release.Namespace }} +{{- end }} +{{- range $tempId := $dOmNodes }} +- name: {{ printf "OZONE-SITE.XML_ozone.om.address.cluster1.%s" $tempId }} + value: {{ printf "%s-helm-manager-decommission-%s-svc.%s.svc.cluster.local" $.Release.Name $tempId $.Release.Namespace }} +{{- end }} +{{- end }} + +{{/* Common configuration environment variables for pre hook */}} +{{- define "ozone.configuration.env.prehook" -}} +{{- $bOmNodes := ternary (splitList "," (include "ozone.om.bootstrap.nodes" .)) (list) (ne "" (include "ozone.om.bootstrap.nodes" .)) }} +{{- $dOmNodes := ternary (splitList "," 
(include "ozone.om.decommissioned.nodes" .)) (list) (ne "" (include "ozone.om.decommissioned.nodes" .)) }} +{{- $activeOmNodes := ternary (splitList "," (include "ozone.om.cluster.ids" .)) (list) (ne "" (include "ozone.om.cluster.ids" .)) }} +{{- $allOmNodes := concat $activeOmNodes $dOmNodes }} +{{ include "ozone.configuration.env.common" . }} +- name: OZONE-SITE.XML_ozone.om.decommissioned.nodes.cluster1 + value: "" +{{- range $tempId := $allOmNodes }} +- name: {{ printf "OZONE-SITE.XML_ozone.om.address.cluster1.%s" $tempId }} + value: {{ printf "%s.%s-om-headless.%s.svc.cluster.local" $tempId $.Release.Name $.Release.Namespace }} +{{- end }} +{{ $allOmNodes = append $allOmNodes "om-leader-transfer"}} +- name: OZONE-SITE.XML_ozone.om.nodes.cluster1 + value: {{ $allOmNodes | join "," }} +- name: "OZONE-SITE.XML_ozone.om.address.cluster1.om-leader-transfer" + value: localhost +{{- end }} \ No newline at end of file diff --git a/charts/ozone/templates/helm/om-manager.yaml b/charts/ozone/templates/helm/om-manager.yaml new file mode 100644 index 0000000..1e2d4a2 --- /dev/null +++ b/charts/ozone/templates/helm/om-manager.yaml @@ -0,0 +1,239 @@ +{{- if or .Values.om.persistence.enabled }} +{{- $dnodes := ternary (splitList "," (include "ozone.om.decommissioned.nodes" .)) (list) (ne "" (include "ozone.om.decommissioned.nodes" .)) }} +{{- $env := concat .Values.env .Values.helm.env }} +{{- $envFrom := concat .Values.envFrom .Values.helm.envFrom }} +{{- $nodeSelector := or .Values.helm.nodeSelector .Values.nodeSelector }} +{{- $affinity := or .Values.helm.affinity .Values.affinity }} +{{- $tolerations := or .Values.helm.tolerations .Values.tolerations }} +{{- $securityContext := or .Values.helm.securityContext .Values.securityContext }} +{{- if and (gt (len $dnodes) 0) ( .Values.om.persistence.enabled) }} + +apiVersion: batch/v1 +kind: Job +metadata: + name: {{ printf "%s-helm-manager-leader-transfer" $.Release.Name }} + labels: + {{- include "ozone.labels" $ | nindent 4 
}} + app.kubernetes.io/component: helm-manager + annotations: + "helm.sh/hook": pre-upgrade + "helm.sh/hook-weight": "0" + "helm.sh/hook-delete-policy": hook-succeeded, hook-failed +spec: + backoffLimit: {{ $.Values.helm.backoffLimit }} + template: + metadata: + labels: + {{- include "ozone.selectorLabels" $ | nindent 8 }} + app.kubernetes.io/component: helm-manager + spec: + containers: + - name: om-leader-transfer + image: "{{ $.Values.image.repository }}:{{ $.Values.image.tag | default $.Chart.AppVersion }}" + imagePullPolicy: {{ $.Values.image.pullPolicy }} + {{- with $.Values.om.command }} + command: {{- tpl (toYaml .) $ | nindent 12 }} + {{- end }} + args: + - sh + - -c + - | + set -e + exec ozone admin om transfer -id=cluster1 -n={{ $.Release.Name }}-om-0 + env: + {{- include "ozone.configuration.env.prehook" $ | nindent 12 }} + {{- with $env }} + {{- tpl (toYaml .) $ | nindent 12 }} + {{- end }} + {{- with $envFrom }} + envFrom: {{- tpl (toYaml .) $ | nindent 12 }} + {{- end }} + ports: + - name: data-ratis-ipc + containerPort: 9858 + - name: data-ipc + containerPort: 9859 + - name: scm-rpc-client + containerPort: 9860 + - name: scm-block-cl + containerPort: 9863 + - name: scm-rpc-data + containerPort: 9861 + - name: scm-ratis + containerPort: 9894 + - name: scm-grpc + containerPort: 9895 + - name: om-rpc + containerPort: 9862 + - name: om-ratis + containerPort: 9872 + volumeMounts: + - name: config + mountPath: {{ $.Values.configuration.dir }} + - name: om-data + mountPath: {{ $.Values.om.persistence.path }} + {{- with $nodeSelector }} + nodeSelector: {{- toYaml . | nindent 8 }} + {{- end }} + {{- with $affinity }} + affinity: {{- toYaml . | nindent 8 }} + {{- end }} + {{- with $tolerations }} + tolerations: {{- toYaml . | nindent 8 }} + {{- end }} + {{- with $securityContext }} + securityContext: {{- toYaml . 
| nindent 8 }} + {{- end }} + volumes: + - name: om-data + emptyDir: { } + - name: config + projected: + sources: + - configMap: + name: {{ $.Release.Name }}-ozone + {{- with $.Values.configuration.filesFrom }} + {{- tpl (toYaml .) $ | nindent 14 }} + {{- end }} + restartPolicy: Never + +{{- range $dnode := $dnodes }} +--- +apiVersion: batch/v1 +kind: Job +metadata: + name: {{ printf "%s-helm-manager-decommission-%s" $.Release.Name $dnode }} + labels: + {{- include "ozone.labels" $ | nindent 4 }} + app.kubernetes.io/component: helm-manager + annotations: + "helm.sh/hook": post-upgrade + "helm.sh/hook-weight": "0" + "helm.sh/hook-delete-policy": hook-succeeded, hook-failed +spec: + backoffLimit: {{ $.Values.helm.backoffLimit }} + template: + metadata: + labels: + {{- include "ozone.selectorLabels" $ | nindent 8 }} + app.kubernetes.io/component: helm-manager + spec: + containers: + - name: om-decommission + image: "{{ $.Values.image.repository }}:{{ $.Values.image.tag | default $.Chart.AppVersion }}" + imagePullPolicy: {{ $.Values.image.pullPolicy }} + {{- with $.Values.om.command }} + command: {{- tpl (toYaml .) $ | nindent 12 }} + {{- end }} + args: + - sh + - -c + - | + set -e + decommission_finalizer() { + echo "Init decommission finalizer process..." + while true; do + IFS= read -r line; + echo "$line" + if echo "$line" | grep -q "Successfully decommissioned OM {{ $dnode }}"; then + echo "{{ $dnode }} was successfully decommissioned!" + if [ -d /old{{ $.Values.om.persistence.path }} ]; then + echo "Delete old data on pvc to enable rescheduling without manual PVC deletion!" + rm -rf /old{{ $.Values.om.persistence.path }}/* + echo "Data deleted!" + fi + break; + fi + done + echo "Decommission finalizer process finished!" 
+ exit 0 + } + exec ozone admin om decommission -id=cluster1 -nodeid={{ $dnode }} -hostname={{ printf "%s-helm-manager-decommission-%s-svc.%s.svc.cluster.local" $.Release.Name $dnode $.Release.Namespace }} | decommission_finalizer + env: + {{- include "ozone.configuration.env" $ | nindent 12 }} + {{- with $env }} + {{- tpl (toYaml .) $ | nindent 12 }} + {{- end }} + {{- with $envFrom }} + envFrom: {{- tpl (toYaml .) $ | nindent 12 }} + {{- end }} + ports: + - name: data-ratis-ipc + containerPort: 9858 + - name: data-ipc + containerPort: 9859 + - name: scm-rpc-client + containerPort: 9860 + - name: scm-block-cl + containerPort: 9863 + - name: scm-rpc-data + containerPort: 9861 + - name: scm-ratis + containerPort: 9894 + - name: scm-grpc + containerPort: 9895 + - name: om-rpc + containerPort: 9862 + - name: om-ratis + containerPort: 9872 + volumeMounts: + - name: config + mountPath: {{ $.Values.configuration.dir }} + - name: om-data + mountPath: {{ $.Values.om.persistence.path }} + - name: om-data-old + mountPath: /old{{ $.Values.om.persistence.path }} + {{- with $nodeSelector }} + nodeSelector: {{- toYaml . | nindent 8 }} + {{- end }} + {{- with $affinity }} + affinity: {{- toYaml . | nindent 8 }} + {{- end }} + {{- with $tolerations }} + tolerations: {{- toYaml . | nindent 8 }} + {{- end }} + {{- with $securityContext }} + securityContext: {{- toYaml . | nindent 8 }} + {{- end }} + volumes: + - name: om-data-old + persistentVolumeClaim: + claimName: {{ $.Release.Name}}-om-{{ $dnode }} + - name: om-data + emptyDir: { } + - name: config + projected: + sources: + - configMap: + name: {{ $.Release.Name }}-ozone + {{- with $.Values.configuration.filesFrom }} + {{- tpl (toYaml .) 
$ | nindent 14 }} + {{- end }} + restartPolicy: Never + +--- +apiVersion: v1 +kind: Service +metadata: + name: {{ printf "%s-helm-manager-decommission-%s-svc" $.Release.Name $dnode }} + labels: + {{- include "ozone.labels" $ | nindent 4 }} + app.kubernetes.io/component: helm-manager + annotations: + "helm.sh/hook": post-upgrade + "helm.sh/hook-weight": "-10" + "helm.sh/hook-delete-policy": hook-succeeded, hook-failed +spec: + selector: + job-name: {{ printf "%s-helm-manager-decommission-%s" $.Release.Name $dnode }} + ports: + - name: rpc + port: 9862 + targetPort: 9862 + - name: ratis + port: 9872 + targetPort: 9872 + type: ClusterIP +{{- end }} +{{- end }} +{{- end }} \ No newline at end of file diff --git a/charts/ozone/templates/om/om-service-headless.yaml b/charts/ozone/templates/om/om-service-headless.yaml index 3d50b70..df9ce55 100644 --- a/charts/ozone/templates/om/om-service-headless.yaml +++ b/charts/ozone/templates/om/om-service-headless.yaml @@ -28,10 +28,8 @@ spec: ports: - name: ui port: {{ .Values.om.service.port }} - {{- if gt (int .Values.om.replicas) 1 }} - name: ratis port: 9872 - {{- end }} selector: {{- include "ozone.selectorLabels" . 
| nindent 4 }} app.kubernetes.io/component: om diff --git a/charts/ozone/templates/om/om-statefulset.yaml b/charts/ozone/templates/om/om-statefulset.yaml index 978a212..43f86ae 100644 --- a/charts/ozone/templates/om/om-statefulset.yaml +++ b/charts/ozone/templates/om/om-statefulset.yaml @@ -22,6 +22,8 @@ {{- $affinity := or .Values.om.affinity .Values.affinity }} {{- $tolerations := or .Values.om.tolerations .Values.tolerations }} {{- $securityContext := or .Values.om.securityContext .Values.securityContext }} +{{- $bnodes := ternary (splitList "," (include "ozone.om.bootstrap.nodes" .)) (list) (ne "" (include "ozone.om.bootstrap.nodes" .)) }} +{{- $activeNodes := ternary (splitList "," (include "ozone.om.cluster.ids" .)) (list) (ne "" (include "ozone.om.cluster.ids" .)) }} apiVersion: apps/v1 kind: StatefulSet metadata: @@ -31,7 +33,6 @@ metadata: app.kubernetes.io/component: om spec: replicas: {{ .Values.om.replicas }} - podManagementPolicy: Parallel serviceName: {{ .Release.Name }}-om-headless selector: matchLabels: @@ -40,7 +41,7 @@ spec: template: metadata: annotations: - checksum/config: {{ include (print $.Template.BasePath "/ozone-configmap.yaml") . | sha256sum }} + checksum/config: {{ include (print $.Template.BasePath "/ozone-configmap.yaml") . | cat (include "ozone.configuration.env" .) | sha256sum }} labels: {{- include "ozone.selectorLabels" . | nindent 8 }} app.kubernetes.io/component: om @@ -52,9 +53,66 @@ spec: {{- with .Values.om.command }} command: {{- tpl (toYaml .) $ | nindent 12 }} {{- end }} - {{- with .Values.om.args }} - args: {{- tpl (toYaml .) 
$ | nindent 12 }} - {{- end }} + args: + - sh + - -c + - | + set -e + HELM_MANAGER_PATH="{{ .Values.om.persistence.path }}{{ .Values.helm.persistence.path }}" + HELM_MANAGER_BOOTSTRAPPED_FILE="$HELM_MANAGER_PATH/bootstrapped" + {{- $flattenedArgs := join " " $.Values.om.args }} + {{- if and (.Values.om.persistence.enabled) (gt (len $bnodes) 0) }} + joinArr() { + local IFS="," + echo "$*" + } + bootstrap_finalizer() { + echo "Init bootrap finalizer process..." + while true; do + IFS= read -r line; + echo "$line" + if echo "$line" | grep -q "Successfully bootstrapped OM $HOSTNAME and joined the Ratis group"; then + echo "$HOSTNAME was successfully bootstrapped!" + mkdir -p "$HELM_MANAGER_PATH" + touch "$HELM_MANAGER_BOOTSTRAPPED_FILE" + break; + fi + done + echo "Bootstrap finalizer process finished!" + exit 0 + } + bootstrapHosts="{{ join "," $bnodes }}" + echo "Need to handle bootstrap for nodes $bootstrapHosts" + IFS=',' read -r -a hostArray <<< "$bootstrapHosts" + doBootstrap=false + nodesConfigOverwriteList=() + for host in "${hostArray[@]}"; do + if [[ "$host" == "$HOSTNAME" ]]; then + doBootstrap=true + activeNodesConfig="{{ join "," $activeNodes }}" + IFS=',' read -r -a overwriteArray <<< "$activeNodesConfig" + for overwriteHost in "${overwriteArray[@]}"; do + nodesConfigOverwriteList+=("$overwriteHost") + if [[ "$overwriteHost" == "$HOSTNAME" ]]; then + break; + fi + done + break + fi + done + if [ "$doBootstrap" = true ] && [ ! -f "$HELM_MANAGER_BOOTSTRAPPED_FILE" ]; then + echo "$HOSTNAME must be started with bootstrap arg!" + overwriteCmd="$(joinArr "${nodesConfigOverwriteList[@]}")" + echo "Bootstrapping node config for this node: $overwriteCmd" + exec {{ printf "%s --set ozone.om.nodes.cluster1=" $flattenedArgs }}"$overwriteCmd" --bootstrap | bootstrap_finalizer + else + echo "$HOSTNAME must not be started with bootstrap arg!" + exec {{ join " " $.Values.om.args }} + fi + {{- else }} + echo "No bootstrap handling needed!" 
+ exec {{ join " " $.Values.om.args }} + {{- end }} env: {{- include "ozone.configuration.env" . | nindent 12 }} - name: WAITFOR @@ -72,10 +130,8 @@ spec: containerPort: 9862 - name: ui containerPort: {{ .Values.om.service.port }} - {{- if gt (int .Values.om.replicas) 1 }} - name: ratis containerPort: 9872 - {{- end }} livenessProbe: httpGet: path: / @@ -109,7 +165,7 @@ spec: {{- end }} {{- if not .Values.om.persistence.enabled }} - name: {{ .Release.Name }}-om - emptyDir: {} + emptyDir: { } {{- end }} {{- if .Values.om.persistence.enabled }} volumeClaimTemplates: diff --git a/charts/ozone/templates/ozone-configmap.yaml b/charts/ozone/templates/ozone-configmap.yaml index dbee026..f5b12d8 100644 --- a/charts/ozone/templates/ozone-configmap.yaml +++ b/charts/ozone/templates/ozone-configmap.yaml @@ -21,5 +21,9 @@ kind: ConfigMap metadata: name: {{ .Release.Name }}-ozone labels: {{- include "ozone.labels" . | nindent 4 }} + annotations: + "helm.sh/hook": pre-upgrade, post-install + "helm.sh/hook-weight": "-10" + "helm.sh/resource-policy": keep data: {{- tpl (toYaml .Values.configuration.files) $ | nindent 4 }} diff --git a/charts/ozone/values.yaml b/charts/ozone/values.yaml index 9c2dc4a..8645db3 100644 --- a/charts/ozone/values.yaml +++ b/charts/ozone/values.yaml @@ -214,3 +214,35 @@ scm: size: 10Gi # The name of a specific storage class name to use storageClassName: ~ + +# Helm Manager configuration +helm: + # Additional Helm Manager environment variables (templated) + env: [] + # Additional Helm Manager envFrom items to set up environment variables (templated) + envFrom: [] + # Constrain Helm Manager pods to nodes with specific node labels + nodeSelector: {} + # Constrain Helm Manager pods to nodes by affinity/anti-affinity rules + affinity: {} + # Allow to schedule Helm Manager pods on nodes with matching taints + tolerations: [] + # Helm Manager security context (overwrites common security context) + securityContext: {} + # Decommissioning is handled with a 
post-upgrade helm hook job. + # To avoid endless retries of decommissioning, this limit is set. + # This can happen if PVC has been deleted or is not reachable. + # This is used for decommissioning OM + backoffLimit: 5 + # Helm Manager persistence (this is enabled automatically if at least one + # of datanode, scm or om is enabled) + persistence: + # Persistence access modes + accessModes: + - ReadWriteOnce + # Path for Helm Manager volume mount + path: /metadata/helm + # Volume size + size: 5Mi + # The name of a specific storage class name to use + storageClassName: ~ \ No newline at end of file From d60087d9f9d68811f38bdaad4d85d826ee5ae2fa Mon Sep 17 00:00:00 2001 From: TobiasPyttel Date: Tue, 3 Dec 2024 10:33:28 +0100 Subject: [PATCH 05/17] HDDS-11618. Removed unused helm manager settings --- charts/ozone/values.yaml | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/charts/ozone/values.yaml b/charts/ozone/values.yaml index 8645db3..2eefb4e 100644 --- a/charts/ozone/values.yaml +++ b/charts/ozone/values.yaml @@ -237,12 +237,5 @@ helm: # Helm Manager persistence (this is enabled automatically if at least one # of datanode, scm or om is enabled) persistence: - # Persistence access modes - accessModes: - - ReadWriteOnce # Path for Helm Manager volume mount - path: /metadata/helm - # Volume size - size: 5Mi - # The name of a specific storage class name to use - storageClassName: ~ \ No newline at end of file + path: /metadata/helm \ No newline at end of file From 591c78265af748440c3c04900c146c6012db3805 Mon Sep 17 00:00:00 2001 From: tejaskriya Date: Tue, 16 Sep 2025 20:36:29 +0530 Subject: [PATCH 06/17] Fix storage persistence, make HA default --- .../datanode/datanode-statefulset.yaml | 2 +- charts/ozone/templates/om/om-statefulset.yaml | 2 +- .../ozone/templates/s3g/s3g-statefulset.yaml | 2 +- charts/ozone/values.yaml | 88 +++++++++---------- 4 files changed, 47 insertions(+), 47 deletions(-) diff --git
a/charts/ozone/templates/datanode/datanode-statefulset.yaml b/charts/ozone/templates/datanode/datanode-statefulset.yaml index 28407b8..a91834a 100644 --- a/charts/ozone/templates/datanode/datanode-statefulset.yaml +++ b/charts/ozone/templates/datanode/datanode-statefulset.yaml @@ -121,7 +121,7 @@ spec: resources: requests: storage: {{ .Values.datanode.persistence.size }} - {{- with .Values.scm.persistence.storageClassName }} + {{- with .Values.datanode.persistence.storageClassName }} storageClassName: {{ . }} {{- end }} {{- end }} diff --git a/charts/ozone/templates/om/om-statefulset.yaml b/charts/ozone/templates/om/om-statefulset.yaml index 4c201dc..777d4dc 100644 --- a/charts/ozone/templates/om/om-statefulset.yaml +++ b/charts/ozone/templates/om/om-statefulset.yaml @@ -184,7 +184,7 @@ spec: resources: requests: storage: {{ .Values.om.persistence.size }} - {{- with .Values.scm.persistence.storageClassName }} + {{- with .Values.om.persistence.storageClassName }} storageClassName: {{ . }} {{- end }} {{- end }} \ No newline at end of file diff --git a/charts/ozone/templates/s3g/s3g-statefulset.yaml b/charts/ozone/templates/s3g/s3g-statefulset.yaml index a79ab3f..a9a2086 100644 --- a/charts/ozone/templates/s3g/s3g-statefulset.yaml +++ b/charts/ozone/templates/s3g/s3g-statefulset.yaml @@ -119,7 +119,7 @@ spec: resources: requests: storage: {{ .Values.s3g.persistence.size }} - {{- with .Values.scm.persistence.storageClassName }} + {{- with .Values.s3g.persistence.storageClassName }} storageClassName: {{ . 
}} {{- end }} {{- end }} \ No newline at end of file diff --git a/charts/ozone/values.yaml b/charts/ozone/values.yaml index 28fe909..366b623 100644 --- a/charts/ozone/values.yaml +++ b/charts/ozone/values.yaml @@ -105,7 +105,7 @@ datanode: # Ozone Manager configuration om: # Number of Ozone Manager replicas - replicas: 1 + replicas: 3 # Command to launch Ozone Manager (templated) command: ~ # Arguments to launch Ozone Manager (templated) @@ -145,6 +145,49 @@ om: # The name of a specific storage class name to use storageClassName: ~ +# Storage Container Manager configuration +scm: + # Number of Storage Container Manager replicas + replicas: 3 + # Command to launch Storage Container Manager (templated) + command: ~ + # Arguments to launch Storage Container Manager (templated) + args: ["ozone", "scm"] + # Additional Storage Container Manager environment variables (templated) + env: [] + # Additional Storage Container Manager envFrom items to set up environment variables (templated) + envFrom: [] + # Storage Container Manager resource requests and limits + resources: {} + # Constrain Storage Container Manager pods to nodes with specific node labels + nodeSelector: {} + # Constrain Storage Container Manager pods to nodes by affinity/anti-affinity rules + affinity: {} + # Allow to schedule Storage Container Manager pods on nodes with matching taints + tolerations: [] + # Storage Container Manager security context (overwrites common security context) + securityContext: {} + # Storage Container Manager service configuration + service: + type: ClusterIP + port: 9876 + nodePort: ~ + labels: {} + annotations: {} + # Storage Container Manager persistence + persistence: + # Enable persistence + enabled: false + # Persistence access modes + accessModes: + - ReadWriteOnce + # Path for Storage Container Manager volume mount + path: /data + # Volume size + size: 10Gi + # The name of a specific storage class name to use + storageClassName: ~ + # S3 Gateway configuration s3g: # 
Number of S3 Gateway replicas @@ -195,49 +238,6 @@ s3g: # The name of a specific storage class name to use storageClassName: ~ -# Storage Container Manager configuration -scm: - # Number of Storage Container Manager replicas - replicas: 1 - # Command to launch Storage Container Manager (templated) - command: ~ - # Arguments to launch Storage Container Manager (templated) - args: ["ozone", "scm"] - # Additional Storage Container Manager environment variables (templated) - env: [] - # Additional Storage Container Manager envFrom items to set up environment variables (templated) - envFrom: [] - # Storage Container Manager resource requests and limits - resources: {} - # Constrain Storage Container Manager pods to nodes with specific node labels - nodeSelector: {} - # Constrain Storage Container Manager pods to nodes by affinity/anti-affinity rules - affinity: {} - # Allow to schedule Storage Container Manager pods on nodes with matching taints - tolerations: [] - # Storage Container Manager security context (overwrites common security context) - securityContext: {} - # Storage Container Manager service configuration - service: - type: ClusterIP - port: 9876 - nodePort: ~ - labels: {} - annotations: {} - # Storage Container Manager persistence - persistence: - # Enable persistence - enabled: false - # Persistence access modes - accessModes: - - ReadWriteOnce - # Path for Storage Container Manager volume mount - path: /data - # Volume size - size: 10Gi - # The name of a specific storage class name to use - storageClassName: ~ - # Helm Manager configuration helm: # Additional Helm Manager environment variables (templated) From d81262baddd0a93f372d763b2471b6b3901b8bab Mon Sep 17 00:00:00 2001 From: tejaskriya Date: Tue, 16 Sep 2025 20:51:51 +0530 Subject: [PATCH 07/17] newline at end of file --- charts/ozone/values.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/charts/ozone/values.yaml b/charts/ozone/values.yaml index 366b623..edec31f 100644 --- 
a/charts/ozone/values.yaml +++ b/charts/ozone/values.yaml @@ -261,4 +261,4 @@ helm: # of datanode, scm or om is enabled) persistence: # Path for Helm Manager volume mount - path: /metadata/helm \ No newline at end of file + path: /metadata/helm From 3c7d83c3c59786f51ef968174d2989b98c3c2be9 Mon Sep 17 00:00:00 2001 From: tejaskriya Date: Fri, 19 Sep 2025 14:52:19 +0530 Subject: [PATCH 08/17] replicas of om and scm 1 --- charts/ozone/values.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/charts/ozone/values.yaml b/charts/ozone/values.yaml index edec31f..18894eb 100644 --- a/charts/ozone/values.yaml +++ b/charts/ozone/values.yaml @@ -105,7 +105,7 @@ datanode: # Ozone Manager configuration om: # Number of Ozone Manager replicas - replicas: 3 + replicas: 1 # Command to launch Ozone Manager (templated) command: ~ # Arguments to launch Ozone Manager (templated) @@ -148,7 +148,7 @@ om: # Storage Container Manager configuration scm: # Number of Storage Container Manager replicas - replicas: 3 + replicas: 1 # Command to launch Storage Container Manager (templated) command: ~ # Arguments to launch Storage Container Manager (templated) From d4aacbac6fe1785a371a7fc95a5cd18a26d2255f Mon Sep 17 00:00:00 2001 From: tejaskriya Date: Fri, 19 Sep 2025 20:56:37 +0530 Subject: [PATCH 09/17] change hook from post to pre install for configmap --- charts/ozone/templates/ozone-configmap.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/charts/ozone/templates/ozone-configmap.yaml b/charts/ozone/templates/ozone-configmap.yaml index f5b12d8..94f101a 100644 --- a/charts/ozone/templates/ozone-configmap.yaml +++ b/charts/ozone/templates/ozone-configmap.yaml @@ -22,7 +22,7 @@ metadata: name: {{ .Release.Name }}-ozone labels: {{- include "ozone.labels" . 
| nindent 4 }} annotations: - "helm.sh/hook": pre-upgrade, post-install + "helm.sh/hook": pre-upgrade, pre-install "helm.sh/hook-weight": "-10" "helm.sh/resource-policy": keep data: From beb5ccf5aac110c625062c13c5a35117b2f1f52a Mon Sep 17 00:00:00 2001 From: tejaskriya Date: Fri, 19 Sep 2025 21:12:07 +0530 Subject: [PATCH 10/17] make HA default --- charts/ozone/values.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/charts/ozone/values.yaml b/charts/ozone/values.yaml index 18894eb..edec31f 100644 --- a/charts/ozone/values.yaml +++ b/charts/ozone/values.yaml @@ -105,7 +105,7 @@ datanode: # Ozone Manager configuration om: # Number of Ozone Manager replicas - replicas: 1 + replicas: 3 # Command to launch Ozone Manager (templated) command: ~ # Arguments to launch Ozone Manager (templated) @@ -148,7 +148,7 @@ om: # Storage Container Manager configuration scm: # Number of Storage Container Manager replicas - replicas: 1 + replicas: 3 # Command to launch Storage Container Manager (templated) command: ~ # Arguments to launch Storage Container Manager (templated) From fb737014c1e706e48a3f1eeec8fdcf2d16656307 Mon Sep 17 00:00:00 2001 From: tejaskriya Date: Thu, 23 Oct 2025 02:15:27 +0530 Subject: [PATCH 11/17] split ha upgrade jobs into different files --- ...-manager.yaml => om-decommission-job.yaml} | 118 +----------------- .../helm/om-decommission-service.yaml | 30 +++++ .../helm/om-leader-transfer-job.yaml | 102 +++++++++++++++ 3 files changed, 134 insertions(+), 116 deletions(-) rename charts/ozone/templates/helm/{om-manager.yaml => om-decommission-job.yaml} (56%) create mode 100644 charts/ozone/templates/helm/om-decommission-service.yaml create mode 100644 charts/ozone/templates/helm/om-leader-transfer-job.yaml diff --git a/charts/ozone/templates/helm/om-manager.yaml b/charts/ozone/templates/helm/om-decommission-job.yaml similarity index 56% rename from charts/ozone/templates/helm/om-manager.yaml rename to 
charts/ozone/templates/helm/om-decommission-job.yaml index 1e2d4a2..573c559 100644 --- a/charts/ozone/templates/helm/om-manager.yaml +++ b/charts/ozone/templates/helm/om-decommission-job.yaml @@ -1,4 +1,4 @@ -{{- if or .Values.om.persistence.enabled }} +{{- if .Values.om.persistence.enabled }} {{- $dnodes := ternary (splitList "," (include "ozone.om.decommissioned.nodes" .)) (list) (ne "" (include "ozone.om.decommissioned.nodes" .)) }} {{- $env := concat .Values.env .Values.helm.env }} {{- $envFrom := concat .Values.envFrom .Values.helm.envFrom }} @@ -6,97 +6,7 @@ {{- $affinity := or .Values.helm.affinity .Values.affinity }} {{- $tolerations := or .Values.helm.tolerations .Values.tolerations }} {{- $securityContext := or .Values.helm.securityContext .Values.securityContext }} -{{- if and (gt (len $dnodes) 0) ( .Values.om.persistence.enabled) }} - -apiVersion: batch/v1 -kind: Job -metadata: - name: {{ printf "%s-helm-manager-leader-transfer" $.Release.Name }} - labels: - {{- include "ozone.labels" $ | nindent 4 }} - app.kubernetes.io/component: helm-manager - annotations: - "helm.sh/hook": pre-upgrade - "helm.sh/hook-weight": "0" - "helm.sh/hook-delete-policy": hook-succeeded, hook-failed -spec: - backoffLimit: {{ $.Values.helm.backoffLimit }} - template: - metadata: - labels: - {{- include "ozone.selectorLabels" $ | nindent 8 }} - app.kubernetes.io/component: helm-manager - spec: - containers: - - name: om-leader-transfer - image: "{{ $.Values.image.repository }}:{{ $.Values.image.tag | default $.Chart.AppVersion }}" - imagePullPolicy: {{ $.Values.image.pullPolicy }} - {{- with $.Values.om.command }} - command: {{- tpl (toYaml .) $ | nindent 12 }} - {{- end }} - args: - - sh - - -c - - | - set -e - exec ozone admin om transfer -id=cluster1 -n={{ $.Release.Name }}-om-0 - env: - {{- include "ozone.configuration.env.prehook" $ | nindent 12 }} - {{- with $env }} - {{- tpl (toYaml .) $ | nindent 12 }} - {{- end }} - {{- with $envFrom }} - envFrom: {{- tpl (toYaml .) 
$ | nindent 12 }} - {{- end }} - ports: - - name: data-ratis-ipc - containerPort: 9858 - - name: data-ipc - containerPort: 9859 - - name: scm-rpc-client - containerPort: 9860 - - name: scm-block-cl - containerPort: 9863 - - name: scm-rpc-data - containerPort: 9861 - - name: scm-ratis - containerPort: 9894 - - name: scm-grpc - containerPort: 9895 - - name: om-rpc - containerPort: 9862 - - name: om-ratis - containerPort: 9872 - volumeMounts: - - name: config - mountPath: {{ $.Values.configuration.dir }} - - name: om-data - mountPath: {{ $.Values.om.persistence.path }} - {{- with $nodeSelector }} - nodeSelector: {{- toYaml . | nindent 8 }} - {{- end }} - {{- with $affinity }} - affinity: {{- toYaml . | nindent 8 }} - {{- end }} - {{- with $tolerations }} - tolerations: {{- toYaml . | nindent 8 }} - {{- end }} - {{- with $securityContext }} - securityContext: {{- toYaml . | nindent 8 }} - {{- end }} - volumes: - - name: om-data - emptyDir: { } - - name: config - projected: - sources: - - configMap: - name: {{ $.Release.Name }}-ozone - {{- with $.Values.configuration.filesFrom }} - {{- tpl (toYaml .) $ | nindent 14 }} - {{- end }} - restartPolicy: Never - +{{- if (gt (len $dnodes) 0) }} {{- range $dnode := $dnodes }} --- apiVersion: batch/v1 @@ -210,30 +120,6 @@ spec: {{- tpl (toYaml .) 
$ | nindent 14 }} {{- end }} restartPolicy: Never - ---- -apiVersion: v1 -kind: Service -metadata: - name: {{ printf "%s-helm-manager-decommission-%s-svc" $.Release.Name $dnode }} - labels: - {{- include "ozone.labels" $ | nindent 4 }} - app.kubernetes.io/component: helm-manager - annotations: - "helm.sh/hook": post-upgrade - "helm.sh/hook-weight": "-10" - "helm.sh/hook-delete-policy": hook-succeeded, hook-failed -spec: - selector: - job-name: {{ printf "%s-helm-manager-decommission-%s" $.Release.Name $dnode }} - ports: - - name: rpc - port: 9862 - targetPort: 9862 - - name: ratis - port: 9872 - targetPort: 9872 - type: ClusterIP {{- end }} {{- end }} {{- end }} \ No newline at end of file diff --git a/charts/ozone/templates/helm/om-decommission-service.yaml b/charts/ozone/templates/helm/om-decommission-service.yaml new file mode 100644 index 0000000..c5b9209 --- /dev/null +++ b/charts/ozone/templates/helm/om-decommission-service.yaml @@ -0,0 +1,30 @@ +{{- if .Values.om.persistence.enabled }} +{{- $dnodes := ternary (splitList "," (include "ozone.om.decommissioned.nodes" .)) (list) (ne "" (include "ozone.om.decommissioned.nodes" .)) }} +{{- if (gt (len $dnodes) 0) }} +{{- range $dnode := $dnodes }} +--- +apiVersion: v1 +kind: Service +metadata: + name: {{ printf "%s-helm-manager-decommission-%s-svc" $.Release.Name $dnode }} + labels: + {{- include "ozone.labels" $ | nindent 4 }} + app.kubernetes.io/component: helm-manager + annotations: + "helm.sh/hook": post-upgrade + "helm.sh/hook-weight": "-10" + "helm.sh/hook-delete-policy": hook-succeeded, hook-failed +spec: + selector: + job-name: {{ printf "%s-helm-manager-decommission-%s" $.Release.Name $dnode }} + ports: + - name: rpc + port: 9862 + targetPort: 9862 + - name: ratis + port: 9872 + targetPort: 9872 + type: ClusterIP +{{- end }} +{{- end }} +{{- end }} \ No newline at end of file diff --git a/charts/ozone/templates/helm/om-leader-transfer-job.yaml b/charts/ozone/templates/helm/om-leader-transfer-job.yaml new 
file mode 100644 index 0000000..013bb82 --- /dev/null +++ b/charts/ozone/templates/helm/om-leader-transfer-job.yaml @@ -0,0 +1,102 @@ +{{- if .Values.om.persistence.enabled }} +{{- $dnodes := ternary (splitList "," (include "ozone.om.decommissioned.nodes" .)) (list) (ne "" (include "ozone.om.decommissioned.nodes" .)) }} +{{- $env := concat .Values.env .Values.helm.env }} +{{- $envFrom := concat .Values.envFrom .Values.helm.envFrom }} +{{- $nodeSelector := or .Values.helm.nodeSelector .Values.nodeSelector }} +{{- $affinity := or .Values.helm.affinity .Values.affinity }} +{{- $tolerations := or .Values.helm.tolerations .Values.tolerations }} +{{- $securityContext := or .Values.helm.securityContext .Values.securityContext }} +{{- if (gt (len $dnodes) 0) }} +--- +apiVersion: batch/v1 +kind: Job +metadata: + name: {{ printf "%s-helm-manager-leader-transfer" $.Release.Name }} + labels: + {{- include "ozone.labels" $ | nindent 4 }} + app.kubernetes.io/component: helm-manager + annotations: + "helm.sh/hook": pre-upgrade + "helm.sh/hook-weight": "0" + "helm.sh/hook-delete-policy": hook-succeeded,hook-failed +spec: + backoffLimit: {{ $.Values.helm.backoffLimit }} + template: + metadata: + labels: + {{- include "ozone.selectorLabels" $ | nindent 8 }} + app.kubernetes.io/component: helm-manager + spec: + containers: + - name: om-leader-transfer + image: "{{ $.Values.image.repository }}:{{ $.Values.image.tag | default $.Chart.AppVersion }}" + imagePullPolicy: {{ $.Values.image.pullPolicy }} + {{- with $.Values.om.command }} + command: {{- tpl (toYaml .) $ | nindent 12 }} + {{- end }} + args: + - sh + - -c + - | + set -e + exec ozone admin om transfer -id=cluster1 -n={{ $.Release.Name }}-om-0 + env: + {{- include "ozone.configuration.env.prehook" $ | nindent 12 }} + {{- with $env }} + {{- tpl (toYaml .) $ | nindent 12 }} + {{- end }} + {{- with $envFrom }} + envFrom: + {{- tpl (toYaml .) 
$ | nindent 12 }} + {{- end }} + ports: + - name: data-ratis-ipc + containerPort: 9858 + - name: data-ipc + containerPort: 9859 + - name: scm-rpc-client + containerPort: 9860 + - name: scm-block-cl + containerPort: 9863 + - name: scm-rpc-data + containerPort: 9861 + - name: scm-ratis + containerPort: 9894 + - name: scm-grpc + containerPort: 9895 + - name: om-rpc + containerPort: 9862 + - name: om-ratis + containerPort: 9872 + volumeMounts: + - name: config + mountPath: {{ $.Values.configuration.dir }} + - name: om-data + mountPath: {{ $.Values.om.persistence.path }} + {{- with $nodeSelector }} + nodeSelector: {{- toYaml . | nindent 8 }} + {{- end }} + {{- with $affinity }} + affinity: {{- toYaml . | nindent 8 }} + {{- end }} + {{- with $tolerations }} + tolerations: {{- toYaml . | nindent 8 }} + {{- end }} + {{- with $securityContext }} + securityContext: {{- toYaml . | nindent 8 }} + {{- end }} + volumes: + - name: om-data + emptyDir: { } + - name: config + projected: + sources: + - configMap: + name: {{ $.Release.Name }}-ozone + {{- with $.Values.configuration.filesFrom }} + {{- tpl (toYaml .) 
$ | nindent 14 }} + {{- end }} + restartPolicy: Never + +{{- end }} +{{- end }} \ No newline at end of file From 41f951956ac8dfe60332855a8df63d1381afcb6b Mon Sep 17 00:00:00 2001 From: tejaskriya Date: Thu, 23 Oct 2025 03:16:02 +0530 Subject: [PATCH 12/17] make port and cluster id parameterized --- charts/ozone/templates/_helpers.tpl | 26 ++++++++-------- .../datanode/datanode-service-headless.yaml | 4 +-- .../datanode/datanode-statefulset.yaml | 4 +-- .../templates/helm/om-decommission-job.yaml | 2 +- .../helm/om-leader-transfer-job.yaml | 2 +- charts/ozone/templates/om/om-statefulset.yaml | 2 +- charts/ozone/values.yaml | 31 +++++++++++++++++-- 7 files changed, 49 insertions(+), 22 deletions(-) diff --git a/charts/ozone/templates/_helpers.tpl b/charts/ozone/templates/_helpers.tpl index 04e2a50..44beea0 100644 --- a/charts/ozone/templates/_helpers.tpl +++ b/charts/ozone/templates/_helpers.tpl @@ -146,13 +146,13 @@ app.kubernetes.io/instance: {{ .Release.Name }} - name: OZONE-SITE.XML_ozone.scm.ratis.enable value: "true" - name: OZONE-SITE.XML_ozone.scm.service.ids - value: cluster1 -- name: OZONE-SITE.XML_ozone.scm.nodes.cluster1 + value: {{ .Values.clusterId }} +- name: OZONE-SITE.XML_ozone.scm.nodes.{{ .Values.clusterId }} value: {{ include "ozone.scm.cluster.ids" . 
}} {{/*- name: OZONE-SITE.XML_ozone.scm.skip.bootstrap.validation*/}} {{/* value: {{ quote .Values.scm.skipBootstrapValidation }}*/}} {{- range $i, $val := until ( .Values.scm.replicas | int ) }} -- name: {{ printf "OZONE-SITE.XML_ozone.scm.address.cluster1.%s-scm-%d" $.Release.Name $i }} +- name: {{ printf "OZONE-SITE.XML_ozone.scm.address.%s.%s-scm-%d" $.Values.clusterId $.Release.Name $i }} value: {{ printf "%s-scm-%d.%s-scm-headless.%s.svc.cluster.local" $.Release.Name $i $.Release.Name $.Release.Namespace }} {{- end }} - name: OZONE-SITE.XML_ozone.scm.primordial.node.id @@ -160,7 +160,7 @@ app.kubernetes.io/instance: {{ .Release.Name }} - name: OZONE-SITE.XML_ozone.om.ratis.enable value: "true" - name: OZONE-SITE.XML_ozone.om.service.ids - value: cluster1 + value: {{ .Values.clusterId }} - name: OZONE-SITE.XML_hdds.scm.safemode.min.datanode value: "3" - name: OZONE-SITE.XML_ozone.datanode.pipeline.limit @@ -177,20 +177,20 @@ app.kubernetes.io/instance: {{ .Release.Name }} {{ include "ozone.configuration.env.common" . 
}} {{- if gt (len $dOmNodes) 0 }} {{- $decomIds := $dOmNodes | join "," }} -- name: OZONE-SITE.XML_ozone.om.decommissioned.nodes.cluster1 +- name: OZONE-SITE.XML_ozone.om.decommissioned.nodes.{{ .Values.clusterId }} value: {{ $decomIds }} {{- else}} -- name: OZONE-SITE.XML_ozone.om.decommissioned.nodes.cluster1 +- name: OZONE-SITE.XML_ozone.om.decommissioned.nodes.{{ .Values.clusterId }} value: "" {{- end }} -- name: OZONE-SITE.XML_ozone.om.nodes.cluster1 +- name: OZONE-SITE.XML_ozone.om.nodes.{{ .Values.clusterId }} value: {{ $activeOmNodes | join "," }} {{- range $tempId := $activeOmNodes }} -- name: {{ printf "OZONE-SITE.XML_ozone.om.address.cluster1.%s" $tempId }} +- name: {{ printf "OZONE-SITE.XML_ozone.om.address.%s.%s" $.Values.clusterId $tempId }} value: {{ printf "%s.%s-om-headless.%s.svc.cluster.local" $tempId $.Release.Name $.Release.Namespace }} {{- end }} {{- range $tempId := $dOmNodes }} -- name: {{ printf "OZONE-SITE.XML_ozone.om.address.cluster1.%s" $tempId }} +- name: {{ printf "OZONE-SITE.XML_ozone.om.address.%s.%s" $.Values.clusterId $tempId }} value: {{ printf "%s-helm-manager-decommission-%s-svc.%s.svc.cluster.local" $.Release.Name $tempId $.Release.Namespace }} {{- end }} {{- end }} @@ -202,15 +202,15 @@ app.kubernetes.io/instance: {{ .Release.Name }} {{- $activeOmNodes := ternary (splitList "," (include "ozone.om.cluster.ids" .)) (list) (ne "" (include "ozone.om.cluster.ids" .)) }} {{- $allOmNodes := concat $activeOmNodes $dOmNodes }} {{ include "ozone.configuration.env.common" . 
}} -- name: OZONE-SITE.XML_ozone.om.decommissioned.nodes.cluster1 +- name: OZONE-SITE.XML_ozone.om.decommissioned.nodes.{{ .Values.clusterId }} value: "" {{- range $tempId := $allOmNodes }} -- name: {{ printf "OZONE-SITE.XML_ozone.om.address.cluster1.%s" $tempId }} +- name: {{ printf "OZONE-SITE.XML_ozone.om.address.%s.%s" $.Values.clusterId $tempId }} value: {{ printf "%s.%s-om-headless.%s.svc.cluster.local" $tempId $.Release.Name $.Release.Namespace }} {{- end }} {{ $allOmNodes = append $allOmNodes "om-leader-transfer"}} -- name: OZONE-SITE.XML_ozone.om.nodes.cluster1 +- name: OZONE-SITE.XML_ozone.om.nodes.{{ .Values.clusterId }} value: {{ $allOmNodes | join "," }} -- name: "OZONE-SITE.XML_ozone.om.address.cluster1.om-leader-transfer" +- name: "OZONE-SITE.XML_ozone.om.address.{{ .Values.clusterId }}.om-leader-transfer" value: localhost {{- end }} \ No newline at end of file diff --git a/charts/ozone/templates/datanode/datanode-service-headless.yaml b/charts/ozone/templates/datanode/datanode-service-headless.yaml index 1f023ca..6c62959 100644 --- a/charts/ozone/templates/datanode/datanode-service-headless.yaml +++ b/charts/ozone/templates/datanode/datanode-service-headless.yaml @@ -29,9 +29,9 @@ spec: - name: ui port: {{ .Values.datanode.service.port }} - name: ratis-ipc - port: 9858 + port: {{ .Values.datanode.service.ratisIpcPort }} - name: ipc - port: 9859 + port: {{ .Values.datanode.service.ipcPort }} selector: {{- include "ozone.selectorLabels" . 
| nindent 4 }} app.kubernetes.io/component: datanode diff --git a/charts/ozone/templates/datanode/datanode-statefulset.yaml b/charts/ozone/templates/datanode/datanode-statefulset.yaml index a91834a..8dd95aa 100644 --- a/charts/ozone/templates/datanode/datanode-statefulset.yaml +++ b/charts/ozone/templates/datanode/datanode-statefulset.yaml @@ -66,9 +66,9 @@ spec: - name: ui containerPort: {{ .Values.datanode.service.port }} - name: ratis-ipc - containerPort: 9858 + containerPort: {{ .Values.datanode.service.ratisIpcPort }} - name: ipc - containerPort: 9859 + containerPort: {{ .Values.datanode.service.ipcPort }} livenessProbe: httpGet: path: / diff --git a/charts/ozone/templates/helm/om-decommission-job.yaml b/charts/ozone/templates/helm/om-decommission-job.yaml index 573c559..845f39e 100644 --- a/charts/ozone/templates/helm/om-decommission-job.yaml +++ b/charts/ozone/templates/helm/om-decommission-job.yaml @@ -58,7 +58,7 @@ spec: echo "Decommission finalizer process finished!" exit 0 } - exec ozone admin om decommission -id=cluster1 -nodeid={{ $dnode }} -hostname={{ printf "%s-helm-manager-decommission-%s-svc.%s.svc.cluster.local" $.Release.Name $dnode $.Release.Namespace }} | decommission_finalizer + exec ozone admin om decommission -id={{ $.Values.clusterId }} -nodeid={{ $dnode }} -hostname={{ printf "%s-helm-manager-decommission-%s-svc.%s.svc.cluster.local" $.Release.Name $dnode $.Release.Namespace }} | decommission_finalizer env: {{- include "ozone.configuration.env" $ | nindent 12 }} {{- with $env }} diff --git a/charts/ozone/templates/helm/om-leader-transfer-job.yaml b/charts/ozone/templates/helm/om-leader-transfer-job.yaml index 013bb82..6542352 100644 --- a/charts/ozone/templates/helm/om-leader-transfer-job.yaml +++ b/charts/ozone/templates/helm/om-leader-transfer-job.yaml @@ -39,7 +39,7 @@ spec: - -c - | set -e - exec ozone admin om transfer -id=cluster1 -n={{ $.Release.Name }}-om-0 + exec ozone admin om transfer -id={{ $.Values.clusterId }} -n={{ 
$.Release.Name }}-om-0 env: {{- include "ozone.configuration.env.prehook" $ | nindent 12 }} {{- with $env }} diff --git a/charts/ozone/templates/om/om-statefulset.yaml b/charts/ozone/templates/om/om-statefulset.yaml index 777d4dc..e4013bd 100644 --- a/charts/ozone/templates/om/om-statefulset.yaml +++ b/charts/ozone/templates/om/om-statefulset.yaml @@ -104,7 +104,7 @@ spec: echo "$HOSTNAME must be started with bootstrap arg!" overwriteCmd="$(joinArr "${nodesConfigOverwriteList[@]}")" echo "Bootstrapping node config for this node: $overwriteCmd" - exec {{ printf "%s --set ozone.om.nodes.cluster1=" $flattenedArgs }}"$overwriteCmd" --bootstrap | bootstrap_finalizer + exec {{ printf "%s --set ozone.om.nodes.%s=" $flattenedArgs $.Values.clusterId }}"$overwriteCmd" --bootstrap | bootstrap_finalizer else echo "$HOSTNAME must not be started with bootstrap arg!" exec {{ join " " $.Values.om.args }} diff --git a/charts/ozone/values.yaml b/charts/ozone/values.yaml index edec31f..f1ea2b9 100644 --- a/charts/ozone/values.yaml +++ b/charts/ozone/values.yaml @@ -21,6 +21,9 @@ image: imagePullSecrets: [] +# Cluster ID +clusterId: cluster1 + # Common environment variables (templated) env: [] # Common envFrom items to set up environment variables (templated) @@ -85,6 +88,8 @@ datanode: service: type: ClusterIP port: 9882 + ratisIpcPort: 9858 + ipcPort: 9859 nodePort: ~ labels: {} annotations: {} @@ -119,7 +124,18 @@ om: # Constrain Ozone Manager pods to nodes with specific node labels nodeSelector: {} # Constrain Ozone Manager pods to nodes by affinity/anti-affinity rules - affinity: {} + affinity: + podAntiAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + podAffinityTerm: + labelSelector: + matchExpressions: + - key: app.kubernetes.io/component + operator: In + values: + - scm + topologyKey: kubernetes.io/hostname # Allow to schedule Ozone Manager pods on nodes with matching taints tolerations: [] # Ozone Manager security context (overwrites common 
security context) @@ -162,7 +178,18 @@ scm: # Constrain Storage Container Manager pods to nodes with specific node labels nodeSelector: {} # Constrain Storage Container Manager pods to nodes by affinity/anti-affinity rules - affinity: {} + affinity: + podAntiAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + podAffinityTerm: + labelSelector: + matchExpressions: + - key: app.kubernetes.io/component + operator: In + values: + - om + topologyKey: kubernetes.io/hostname # Allow to schedule Storage Container Manager pods on nodes with matching taints tolerations: [] # Storage Container Manager security context (overwrites common security context) From 7410ec322b0171b944dabdbaa8783f5d3853951c Mon Sep 17 00:00:00 2001 From: tejaskriya Date: Thu, 23 Oct 2025 20:59:45 +0530 Subject: [PATCH 13/17] make ports parameterized, removed unwanted parameters --- .../templates/helm/om-decommission-job.yaml | 24 ++----------------- .../helm/om-decommission-service.yaml | 8 +++---- .../helm/om-leader-transfer-job.yaml | 24 ++----------------- .../templates/om/om-service-headless.yaml | 2 +- charts/ozone/templates/om/om-statefulset.yaml | 4 ++-- .../templates/scm/scm-service-headless.yaml | 10 ++++---- .../ozone/templates/scm/scm-statefulset.yaml | 10 ++++---- charts/ozone/values.yaml | 7 ++++++ 8 files changed, 28 insertions(+), 61 deletions(-) diff --git a/charts/ozone/templates/helm/om-decommission-job.yaml b/charts/ozone/templates/helm/om-decommission-job.yaml index 845f39e..45b2d98 100644 --- a/charts/ozone/templates/helm/om-decommission-job.yaml +++ b/charts/ozone/templates/helm/om-decommission-job.yaml @@ -68,24 +68,10 @@ spec: envFrom: {{- tpl (toYaml .) 
$ | nindent 12 }} {{- end }} ports: - - name: data-ratis-ipc - containerPort: 9858 - - name: data-ipc - containerPort: 9859 - - name: scm-rpc-client - containerPort: 9860 - - name: scm-block-cl - containerPort: 9863 - - name: scm-rpc-data - containerPort: 9861 - - name: scm-ratis - containerPort: 9894 - - name: scm-grpc - containerPort: 9895 - name: om-rpc - containerPort: 9862 + containerPort: {{ $.Values.om.service.rpcPort }} - name: om-ratis - containerPort: 9872 + containerPort: {{ $.Values.om.service.ratisPort }} volumeMounts: - name: config mountPath: {{ $.Values.configuration.dir }} @@ -96,12 +82,6 @@ spec: {{- with $nodeSelector }} nodeSelector: {{- toYaml . | nindent 8 }} {{- end }} - {{- with $affinity }} - affinity: {{- toYaml . | nindent 8 }} - {{- end }} - {{- with $tolerations }} - tolerations: {{- toYaml . | nindent 8 }} - {{- end }} {{- with $securityContext }} securityContext: {{- toYaml . | nindent 8 }} {{- end }} diff --git a/charts/ozone/templates/helm/om-decommission-service.yaml b/charts/ozone/templates/helm/om-decommission-service.yaml index c5b9209..466c5c5 100644 --- a/charts/ozone/templates/helm/om-decommission-service.yaml +++ b/charts/ozone/templates/helm/om-decommission-service.yaml @@ -19,11 +19,11 @@ spec: job-name: {{ printf "%s-helm-manager-decommission-%s" $.Release.Name $dnode }} ports: - name: rpc - port: 9862 - targetPort: 9862 + port: {{ $.Values.om.service.rpcPort }} + targetPort: {{ $.Values.om.service.rpcPort }} - name: ratis - port: 9872 - targetPort: 9872 + port: {{ $.Values.om.service.ratisPort }} + targetPort: {{ $.Values.om.service.ratisPort }} type: ClusterIP {{- end }} {{- end }} diff --git a/charts/ozone/templates/helm/om-leader-transfer-job.yaml b/charts/ozone/templates/helm/om-leader-transfer-job.yaml index 6542352..3b2b1b1 100644 --- a/charts/ozone/templates/helm/om-leader-transfer-job.yaml +++ b/charts/ozone/templates/helm/om-leader-transfer-job.yaml @@ -50,24 +50,10 @@ spec: {{- tpl (toYaml .) 
$ | nindent 12 }} {{- end }} ports: - - name: data-ratis-ipc - containerPort: 9858 - - name: data-ipc - containerPort: 9859 - - name: scm-rpc-client - containerPort: 9860 - - name: scm-block-cl - containerPort: 9863 - - name: scm-rpc-data - containerPort: 9861 - - name: scm-ratis - containerPort: 9894 - - name: scm-grpc - containerPort: 9895 - name: om-rpc - containerPort: 9862 + containerPort: {{ $.Values.om.service.rpcPort }} - name: om-ratis - containerPort: 9872 + containerPort: {{ $.Values.om.service.ratisPort }} volumeMounts: - name: config mountPath: {{ $.Values.configuration.dir }} @@ -76,12 +62,6 @@ spec: {{- with $nodeSelector }} nodeSelector: {{- toYaml . | nindent 8 }} {{- end }} - {{- with $affinity }} - affinity: {{- toYaml . | nindent 8 }} - {{- end }} - {{- with $tolerations }} - tolerations: {{- toYaml . | nindent 8 }} - {{- end }} {{- with $securityContext }} securityContext: {{- toYaml . | nindent 8 }} {{- end }} diff --git a/charts/ozone/templates/om/om-service-headless.yaml b/charts/ozone/templates/om/om-service-headless.yaml index df9ce55..813cef9 100644 --- a/charts/ozone/templates/om/om-service-headless.yaml +++ b/charts/ozone/templates/om/om-service-headless.yaml @@ -29,7 +29,7 @@ spec: - name: ui port: {{ .Values.om.service.port }} - name: ratis - port: 9872 + port: {{ .Values.om.service.ratisPort }} selector: {{- include "ozone.selectorLabels" . 
| nindent 4 }} app.kubernetes.io/component: om diff --git a/charts/ozone/templates/om/om-statefulset.yaml b/charts/ozone/templates/om/om-statefulset.yaml index e4013bd..57c3a0f 100644 --- a/charts/ozone/templates/om/om-statefulset.yaml +++ b/charts/ozone/templates/om/om-statefulset.yaml @@ -127,11 +127,11 @@ spec: {{- end }} ports: - name: rpc - containerPort: 9862 + containerPort: {{ .Values.om.service.rpcPort }} - name: ui containerPort: {{ .Values.om.service.port }} - name: ratis - containerPort: 9872 + containerPort: {{ .Values.om.service.ratisPort }} livenessProbe: httpGet: path: / diff --git a/charts/ozone/templates/scm/scm-service-headless.yaml b/charts/ozone/templates/scm/scm-service-headless.yaml index 8890ffd..d71c004 100644 --- a/charts/ozone/templates/scm/scm-service-headless.yaml +++ b/charts/ozone/templates/scm/scm-service-headless.yaml @@ -29,16 +29,16 @@ spec: - name: ui port: {{ .Values.scm.service.port }} - name: rpc-datanode - port: 9861 + port: {{ .Values.scm.service.rpcDatanodePort }} - name: block-client - port: 9863 + port: {{ .Values.scm.service.blockClientPort }} - name: rpc-client - port: 9860 + port: {{ .Values.scm.service.rpcClientPort }} {{- if gt (int .Values.scm.replicas) 1 }} - name: ratis - port: 9894 + port: {{ .Values.scm.service.ratisPort }} - name: grpc - port: 9895 + port: {{ .Values.scm.service.grpcPort }} {{- end }} selector: {{- include "ozone.selectorLabels" . 
| nindent 4 }} diff --git a/charts/ozone/templates/scm/scm-statefulset.yaml b/charts/ozone/templates/scm/scm-statefulset.yaml index 8221f36..636888c 100644 --- a/charts/ozone/templates/scm/scm-statefulset.yaml +++ b/charts/ozone/templates/scm/scm-statefulset.yaml @@ -100,18 +100,18 @@ spec: {{- end }} ports: - name: rpc-client - containerPort: 9860 + containerPort: {{ .Values.scm.service.rpcClientPort }} - name: block-client - containerPort: 9863 + containerPort: {{ .Values.scm.service.blockClientPort }} - name: rpc-datanode - containerPort: 9861 + containerPort: {{ .Values.scm.service.rpcDatanodePort }} - name: ui containerPort: {{ .Values.scm.service.port }} {{- if gt (int .Values.scm.replicas) 1 }} - name: ratis - containerPort: 9894 + containerPort: {{ .Values.scm.service.ratisPort }} - name: grpc - containerPort: 9895 + containerPort: {{ .Values.scm.service.grpcPort }} {{- end }} livenessProbe: httpGet: diff --git a/charts/ozone/values.yaml b/charts/ozone/values.yaml index f1ea2b9..059406b 100644 --- a/charts/ozone/values.yaml +++ b/charts/ozone/values.yaml @@ -144,6 +144,8 @@ om: service: type: ClusterIP port: 9874 + ratisPort: 9872 + rpcPort: 9862 nodePort: ~ labels: {} annotations: {} @@ -198,6 +200,11 @@ scm: service: type: ClusterIP port: 9876 + rpcDatanodePort: 9861 + blockClientPort: 9863 + rpcClientPort: 9860 + ratisPort: 9894 + grpcPort: 9895 nodePort: ~ labels: {} annotations: {} From 81e09becbc5725325a1eafbdcbed7e27c2ac60d0 Mon Sep 17 00:00:00 2001 From: tejaskriya Date: Wed, 29 Oct 2025 00:34:10 +0530 Subject: [PATCH 14/17] shift bootstrap logic to new file --- .../templates/om/om-bootstrap-configmap.yaml | 80 ++++++++++++++++ charts/ozone/templates/om/om-statefulset.yaml | 92 ++++++------------- 2 files changed, 110 insertions(+), 62 deletions(-) create mode 100644 charts/ozone/templates/om/om-bootstrap-configmap.yaml diff --git a/charts/ozone/templates/om/om-bootstrap-configmap.yaml b/charts/ozone/templates/om/om-bootstrap-configmap.yaml new file 
mode 100644 index 0000000..39aa61f --- /dev/null +++ b/charts/ozone/templates/om/om-bootstrap-configmap.yaml @@ -0,0 +1,80 @@ +{{- if and .Values.om.persistence.enabled (gt (len (ternary (splitList "," (include "ozone.om.bootstrap.nodes" .)) (list) (ne "" (include "ozone.om.bootstrap.nodes" .)))) 0) }} +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ .Release.Name }}-om-bootstrap-script + labels: + {{- include "ozone.labels" . | nindent 4 }} + app.kubernetes.io/component: om +data: + om-bootstrap.sh: |- + #!/bin/sh + set -e + + HELM_MANAGER_PATH="{{ .Values.om.persistence.path }}{{ .Values.helm.persistence.path }}" + HELM_MANAGER_BOOTSTRAPPED_FILE="$HELM_MANAGER_PATH/bootstrapped" + + # These are templated from Helm + OZONE_OM_ARGS_LIST="{{- range .Values.om.args }} {{ . }} {{- end }}" + OZONE_OM_BOOTSTRAP_NODES="{{ include "ozone.om.bootstrap.nodes" . }}" + OZONE_OM_CLUSTER_IDS="{{ include "ozone.om.cluster.ids" . }}" + OZONE_CLUSTER_ID="{{ .Values.clusterId }}" + + if [ -z "$OZONE_OM_BOOTSTRAP_NODES" ]; then + echo "No bootstrap handling needed!" + exit 0 + fi + + joinArr() { + local IFS="," + echo "$*" + } + + bootstrap_finalizer() { + echo "Init bootrap finalizer process..." + while true; do + IFS= read -r line; + echo "$line" + if echo "$line" | grep -q "Successfully bootstrapped OM $HOSTNAME and joined the Ratis group"; then + echo "$HOSTNAME was successfully bootstrapped!" + mkdir -p "$HELM_MANAGER_PATH" + touch "$HELM_MANAGER_BOOTSTRAPPED_FILE" + break; + fi + done + echo "Bootstrap finalizer process finished!" 
+ exit 0 + } + + bootstrapHosts="$OZONE_OM_BOOTSTRAP_NODES" + echo "Need to handle bootstrap for nodes $bootstrapHosts" + + IFS=',' read -r -a hostArray <<< "$bootstrapHosts" + doBootstrap=false + nodesConfigOverwriteList=() + + for host in "${hostArray[@]}"; do + if [[ "$host" == "$HOSTNAME" ]]; then + doBootstrap=true + activeNodesConfig="$OZONE_OM_CLUSTER_IDS" + IFS=',' read -r -a overwriteArray <<< "$activeNodesConfig" + for overwriteHost in "${overwriteArray[@]}"; do + nodesConfigOverwriteList+=("$overwriteHost") + if [[ "$overwriteHost" == "$HOSTNAME" ]]; then + break; + fi + done + break + fi + done + + if [ "$doBootstrap" = true ] && [ ! -f "$HELM_MANAGER_BOOTSTRAPPED_FILE" ]; then + echo "$HOSTNAME must be started with bootstrap arg!" + overwriteCmd="$(joinArr "${nodesConfigOverwriteList[@]}")" + echo "Bootstrapping node config for this node: $overwriteCmd" + exec ozone admin om --set "ozone.om.nodes.$OZONE_CLUSTER_ID=$overwriteCmd" --bootstrap | bootstrap_finalizer + else + echo "$HOSTNAME must not be started with bootstrap arg, or is already bootstrapped." + exit 0 + fi +{{- end }} diff --git a/charts/ozone/templates/om/om-statefulset.yaml b/charts/ozone/templates/om/om-statefulset.yaml index 57c3a0f..41d285a 100644 --- a/charts/ozone/templates/om/om-statefulset.yaml +++ b/charts/ozone/templates/om/om-statefulset.yaml @@ -23,7 +23,6 @@ {{- $tolerations := or .Values.om.tolerations .Values.tolerations }} {{- $securityContext := or .Values.om.securityContext .Values.securityContext }} {{- $bnodes := ternary (splitList "," (include "ozone.om.bootstrap.nodes" .)) (list) (ne "" (include "ozone.om.bootstrap.nodes" .)) }} -{{- $activeNodes := ternary (splitList "," (include "ozone.om.cluster.ids" .)) (list) (ne "" (include "ozone.om.cluster.ids" .)) }} apiVersion: apps/v1 kind: StatefulSet metadata: @@ -46,6 +45,28 @@ spec: {{- include "ozone.selectorLabels" . 
| nindent 8 }} app.kubernetes.io/component: om spec: + {{- if and .Values.om.persistence.enabled (gt (len $bnodes) 0) }} + initContainers: + - name: om-bootstrap + image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" + imagePullPolicy: {{ .Values.image.pullPolicy }} + command: ["/bin/sh", "/scripts/om-bootstrap.sh"] + env: + {{- include "ozone.configuration.env" . | nindent 12 }} + {{- with $env }} + {{- tpl (toYaml .) $ | nindent 12 }} + {{- end }} + {{- with $envFrom }} + envFrom: {{- tpl (toYaml .) $ | nindent 12 }} + {{- end }} + volumeMounts: + - name: config + mountPath: {{ .Values.configuration.dir }} + - name: {{ .Release.Name }}-om + mountPath: {{ .Values.om.persistence.path }} + - name: om-bootstrap-script + mountPath: /scripts + {{- end }} containers: - name: om image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" @@ -53,70 +74,11 @@ spec: {{- with .Values.om.command }} command: {{- tpl (toYaml .) $ | nindent 12 }} {{- end }} - args: - - sh - - -c - - | - set -e - HELM_MANAGER_PATH="{{ .Values.om.persistence.path }}{{ .Values.helm.persistence.path }}" - HELM_MANAGER_BOOTSTRAPPED_FILE="$HELM_MANAGER_PATH/bootstrapped" - {{- $flattenedArgs := join " " $.Values.om.args }} - {{- if and (.Values.om.persistence.enabled) (gt (len $bnodes) 0) }} - joinArr() { - local IFS="," - echo "$*" - } - bootstrap_finalizer() { - echo "Init bootrap finalizer process..." - while true; do - IFS= read -r line; - echo "$line" - if echo "$line" | grep -q "Successfully bootstrapped OM $HOSTNAME and joined the Ratis group"; then - echo "$HOSTNAME was successfully bootstrapped!" - mkdir -p "$HELM_MANAGER_PATH" - touch "$HELM_MANAGER_BOOTSTRAPPED_FILE" - break; - fi - done - echo "Bootstrap finalizer process finished!" 
- exit 0 - } - bootstrapHosts="{{ join "," $bnodes }}" - echo "Need to handle bootstrap for nodes $bootstrapHosts" - IFS=',' read -r -a hostArray <<< "$bootstrapHosts" - doBootstrap=false - nodesConfigOverwriteList=() - for host in "${hostArray[@]}"; do - if [[ "$host" == "$HOSTNAME" ]]; then - doBootstrap=true - activeNodesConfig="{{ join "," $activeNodes }}" - IFS=',' read -r -a overwriteArray <<< "$activeNodesConfig" - for overwriteHost in "${overwriteArray[@]}"; do - nodesConfigOverwriteList+=("$overwriteHost") - if [[ "$overwriteHost" == "$HOSTNAME" ]]; then - break; - fi - done - break - fi - done - if [ "$doBootstrap" = true ] && [ ! -f "$HELM_MANAGER_BOOTSTRAPPED_FILE" ]; then - echo "$HOSTNAME must be started with bootstrap arg!" - overwriteCmd="$(joinArr "${nodesConfigOverwriteList[@]}")" - echo "Bootstrapping node config for this node: $overwriteCmd" - exec {{ printf "%s --set ozone.om.nodes.%s=" $flattenedArgs $.Values.clusterId }}"$overwriteCmd" --bootstrap | bootstrap_finalizer - else - echo "$HOSTNAME must not be started with bootstrap arg!" - exec {{ join " " $.Values.om.args }} - fi - {{- else }} - echo "No bootstrap handling needed!" - exec {{ join " " $.Values.om.args }} - {{- end }} + args: {{- tpl (toYaml .Values.om.args) $ | nindent 12 }} env: {{- include "ozone.configuration.env" . | nindent 12 }} - name: WAITFOR - value: {{ $.Release.Name }}-scm-0.{{ $.Release.Name }}-scm-headless:9876 + value: {{ $.Release.Name }}-scm-0.{{ $.Release.Name }}-scm-headless:{{ .Values.scm.service.port }} - name: ENSURE_OM_INITIALIZED value: /data/metadata/om/current/VERSION {{- with $env }} @@ -158,6 +120,12 @@ spec: securityContext: {{- toYaml . 
| nindent 8 }} {{- end }} volumes: + {{- if and .Values.om.persistence.enabled (gt (len $bnodes) 0) }} + - name: om-bootstrap-script + configMap: + name: {{ .Release.Name }}-om-bootstrap-script + defaultMode: 0777 + {{- end }} - name: config projected: sources: From c0fb376573ec6822b19c7e0814896f979f2969d7 Mon Sep 17 00:00:00 2001 From: tejaskriya Date: Wed, 29 Oct 2025 00:50:17 +0530 Subject: [PATCH 15/17] use exit code to confirm bootstrap --- .../templates/om/om-bootstrap-configmap.yaml | 30 +++++++++---------- 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/charts/ozone/templates/om/om-bootstrap-configmap.yaml b/charts/ozone/templates/om/om-bootstrap-configmap.yaml index 39aa61f..818ab3d 100644 --- a/charts/ozone/templates/om/om-bootstrap-configmap.yaml +++ b/charts/ozone/templates/om/om-bootstrap-configmap.yaml @@ -30,20 +30,19 @@ data: echo "$*" } - bootstrap_finalizer() { - echo "Init bootrap finalizer process..." - while true; do - IFS= read -r line; - echo "$line" - if echo "$line" | grep -q "Successfully bootstrapped OM $HOSTNAME and joined the Ratis group"; then - echo "$HOSTNAME was successfully bootstrapped!" - mkdir -p "$HELM_MANAGER_PATH" - touch "$HELM_MANAGER_BOOTSTRAPPED_FILE" - break; - fi - done - echo "Bootstrap finalizer process finished!" - exit 0 + run_bootstrap() { + local overwriteCmd="$1" + echo "Bootstrapping node config for this node: $overwriteCmd" + + if ozone admin om --set "ozone.om.nodes.$OZONE_CLUSTER_ID=$overwriteCmd" --bootstrap; then + echo "$HOSTNAME was successfully bootstrapped!" + mkdir -p "$HELM_MANAGER_PATH" + touch "$HELM_MANAGER_BOOTSTRAPPED_FILE" + exit 0 + else + echo "Bootstrap failed with exit code $?" + exit 1 + fi } bootstrapHosts="$OZONE_OM_BOOTSTRAP_NODES" @@ -71,8 +70,7 @@ data: if [ "$doBootstrap" = true ] && [ ! -f "$HELM_MANAGER_BOOTSTRAPPED_FILE" ]; then echo "$HOSTNAME must be started with bootstrap arg!" 
overwriteCmd="$(joinArr "${nodesConfigOverwriteList[@]}")" - echo "Bootstrapping node config for this node: $overwriteCmd" - exec ozone admin om --set "ozone.om.nodes.$OZONE_CLUSTER_ID=$overwriteCmd" --bootstrap | bootstrap_finalizer + run_bootstrap "$overwriteCmd" else echo "$HOSTNAME must not be started with bootstrap arg, or is already bootstrapped." exit 0 From 61dd755f0d2721e96731eb1f8b329e46bd570355 Mon Sep 17 00:00:00 2001 From: tejaskriya Date: Tue, 6 Jan 2026 00:06:39 +0530 Subject: [PATCH 16/17] dynamic config in checksums for SCM/Datanode/S3G, exponential backoff retry to the OM bootstrap script, Added RPC port to OM headless service, made Ratis ports conditional --- .../datanode/datanode-statefulset.yaml | 2 +- .../helm/om-leader-transfer-job.yaml | 2 + .../templates/om/om-bootstrap-configmap.yaml | 38 ++++++++++++++----- .../templates/om/om-service-headless.yaml | 4 ++ .../ozone/templates/s3g/s3g-statefulset.yaml | 2 +- .../ozone/templates/scm/scm-statefulset.yaml | 2 +- charts/ozone/values.yaml | 2 +- 7 files changed, 39 insertions(+), 13 deletions(-) diff --git a/charts/ozone/templates/datanode/datanode-statefulset.yaml b/charts/ozone/templates/datanode/datanode-statefulset.yaml index 8dd95aa..0b9792a 100644 --- a/charts/ozone/templates/datanode/datanode-statefulset.yaml +++ b/charts/ozone/templates/datanode/datanode-statefulset.yaml @@ -39,7 +39,7 @@ spec: template: metadata: annotations: - checksum/config: {{ include (print $.Template.BasePath "/ozone-configmap.yaml") . | sha256sum }} + checksum/config: {{ include (print $.Template.BasePath "/ozone-configmap.yaml") . | cat (include "ozone.configuration.env" .) | sha256sum }} labels: {{- include "ozone.selectorLabels" . 
| nindent 8 }} app.kubernetes.io/component: datanode diff --git a/charts/ozone/templates/helm/om-leader-transfer-job.yaml b/charts/ozone/templates/helm/om-leader-transfer-job.yaml index 3b2b1b1..673b47a 100644 --- a/charts/ozone/templates/helm/om-leader-transfer-job.yaml +++ b/charts/ozone/templates/helm/om-leader-transfer-job.yaml @@ -52,8 +52,10 @@ spec: ports: - name: om-rpc containerPort: {{ $.Values.om.service.rpcPort }} + {{- if gt (int $.Values.om.replicas) 1 }} - name: om-ratis containerPort: {{ $.Values.om.service.ratisPort }} + {{- end }} volumeMounts: - name: config mountPath: {{ $.Values.configuration.dir }} diff --git a/charts/ozone/templates/om/om-bootstrap-configmap.yaml b/charts/ozone/templates/om/om-bootstrap-configmap.yaml index 818ab3d..141e19e 100644 --- a/charts/ozone/templates/om/om-bootstrap-configmap.yaml +++ b/charts/ozone/templates/om/om-bootstrap-configmap.yaml @@ -32,17 +32,37 @@ data: run_bootstrap() { local overwriteCmd="$1" + local max_attempts=3 + local attempt=1 + local base_delay=5 + local exit_code=0 + echo "Bootstrapping node config for this node: $overwriteCmd" - if ozone admin om --set "ozone.om.nodes.$OZONE_CLUSTER_ID=$overwriteCmd" --bootstrap; then - echo "$HOSTNAME was successfully bootstrapped!" - mkdir -p "$HELM_MANAGER_PATH" - touch "$HELM_MANAGER_BOOTSTRAPPED_FILE" - exit 0 - else - echo "Bootstrap failed with exit code $?" - exit 1 - fi + while [ $attempt -le $max_attempts ]; do + echo "Bootstrap attempt $attempt of $max_attempts" + + if ozone admin om --set "ozone.om.nodes.$OZONE_CLUSTER_ID=$overwriteCmd" --bootstrap; then + echo "$HOSTNAME was successfully bootstrapped!" + mkdir -p "$HELM_MANAGER_PATH" + touch "$HELM_MANAGER_BOOTSTRAPPED_FILE" + exit 0 + else + exit_code=$? + echo "Bootstrap failed with exit code $exit_code, attempt $attempt of $max_attempts" + + if [ $attempt -lt $max_attempts ]; then + local delay=$((base_delay * (1 << (attempt - 1)))) + echo "Retrying in $delay seconds..." 
+ sleep $delay + fi + + attempt=$((attempt + 1)) + fi + done + + echo "Bootstrap failed after $max_attempts attempts with exit code $exit_code" + exit 1 } bootstrapHosts="$OZONE_OM_BOOTSTRAP_NODES" diff --git a/charts/ozone/templates/om/om-service-headless.yaml b/charts/ozone/templates/om/om-service-headless.yaml index 813cef9..8aaccaa 100644 --- a/charts/ozone/templates/om/om-service-headless.yaml +++ b/charts/ozone/templates/om/om-service-headless.yaml @@ -28,8 +28,12 @@ spec: ports: - name: ui port: {{ .Values.om.service.port }} + - name: rpc + port: {{ .Values.om.service.rpcPort }} + {{- if gt (int .Values.om.replicas) 1 }} - name: ratis port: {{ .Values.om.service.ratisPort }} + {{- end }} selector: {{- include "ozone.selectorLabels" . | nindent 4 }} app.kubernetes.io/component: om diff --git a/charts/ozone/templates/s3g/s3g-statefulset.yaml b/charts/ozone/templates/s3g/s3g-statefulset.yaml index a9a2086..7e3d402 100644 --- a/charts/ozone/templates/s3g/s3g-statefulset.yaml +++ b/charts/ozone/templates/s3g/s3g-statefulset.yaml @@ -39,7 +39,7 @@ spec: template: metadata: annotations: - checksum/config: {{ include (print $.Template.BasePath "/ozone-configmap.yaml") . | sha256sum }} + checksum/config: {{ include (print $.Template.BasePath "/ozone-configmap.yaml") . | cat (include "ozone.configuration.env" .) | sha256sum }} labels: {{- include "ozone.selectorLabels" . | nindent 8 }} app.kubernetes.io/component: s3g diff --git a/charts/ozone/templates/scm/scm-statefulset.yaml b/charts/ozone/templates/scm/scm-statefulset.yaml index 636888c..35a51c6 100644 --- a/charts/ozone/templates/scm/scm-statefulset.yaml +++ b/charts/ozone/templates/scm/scm-statefulset.yaml @@ -40,7 +40,7 @@ spec: template: metadata: annotations: - checksum/config: {{ include (print $.Template.BasePath "/ozone-configmap.yaml") . | sha256sum }} + checksum/config: {{ include (print $.Template.BasePath "/ozone-configmap.yaml") . | cat (include "ozone.configuration.env" .) 
| sha256sum }} labels: {{- include "ozone.selectorLabels" . | nindent 8 }} app.kubernetes.io/component: scm diff --git a/charts/ozone/values.yaml b/charts/ozone/values.yaml index 059406b..0ec323c 100644 --- a/charts/ozone/values.yaml +++ b/charts/ozone/values.yaml @@ -166,7 +166,7 @@ om: # Storage Container Manager configuration scm: # Number of Storage Container Manager replicas - replicas: 3 + replicas: 1 # Command to launch Storage Container Manager (templated) command: ~ # Arguments to launch Storage Container Manager (templated) From 769b221483751e1b1ab62503d579ad63c015dc22 Mon Sep 17 00:00:00 2001 From: tejaskriya Date: Tue, 6 Jan 2026 00:19:43 +0530 Subject: [PATCH 17/17] lint errors --- charts/ozone/templates/datanode/datanode-statefulset.yaml | 2 +- charts/ozone/templates/om/om-statefulset.yaml | 2 +- charts/ozone/templates/s3g/s3g-statefulset.yaml | 2 +- charts/ozone/templates/scm/scm-statefulset.yaml | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/charts/ozone/templates/datanode/datanode-statefulset.yaml b/charts/ozone/templates/datanode/datanode-statefulset.yaml index 0d96b72..5b9dfce 100644 --- a/charts/ozone/templates/datanode/datanode-statefulset.yaml +++ b/charts/ozone/templates/datanode/datanode-statefulset.yaml @@ -18,7 +18,7 @@ {{- $env := concat .Values.env .Values.datanode.env }} {{- $envFrom := concat .Values.envFrom .Values.datanode.envFrom }} -{{- $podAnnotations := mergeOverwrite (deepCopy .Values.podAnnotations) .Values.datanode.podAnnotations }} +{{- $podAnnotations := mergeOverwrite (deepCopy (default dict .Values.podAnnotations)) (default dict .Values.datanode.podAnnotations) }} {{- $nodeSelector := or .Values.datanode.nodeSelector .Values.nodeSelector }} {{- $affinity := or .Values.datanode.affinity .Values.affinity }} {{- $tolerations := or .Values.datanode.tolerations .Values.tolerations }} diff --git a/charts/ozone/templates/om/om-statefulset.yaml b/charts/ozone/templates/om/om-statefulset.yaml index 
d42fdac..75f7d89 100644 --- a/charts/ozone/templates/om/om-statefulset.yaml +++ b/charts/ozone/templates/om/om-statefulset.yaml @@ -18,7 +18,7 @@ {{- $env := concat .Values.env .Values.om.env }} {{- $envFrom := concat .Values.envFrom .Values.om.envFrom }} -{{- $podAnnotations := mergeOverwrite (deepCopy .Values.podAnnotations) .Values.om.podAnnotations }} +{{- $podAnnotations := mergeOverwrite (deepCopy (default dict .Values.podAnnotations)) (default dict .Values.om.podAnnotations) }} {{- $nodeSelector := or .Values.om.nodeSelector .Values.nodeSelector }} {{- $affinity := or .Values.om.affinity .Values.affinity }} {{- $tolerations := or .Values.om.tolerations .Values.tolerations }} diff --git a/charts/ozone/templates/s3g/s3g-statefulset.yaml b/charts/ozone/templates/s3g/s3g-statefulset.yaml index a59d838..51f15fb 100644 --- a/charts/ozone/templates/s3g/s3g-statefulset.yaml +++ b/charts/ozone/templates/s3g/s3g-statefulset.yaml @@ -18,7 +18,7 @@ {{- $env := concat .Values.env .Values.s3g.env }} {{- $envFrom := concat .Values.envFrom .Values.s3g.envFrom }} -{{- $podAnnotations := mergeOverwrite (deepCopy .Values.podAnnotations) .Values.s3g.podAnnotations }} +{{- $podAnnotations := mergeOverwrite (deepCopy (default dict .Values.podAnnotations)) (default dict .Values.s3g.podAnnotations) }} {{- $nodeSelector := or .Values.s3g.nodeSelector .Values.nodeSelector }} {{- $affinity := or .Values.s3g.affinity .Values.affinity }} {{- $tolerations := or .Values.s3g.tolerations .Values.tolerations }} diff --git a/charts/ozone/templates/scm/scm-statefulset.yaml b/charts/ozone/templates/scm/scm-statefulset.yaml index d4a7a6f..6c1d144 100644 --- a/charts/ozone/templates/scm/scm-statefulset.yaml +++ b/charts/ozone/templates/scm/scm-statefulset.yaml @@ -18,7 +18,7 @@ {{- $env := concat .Values.env .Values.scm.env }} {{- $envFrom := concat .Values.envFrom .Values.scm.envFrom }} -{{- $podAnnotations := mergeOverwrite (deepCopy .Values.podAnnotations) .Values.scm.podAnnotations }} +{{- 
$podAnnotations := mergeOverwrite (deepCopy (default dict .Values.podAnnotations)) (default dict .Values.scm.podAnnotations) }} {{- $nodeSelector := or .Values.scm.nodeSelector .Values.nodeSelector }} {{- $affinity := or .Values.scm.affinity .Values.affinity }} {{- $tolerations := or .Values.scm.tolerations .Values.tolerations }}