diff --git a/helm-chart/kuberay-operator/README.md b/helm-chart/kuberay-operator/README.md index 85fb7c51a39..2c57ab91216 100644 --- a/helm-chart/kuberay-operator/README.md +++ b/helm-chart/kuberay-operator/README.md @@ -167,8 +167,10 @@ spec: | logging.sizeLimit | string | `""` | EmptyDir volume size limit for kuberay-operator log file. | | batchScheduler.enabled | bool | `false` | | | batchScheduler.name | string | `""` | | -| configuration.enabled | bool | `false` | Whether to enable the configuration feature. If enabled, a ConfigMap will be created and mounted to the operator. | +| configuration.enabled | bool | `false` | Whether to enable the configuration feature. If enabled, a ConfigMap will be created and mounted to the operator. When enabled, flag-based configuration values (leaderElectionEnabled, metrics.enabled, kubeClient.qps, etc.) will be injected into the ConfigMap. The operator will use the ConfigMap instead of command-line flags. | | configuration.defaultContainerEnvs | list | `[]` | Default environment variables to inject into all Ray containers in all RayCluster CRs. This allows user to set feature flags across all Ray pods. Example: defaultContainerEnvs: - name: RAY_enable_open_telemetry value: "true" - name: RAY_metric_cardinality_level value: "recommended" | +| configuration.headSidecarContainers | list | `[]` | Sidecar containers to inject into every Ray head pod. Example: headSidecarContainers: - name: fluentbit image: fluent/fluent-bit:1.9 | +| configuration.workerSidecarContainers | list | `[]` | Sidecar containers to inject into every Ray worker pod. Example: workerSidecarContainers: - name: fluentbit image: fluent/fluent-bit:1.9 | | featureGates[0].name | string | `"RayClusterStatusConditions"` | | | featureGates[0].enabled | bool | `true` | | | featureGates[1].name | string | `"RayJobDeletionPolicy"` | | diff --git a/helm-chart/kuberay-operator/templates/_helpers.tpl b/helm-chart/kuberay-operator/templates/_helpers.tpl index d9f707b13d7..af0deec1a7d 100644 --- a/helm-chart/kuberay-operator/templates/_helpers.tpl +++ b/helm-chart/kuberay-operator/templates/_helpers.tpl @@ -104,6 +104,34 @@ FeatureGates {{- include "kuberay-operator.fullname" . -}} {{- end -}} +{{/* +Validate operator configuration values. +This template validates reconcileConcurrency, kubeClient.qps, and kubeClient.burst. +It should be called early in the deployment to ensure invalid values are caught. +*/}} +{{- define "kuberay-operator.validateConfig" -}} +{{- if hasKey .Values "reconcileConcurrency" }} +{{- $rc := toString .Values.reconcileConcurrency }} +{{- if not (regexMatch "^[1-9][0-9]*$" $rc) }} +{{- fail (printf "values.reconcileConcurrency must be a positive integer, got %q" $rc) }} +{{- end }} +{{- end }} +{{- if hasKey .Values "kubeClient" }} +{{- if hasKey .Values.kubeClient "qps" }} +{{- $qps := toString .Values.kubeClient.qps }} +{{- if not (regexMatch "^[+-]?[0-9]+(\\.[0-9]+)?$" $qps) }} +{{- fail (printf "values.kubeClient.qps must be a valid float number, got %q" $qps) }} +{{- end }} +{{- end }} +{{- if hasKey .Values.kubeClient "burst" }} +{{- $burst := toString .Values.kubeClient.burst }} +{{- if not (regexMatch "^[0-9]+$" $burst) }} +{{- fail (printf "values.kubeClient.burst must be a non-negative integer, got %q" $burst) }} +{{- end }} +{{- end }} +{{- end }} +{{- end }} + {{- /* Create the name of the leader election role to use. */ -}} {{- define "kuberay-operator.leaderElectionRole.name" -}} {{- include "kuberay-operator.fullname" . -}}-leader-election diff --git a/helm-chart/kuberay-operator/templates/configmap.yaml b/helm-chart/kuberay-operator/templates/configmap.yaml index 7d095a74362..c677bf5e933 100644 --- a/helm-chart/kuberay-operator/templates/configmap.yaml +++ b/helm-chart/kuberay-operator/templates/configmap.yaml @@ -1,4 +1,5 @@ {{- if .Values.configuration.enabled }} +{{- include "kuberay-operator.validateConfig" . -}} apiVersion: v1 kind: ConfigMap metadata: @@ -13,6 +14,56 @@ data: config.yaml: | apiVersion: config.ray.io/v1alpha1 kind: Configuration + {{- if hasKey .Values "leaderElectionEnabled" }} + enableLeaderElection: {{ .Values.leaderElectionEnabled }} + {{- end }} + {{- if and (hasKey .Values "metrics") (hasKey .Values.metrics "enabled") }} + enableMetrics: {{ .Values.metrics.enabled }} + {{- end }} + {{- if hasKey .Values "reconcileConcurrency" }} + reconcileConcurrency: {{ .Values.reconcileConcurrency }} + {{- end }} + {{- if hasKey .Values "useKubernetesProxy" }} + useKubernetesProxy: {{ .Values.useKubernetesProxy }} + {{- end }} + {{- if hasKey .Values "kubeClient" }} + {{- if hasKey .Values.kubeClient "qps" }} + qps: {{ .Values.kubeClient.qps }} + {{- end }} + {{- if hasKey .Values.kubeClient "burst" }} + burst: {{ .Values.kubeClient.burst }} + {{- end }} + {{- end }} + {{- if .Values.watchNamespace }} + watchNamespace: {{ join "," .Values.watchNamespace | quote }} + {{- else if .Values.singleNamespaceInstall }} + watchNamespace: {{ .Release.Namespace | quote }} + {{- end }} + {{- if and (.Values.logging.baseDir) (.Values.logging.fileName) }} + logFile: {{ printf "%s/%s" .Values.logging.baseDir .Values.logging.fileName | quote }} + {{- end }} + {{- if .Values.logging.stdoutEncoder }} + logStdoutEncoder: {{ .Values.logging.stdoutEncoder | quote }} + {{- end }} + {{- if .Values.logging.fileEncoder }} + logFileEncoder: {{ .Values.logging.fileEncoder | quote }} + {{- end }} + {{- if .Values.batchScheduler }} + {{- if .Values.batchScheduler.enabled }} + enableBatchScheduler: true + {{- end }} + {{- if .Values.batchScheduler.name }} + batchScheduler: {{ .Values.batchScheduler.name | quote }} + {{- end }} + {{- end }} + {{- if .Values.configuration.headSidecarContainers }} + headSidecarContainers: + {{- toYaml .Values.configuration.headSidecarContainers | nindent 4 }} + {{- end }} + {{- if .Values.configuration.workerSidecarContainers }} + workerSidecarContainers: + {{- toYaml .Values.configuration.workerSidecarContainers | nindent 4 }} + {{- end }} {{- if .Values.configuration.defaultContainerEnvs }} defaultContainerEnvs: {{- toYaml .Values.configuration.defaultContainerEnvs | nindent 4 }} diff --git a/helm-chart/kuberay-operator/templates/deployment.yaml b/helm-chart/kuberay-operator/templates/deployment.yaml index 337dcc60bae..fa4278ed707 100644 --- a/helm-chart/kuberay-operator/templates/deployment.yaml +++ b/helm-chart/kuberay-operator/templates/deployment.yaml @@ -88,8 +88,8 @@ spec: {{- if .Values.configuration.enabled -}} {{- $argList = append $argList "--config" -}} {{- $argList = append $argList "/etc/kuberay/config.yaml" -}} - {{- end -}} - {{- $argList = append $argList (include "kuberay.featureGates" . | trim) -}} + {{- else -}} + {{- include "kuberay-operator.validateConfig" . -}} {{- if .Values.batchScheduler -}} {{- if .Values.batchScheduler.enabled -}} {{- $argList = append $argList "--enable-batch-scheduler" -}} @@ -130,28 +130,18 @@ spec: {{- $argList = append $argList (printf "--enable-metrics=%t" .Values.metrics.enabled) -}} {{- end -}} {{- if hasKey .Values "reconcileConcurrency" -}} - {{- $rc := toString .Values.reconcileConcurrency }} - {{- if not (regexMatch "^[1-9][0-9]*$" $rc) }} - {{- fail (printf "values.reconcileConcurrency must be a positive integer, got %q" $rc) }} - {{- end }} {{- $argList = append $argList (printf "--reconcile-concurrency=%v" .Values.reconcileConcurrency) -}} {{- end -}} {{- if hasKey .Values "kubeClient" -}} {{- if hasKey .Values.kubeClient "qps" -}} - {{- $qps := toString .Values.kubeClient.qps }} - {{- if not (regexMatch "^[+-]?[0-9]+(\\.[0-9]+)?$" $qps) }} - {{- fail (printf "values.kubeClient.qps must be a valid float number, got %q" $qps) }} - {{- end }} {{- $argList = append $argList (printf "--qps=%v" .Values.kubeClient.qps) -}} {{- end -}} {{- if hasKey .Values.kubeClient "burst" -}} - {{- $burst := toString .Values.kubeClient.burst }} - {{- if not (regexMatch "^[0-9]+$" $burst) }} - {{- fail (printf "values.kubeClient.burst must be a non-negative integer, got %q" $burst) }} - {{- end }} {{- $argList = append $argList (printf "--burst=%v" .Values.kubeClient.burst) -}} {{- end -}} {{- end -}} + {{- end -}} + {{- $argList = append $argList (include "kuberay.featureGates" . | trim) -}} {{- (printf "\n") -}} {{- $argList | toYaml | indent 12 }} ports: diff --git a/helm-chart/kuberay-operator/tests/configmap_test.yaml b/helm-chart/kuberay-operator/tests/configmap_test.yaml index 94f4011ef30..3bb15e78803 100644 --- a/helm-chart/kuberay-operator/tests/configmap_test.yaml +++ b/helm-chart/kuberay-operator/tests/configmap_test.yaml @@ -98,3 +98,197 @@ tests: - notMatchRegex: path: data["config.yaml"] pattern: "defaultContainerEnvs:" + + - it: Should include enableLeaderElection from values.leaderElectionEnabled + set: + configuration: + enabled: true + leaderElectionEnabled: false + asserts: + - matchRegex: + path: data["config.yaml"] + pattern: "enableLeaderElection: false" + + - it: Should include enableMetrics from values.metrics.enabled + set: + configuration: + enabled: true + metrics: + enabled: false + asserts: + - matchRegex: + path: data["config.yaml"] + pattern: "enableMetrics: false" + + - it: Should include reconcileConcurrency from values + set: + configuration: + enabled: true + reconcileConcurrency: 5 + asserts: + - matchRegex: + path: data["config.yaml"] + pattern: "reconcileConcurrency: 5" + + - it: Should include qps and burst from values.kubeClient + set: + configuration: + enabled: true + kubeClient: + qps: 200 + burst: 400 + asserts: + - matchRegex: + path: data["config.yaml"] + pattern: "qps: 200" + - matchRegex: + path: data["config.yaml"] + pattern: "burst: 400" + + - it: Should include useKubernetesProxy from values + set: + configuration: + enabled: true + useKubernetesProxy: true + asserts: + - matchRegex: + path: data["config.yaml"] + pattern: "useKubernetesProxy: true" + + - it: Should include watchNamespace from values + set: + configuration: + enabled: true + watchNamespace: + - namespace1 + - namespace2 + asserts: + - matchRegex: + path: data["config.yaml"] + pattern: 'watchNamespace: "namespace1,namespace2"' + + - it: Should include batchScheduler configuration + set: + configuration: + enabled: true + batchScheduler: + enabled: true + name: volcano + asserts: + - matchRegex: + path: data["config.yaml"] + pattern: "enableBatchScheduler: true" + - matchRegex: + path: data["config.yaml"] + pattern: 'batchScheduler: "volcano"' + + - it: Should include logging configuration + set: + configuration: + enabled: true + logging: + baseDir: /var/log + fileName: operator.log + stdoutEncoder: console + fileEncoder: json + asserts: + - matchRegex: + path: data["config.yaml"] + pattern: 'logFile: "/var/log/operator.log"' + - matchRegex: + path: data["config.yaml"] + pattern: 'logStdoutEncoder: "console"' + - matchRegex: + path: data["config.yaml"] + pattern: 'logFileEncoder: "json"' + + - it: Should include headSidecarContainers when set + set: + configuration: + enabled: true + headSidecarContainers: + - name: fluentbit + image: fluent/fluent-bit:1.9 + asserts: + - matchRegex: + path: data["config.yaml"] + pattern: "headSidecarContainers:" + - matchRegex: + path: data["config.yaml"] + pattern: "name: fluentbit" + - matchRegex: + path: data["config.yaml"] + pattern: "image: fluent/fluent-bit:1.9" + + - it: Should include workerSidecarContainers when set + set: + configuration: + enabled: true + workerSidecarContainers: + - name: monitoring + image: prometheus/node-exporter:latest + asserts: + - matchRegex: + path: data["config.yaml"] + pattern: "workerSidecarContainers:" + - matchRegex: + path: data["config.yaml"] + pattern: "name: monitoring" + - matchRegex: + path: data["config.yaml"] + pattern: "image: prometheus/node-exporter:latest" + + - it: Should not include headSidecarContainers when not set + set: + configuration: + enabled: true + asserts: + - notMatchRegex: + path: data["config.yaml"] + pattern: "headSidecarContainers:" + + - it: Should not include workerSidecarContainers when not set + set: + configuration: + enabled: true + asserts: + - notMatchRegex: + path: data["config.yaml"] + pattern: "workerSidecarContainers:" + + - it: Should include all configurations from values when configuration.enabled is true + set: + configuration: + enabled: true + defaultContainerEnvs: + - name: RAY_TEST + value: "test" + leaderElectionEnabled: false + metrics: + enabled: false + reconcileConcurrency: 3 + kubeClient: + qps: 150 + burst: 300 + asserts: + - matchRegex: + path: data["config.yaml"] + pattern: "enableLeaderElection: false" + - matchRegex: + path: data["config.yaml"] + pattern: "enableMetrics: false" + - matchRegex: + path: data["config.yaml"] + pattern: "reconcileConcurrency: 3" + - matchRegex: + path: data["config.yaml"] + pattern: "qps: 150" + - matchRegex: + path: data["config.yaml"] + pattern: "burst: 300" + - matchRegex: + path: data["config.yaml"] + pattern: "defaultContainerEnvs:" + - matchRegex: + path: data["config.yaml"] + pattern: "name: RAY_TEST" diff --git a/helm-chart/kuberay-operator/tests/deployment_test.yaml b/helm-chart/kuberay-operator/tests/deployment_test.yaml index 7a420217596..8b2d5e1e631 100644 --- a/helm-chart/kuberay-operator/tests/deployment_test.yaml +++ b/helm-chart/kuberay-operator/tests/deployment_test.yaml @@ -347,3 +347,59 @@ tests: asserts: - notExists: path: spec.template.spec.volumes[?(@.name=="operator-config")] + + - it: Should not include command-line flags when configuration is enabled (ConfigMap mode) + set: + configuration: + enabled: true + reconcileConcurrency: 5 + kubeClient: + qps: 100 + burst: 200 + leaderElectionEnabled: false + metrics: + enabled: false + asserts: + - contains: + path: spec.template.spec.containers[?(@.name=="kuberay-operator")].args + content: "--config" + - contains: + path: spec.template.spec.containers[?(@.name=="kuberay-operator")].args + content: "/etc/kuberay/config.yaml" + - notContains: + path: spec.template.spec.containers[?(@.name=="kuberay-operator")].args + content: "--reconcile-concurrency=5" + - notContains: + path: spec.template.spec.containers[?(@.name=="kuberay-operator")].args + content: "--qps=100" + - notContains: + path: spec.template.spec.containers[?(@.name=="kuberay-operator")].args + content: "--burst=200" + - notContains: + path: spec.template.spec.containers[?(@.name=="kuberay-operator")].args + content: "--enable-leader-election=false" + - notContains: + path: spec.template.spec.containers[?(@.name=="kuberay-operator")].args + content: "--enable-metrics=false" + + - it: Should include command-line flags when configuration is disabled (command-line mode) + set: + configuration: + enabled: false + reconcileConcurrency: 5 + kubeClient: + qps: 100 + burst: 200 + asserts: + - notContains: + path: spec.template.spec.containers[?(@.name=="kuberay-operator")].args + content: "--config" + - contains: + path: spec.template.spec.containers[?(@.name=="kuberay-operator")].args + content: "--reconcile-concurrency=5" + - contains: + path: spec.template.spec.containers[?(@.name=="kuberay-operator")].args + content: "--qps=100" + - contains: + path: spec.template.spec.containers[?(@.name=="kuberay-operator")].args + content: "--burst=200" diff --git a/helm-chart/kuberay-operator/values.yaml b/helm-chart/kuberay-operator/values.yaml index c566c6525c6..564c783a844 100644 --- a/helm-chart/kuberay-operator/values.yaml +++ b/helm-chart/kuberay-operator/values.yaml @@ -104,6 +104,8 @@ batchScheduler: # Configuration for the KubeRay operator. configuration: # -- Whether to enable the configuration feature. If enabled, a ConfigMap will be created and mounted to the operator. + # When enabled, flag-based configuration values (leaderElectionEnabled, metrics.enabled, kubeClient.qps, etc.) + # will be injected into the ConfigMap. The operator will use the ConfigMap instead of command-line flags. enabled: false # -- Default environment variables to inject into all Ray containers in all RayCluster CRs. # This allows user to set feature flags across all Ray pods. @@ -115,6 +117,20 @@ configuration: # value: "recommended" defaultContainerEnvs: [] + # -- Sidecar containers to inject into every Ray head pod. + # Example: + # headSidecarContainers: + # - name: fluentbit + # image: fluent/fluent-bit:1.9 + headSidecarContainers: [] + + # -- Sidecar containers to inject into every Ray worker pod. + # Example: + # workerSidecarContainers: + # - name: fluentbit + # image: fluent/fluent-bit:1.9 + workerSidecarContainers: [] + featureGates: - name: RayClusterStatusConditions enabled: true