Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 30 additions & 0 deletions docs/reference/api.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,35 @@ Package v1 contains API Schema definitions for the ray v1 API group



#### AuthMode

_Underlying type:_ _string_

AuthMode describes the authentication mode for the Ray cluster.



_Appears in:_
- [AuthOptions](#authoptions)



#### AuthOptions



AuthOptions defines the authentication options for a RayCluster.



_Appears in:_
- [RayClusterSpec](#rayclusterspec)

| Field | Description | Default | Validation |
| --- | --- | --- | --- |
| `mode` _[AuthMode](#authmode)_ | Mode specifies the authentication mode.<br />Supported values are "disabled" and "token".<br />Defaults to "token". | | Enum: [disabled token] <br /> |
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it’s better to set the default to disabled, as token authentication requires Ray >= 2.51.0 and some users may still be on older pinned versions.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's worth mentioning that the mode only defaults to token when authOptions != nil.



#### AutoscalerOptions


Expand Down Expand Up @@ -268,6 +297,7 @@ _Appears in:_

| Field | Description | Default | Validation |
| --- | --- | --- | --- |
| `authOptions` _[AuthOptions](#authoptions)_ | AuthOptions specifies the authentication options for the RayCluster. | | |
| `suspend` _boolean_ | Suspend indicates whether a RayCluster should be suspended.<br />A suspended RayCluster will have head pods and worker pods deleted. | | |
| `managedBy` _string_ | ManagedBy is an optional configuration for the controller or entity that manages a RayCluster.<br />The value must be either 'ray.io/kuberay-operator' or 'kueue.x-k8s.io/multikueue'.<br />The kuberay-operator reconciles a RayCluster which doesn't have this field at all or<br />the field value is the reserved string 'ray.io/kuberay-operator',<br />but delegates reconciling the RayCluster with 'kueue.x-k8s.io/multikueue' to the Kueue.<br />The field is immutable. | | |
| `autoscalerOptions` _[AutoscalerOptions](#autoscaleroptions)_ | AutoscalerOptions specifies optional configuration for the Ray autoscaler. | | |
Expand Down
8 changes: 8 additions & 0 deletions helm-chart/kuberay-operator/crds/ray.io_rayclusters.yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 8 additions & 0 deletions helm-chart/kuberay-operator/crds/ray.io_rayjobs.yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 8 additions & 0 deletions helm-chart/kuberay-operator/crds/ray.io_rayservices.yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

11 changes: 11 additions & 0 deletions helm-chart/kuberay-operator/templates/_helpers.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,17 @@ rules:
- pods/resize
verbs:
- patch
- apiGroups:
- ""
resources:
- secrets
verbs:
- create
- get
- list
- patch
- update
- watch
- apiGroups:
- ""
resources:
Expand Down
23 changes: 23 additions & 0 deletions ray-operator/apis/ray/v1/raycluster_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@ import (

// RayClusterSpec defines the desired state of RayCluster
type RayClusterSpec struct {
// AuthOptions specifies the authentication options for the RayCluster.
// +optional
AuthOptions *AuthOptions `json:"authOptions,omitempty"`
// Suspend indicates whether a RayCluster should be suspended.
// A suspended RayCluster will have head pods and worker pods deleted.
// +optional
Expand Down Expand Up @@ -46,6 +49,26 @@ type RayClusterSpec struct {
WorkerGroupSpecs []WorkerGroupSpec `json:"workerGroupSpecs,omitempty"`
}

// AuthMode describes the authentication mode for the Ray cluster.
type AuthMode string

// Supported AuthMode values. The string literals here are the same values that the
// Enum validation marker on AuthOptions.Mode accepts ("disabled" and "token").
const (
	// AuthModeDisabled disables authentication.
	AuthModeDisabled AuthMode = "disabled"
	// AuthModeToken enables token-based authentication.
	AuthModeToken AuthMode = "token"
)

// AuthOptions defines the authentication options for a RayCluster.
type AuthOptions struct {
	// NOTE(review): Mode carries no kubebuilder default marker, so the "token" default
	// described in the field documentation is not applied by the CRD schema itself.
	// Presumably the operator treats an empty Mode as "token" only when
	// spec.authOptions is set - confirm against utils.IsAuthEnabled.

	// Mode specifies the authentication mode.
	// Supported values are "disabled" and "token".
	// Defaults to "token".
	// +kubebuilder:validation:Enum=disabled;token
	// +optional
	Mode AuthMode `json:"mode,omitempty"`
}

// GcsFaultToleranceOptions contains configs for GCS FT
type GcsFaultToleranceOptions struct {
// +optional
Expand Down
20 changes: 20 additions & 0 deletions ray-operator/apis/ray/v1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 8 additions & 0 deletions ray-operator/config/crd/bases/ray.io_rayclusters.yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 8 additions & 0 deletions ray-operator/config/crd/bases/ray.io_rayjobs.yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 8 additions & 0 deletions ray-operator/config/crd/bases/ray.io_rayservices.yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

11 changes: 11 additions & 0 deletions ray-operator/config/rbac/role.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,17 @@ rules:
- pods/resize
verbs:
- patch
- apiGroups:
- ""
resources:
- secrets
verbs:
- create
- get
- list
- patch
- update
- watch
- apiGroups:
- ""
resources:
Expand Down
47 changes: 47 additions & 0 deletions ray-operator/controllers/ray/common/pod.go
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,12 @@ func DefaultHeadPodTemplate(ctx context.Context, instance rayv1.RayCluster, head
autoscalerImage := podTemplate.Spec.Containers[utils.RayContainerIndex].Image
// inject autoscaler container into head pod
autoscalerContainer := BuildAutoscalerContainer(autoscalerImage)

// Configure RAY_AUTH_TOKEN and RAY_AUTH_MODE if auth is enabled.
if utils.IsAuthEnabled(&instance.Spec) {
setContainerTokenAuthEnvVars(instance.Name, &autoscalerContainer)
}

// Merge the user overrides from autoscalerOptions into the autoscaler container config.
mergeAutoscalerOverrides(&autoscalerContainer, instance.Spec.AutoscalerOptions)
podTemplate.Spec.Containers = append(podTemplate.Spec.Containers, autoscalerContainer)
Expand All @@ -221,6 +227,10 @@ func DefaultHeadPodTemplate(ctx context.Context, instance rayv1.RayCluster, head
podTemplate.Spec.Containers[utils.RayContainerIndex].Ports = append(podTemplate.Spec.Containers[utils.RayContainerIndex].Ports, metricsPort)
}

if utils.IsAuthEnabled(&instance.Spec) {
configureTokenAuth(instance.Name, &podTemplate)
}

return podTemplate
}

Expand All @@ -236,6 +246,39 @@ func setAutoscalerV2EnvVars(podTemplate *corev1.PodTemplateSpec) {
})
}

// configureTokenAuth wires Ray token authentication into a pod template.
//
// The auth env vars are added to the Ray container (the one at utils.RayContainerIndex)
// and to every init container named "wait-gcs-ready". All other init containers are
// left untouched.
func configureTokenAuth(clusterName string, podTemplate *corev1.PodTemplateSpec) {
	spec := &podTemplate.Spec
	setContainerTokenAuthEnvVars(clusterName, &spec.Containers[utils.RayContainerIndex])

	// Address each init container in place so the env vars land on the element stored
	// in the slice rather than on a copy.
	for idx := range spec.InitContainers {
		if target := &spec.InitContainers[idx]; target.Name == "wait-gcs-ready" {
			setContainerTokenAuthEnvVars(clusterName, target)
		}
	}
}

// setContainerTokenAuthEnvVars appends the Ray authentication env vars to container.
//
// Two entries are appended, in this order:
//   - utils.RAY_AUTH_MODE_ENV_VAR, always set to the "token" mode value;
//   - utils.RAY_AUTH_TOKEN_ENV_VAR, sourced from key utils.RAY_AUTH_TOKEN_SECRET_KEY of
//     the Secret whose name is utils.CheckName(clusterName).
//
// Existing entries in container.Env are kept as they are; nothing is de-duplicated.
func setContainerTokenAuthEnvVars(clusterName string, container *corev1.Container) {
	tokenSource := &corev1.EnvVarSource{
		SecretKeyRef: &corev1.SecretKeySelector{
			LocalObjectReference: corev1.LocalObjectReference{Name: utils.CheckName(clusterName)},
			Key:                  utils.RAY_AUTH_TOKEN_SECRET_KEY,
		},
	}

	container.Env = append(container.Env,
		corev1.EnvVar{Name: utils.RAY_AUTH_MODE_ENV_VAR, Value: string(rayv1.AuthModeToken)},
		corev1.EnvVar{Name: utils.RAY_AUTH_TOKEN_ENV_VAR, ValueFrom: tokenSource},
	)
}

func getEnableInitContainerInjection() bool {
if s := os.Getenv(EnableInitContainerInjectionEnvKey); strings.ToLower(s) == "false" {
return false
Expand Down Expand Up @@ -358,6 +401,10 @@ func DefaultWorkerPodTemplate(ctx context.Context, instance rayv1.RayCluster, wo
podTemplate.Spec.RestartPolicy = corev1.RestartPolicyNever
}

if utils.IsAuthEnabled(&instance.Spec) {
configureTokenAuth(instance.Name, &podTemplate)
}

return podTemplate
}

Expand Down
Loading
Loading