Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions ray-operator/controllers/ray/utils/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ const (
ServeName = "serve"
ClusterDomainEnvKey = "CLUSTER_DOMAIN"
DefaultDomainName = "cluster.local"
ContainersNotReady = "ContainersNotReady"
)

// TODO (kevin85421): Define CRDType here rather than constant.go to avoid circular dependency.
Expand Down Expand Up @@ -103,12 +104,36 @@ func FindHeadPodReadyCondition(headPod *corev1.Pod) metav1.Condition {
headPodReadyCondition.Reason = reason
}

// If reason is ContainersNotReady, then replace it with an available
// container status that may illuminate why the container is not ready.
if reason == ContainersNotReady {
reason, message, ok := firstNotReadyContainerStatus(headPod)
if ok {
if headPodReadyCondition.Message != "" {
headPodReadyCondition.Message += "; "
}
headPodReadyCondition.Message += message
headPodReadyCondition.Reason = reason
}
}

// Since we're only interested in the PodReady condition, break after processing it
break
}
return headPodReadyCondition
}

func firstNotReadyContainerStatus(pod *corev1.Pod) (reason string, message string, ok bool) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: named returned variables are not needed here

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I named the return variables for documentation, to disambiguate the two strings

for _, status := range pod.Status.ContainerStatuses {
if status.State.Waiting != nil {
return status.State.Waiting.Reason, fmt.Sprintf("%s: %s", status.Name, status.State.Waiting.Message), true
} else if status.State.Terminated != nil {
return status.State.Terminated.Reason, fmt.Sprintf("%s: %s", status.Name, status.State.Terminated.Message), true
}
}
return "", "", false
}

// FindRayClusterSuspendStatus returns the current suspend status from two conditions:
// 1. rayv1.RayClusterSuspending
// 2. rayv1.RayClusterSuspended
Expand Down
93 changes: 88 additions & 5 deletions ray-operator/controllers/ray/utils/util_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -328,7 +328,7 @@ func createSomePodWithCondition(typ corev1.PodConditionType, status corev1.Condi
}
}

func createRayHeadPodWithPhaseAndCondition(phase corev1.PodPhase, typ corev1.PodConditionType, status corev1.ConditionStatus) (pod *corev1.Pod) {
func createRayHeadPodWithPhaseAndCondition(phase corev1.PodPhase, status corev1.ConditionStatus) (pod *corev1.Pod) {
return &corev1.Pod{
TypeMeta: metav1.TypeMeta{
APIVersion: "v1",
Expand All @@ -345,8 +345,9 @@ func createRayHeadPodWithPhaseAndCondition(phase corev1.PodPhase, typ corev1.Pod
Phase: phase,
Conditions: []corev1.PodCondition{
{
Type: typ,
Type: corev1.PodReady,
Status: status,
Reason: ContainersNotReady,
},
},
},
Expand Down Expand Up @@ -945,23 +946,23 @@ func TestFindHeadPodReadyCondition(t *testing.T) {
}{
{
name: "condition true if Ray head pod is running and ready",
pod: createRayHeadPodWithPhaseAndCondition(corev1.PodRunning, corev1.PodReady, corev1.ConditionTrue),
pod: createRayHeadPodWithPhaseAndCondition(corev1.PodRunning, corev1.ConditionTrue),
expected: metav1.Condition{
Type: string(rayv1.HeadPodReady),
Status: metav1.ConditionTrue,
},
},
{
name: "condition false if Ray head pod is not running",
pod: createRayHeadPodWithPhaseAndCondition(corev1.PodPending, corev1.PodReady, corev1.ConditionFalse),
pod: createRayHeadPodWithPhaseAndCondition(corev1.PodPending, corev1.ConditionFalse),
expected: metav1.Condition{
Type: string(rayv1.HeadPodReady),
Status: metav1.ConditionFalse,
},
},
{
name: "condition false if Ray head pod is not ready",
pod: createRayHeadPodWithPhaseAndCondition(corev1.PodRunning, corev1.PodReady, corev1.ConditionFalse),
pod: createRayHeadPodWithPhaseAndCondition(corev1.PodRunning, corev1.ConditionFalse),
expected: metav1.Condition{
Type: string(rayv1.HeadPodReady),
Status: metav1.ConditionFalse,
Expand All @@ -977,6 +978,88 @@ func TestFindHeadPodReadyCondition(t *testing.T) {
}
}

func TestFindHeadPodReadyMessage(t *testing.T) {
tests := []struct {
name string
message string
wantMessage string
wantReason string
status []corev1.ContainerStatus
}{{
name: "no message no status want original reason",
wantReason: ContainersNotReady,
}, {
name: "no container status want original reason",
message: "TooEarlyInTheMorning",
wantMessage: "TooEarlyInTheMorning",
wantReason: ContainersNotReady,
}, {
name: "one reason one status",
message: "containers not ready",
status: []corev1.ContainerStatus{{
Name: "ray",
State: corev1.ContainerState{
Waiting: &corev1.ContainerStateWaiting{
Reason: "ImagePullBackOff",
Message: `Back-off pulling image royproject/roy:latest: ErrImagePull: rpc error: code = NotFound`,
},
},
}},
wantReason: "ImagePullBackOff",
wantMessage: `containers not ready; ray: Back-off pulling image royproject/roy:latest: ErrImagePull: rpc error: code = NotFound`,
}, {
name: "one reason two statuses only copy first",
message: "aesthetic problems",
status: []corev1.ContainerStatus{{
Name: "indigo",
State: corev1.ContainerState{
Waiting: &corev1.ContainerStateWaiting{
Reason: "BadColor",
Message: "too blue",
},
},
}, {
Name: "circle",
State: corev1.ContainerState{
Terminated: &corev1.ContainerStateTerminated{
Reason: "BadGeometry",
Message: "too round",
},
},
}},
wantReason: "BadColor",
wantMessage: "aesthetic problems; indigo: too blue",
}, {
name: "no reason one status",
status: []corev1.ContainerStatus{{
Name: "my-image",
State: corev1.ContainerState{
Terminated: &corev1.ContainerStateTerminated{
Reason: "Crashed",
Message: "bash not found",
},
},
}},
wantReason: "Crashed",
wantMessage: "my-image: bash not found",
}}

for _, tc := range tests {
t.Run(tc.name, func(t *testing.T) {
pod := createRayHeadPodWithPhaseAndCondition(corev1.PodPending, corev1.ConditionFalse)
pod.Status.Conditions[0].Message = tc.message
pod.Status.ContainerStatuses = tc.status
cond := FindHeadPodReadyCondition(pod)
if cond.Message != tc.wantMessage {
t.Errorf("FindHeadPodReadyCondition(...) returned condition with message %q, but wanted %q", cond.Message, tc.wantMessage)
}
if cond.Reason != tc.wantReason {
t.Errorf("FindHeadPodReadyCondition(...) returned condition with reason %q, but wanted %q", cond.Reason, tc.wantReason)
}
})
}
}

func TestErrRayClusterReplicaFailureReason(t *testing.T) {
assert.Equal(t, "FailedDeleteAllPods", RayClusterReplicaFailureReason(ErrFailedDeleteAllPods))
assert.Equal(t, "FailedDeleteHeadPod", RayClusterReplicaFailureReason(ErrFailedDeleteHeadPod))
Expand Down
Loading