Skip to content

Commit

Permalink
Add pod liveness check before starting node.
Browse files Browse the repository at this point in the history
  • Loading branch information
Garry Dmello committed Feb 24, 2025
1 parent 990d1a1 commit ff5373f
Show file tree
Hide file tree
Showing 5 changed files with 62 additions and 3 deletions.
2 changes: 1 addition & 1 deletion config/manager/image_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ kind: ImageConfig
metadata:
name: image-config
images:
system-logger: "k8ssandra/system-logger:v1.24.0-dev.a8fa96c-20241219"
system-logger: "k8ssandra/system-logger:v1.24.0-dev.2736e9a-20250220"
config-builder: "datastax/cass-config-builder:1.0-ubi8"
k8ssandra-client: "k8ssandra/k8ssandra-client:v0.6.0"
# cassandra:
Expand Down
2 changes: 1 addition & 1 deletion config/manager/kustomization.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,4 +14,4 @@ kind: Kustomization
images:
- name: controller
newName: k8ssandra/cass-operator
newTag: v1.24.0-dev.a8fa96c-20241219
newTag: v1.24.0-dev.2736e9a-20250220
25 changes: 25 additions & 0 deletions pkg/httphelper/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -328,6 +328,31 @@ func (client *NodeMgmtClient) CallCreateRoleEndpoint(pod *corev1.Pod, username s
return nil
}

// CallLivenessEndpoint issues a GET against the Node Management API liveness
// probe (/api/v0/probes/liveness) of the given pod.
//
// It returns the error from BuildPodHostFromPod if the pod host cannot be
// resolved, the error from callNodeMgmtEndpoint if the request fails, and nil
// otherwise. The request is bounded by a 60 second timeout.
func (client *NodeMgmtClient) CallLivenessEndpoint(pod *corev1.Pod) error {
	client.Log.Info("requesting Cassandra liveness from Node Management API", "pod", pod.Name)

	podHost, podPort, err := BuildPodHostFromPod(pod)
	if err != nil {
		return err
	}

	request := nodeMgmtRequest{
		endpoint: "/api/v0/probes/liveness",
		host:     podHost,
		port:     podPort,
		method:   http.MethodGet,
		timeout:  60 * time.Second,
	}

	res, err := callNodeMgmtEndpoint(client, request, "")
	if err != nil {
		return err
	}

	// Only report success once the call is known to have succeeded; logging
	// before the error check would announce success for failed probes too.
	client.Log.Info("requesting Cassandra liveness from Node Management API succeeded ", "pod", pod.Name, "response", res)
	return nil
}

// CallDropRoleEndpoint drops an existing role from the cluster
func (client *NodeMgmtClient) CallDropRoleEndpoint(pod *corev1.Pod, username string) error {
client.Log.Info(
Expand Down
34 changes: 34 additions & 0 deletions pkg/reconciliation/reconcile_racks.go
Original file line number Diff line number Diff line change
Expand Up @@ -2062,6 +2062,17 @@ func (rc *ReconciliationContext) startNode(pod *corev1.Pod, labelSeedBeforeStart
"Labeled pod a seed node %s", pod.Name)
}

// Check if pod is live before starting server
isAlive, err := rc.waitForPodLiveness(pod)
if err != nil {
return true, err
}
if isAlive {
if err := rc.startCassandra(endpointData, pod); err != nil {
return true, err
}
}

if err := rc.startCassandra(endpointData, pod); err != nil {
return true, err
}
Expand All @@ -2071,6 +2082,29 @@ func (rc *ReconciliationContext) startNode(pod *corev1.Pod, labelSeedBeforeStart
return false, nil
}

// waitForPodLiveness polls the Node Management API liveness endpoint of pod
// every 5 seconds until a probe succeeds or 5 minutes have elapsed.
//
// It returns (true, nil) as soon as CallLivenessEndpoint returns no error and
// (false, nil) if the timeout fires first. Probe errors are logged and retried
// rather than returned, so the error result is currently always nil.
//
// The call blocks the calling goroutine until one of those outcomes occurs.
func (rc *ReconciliationContext) waitForPodLiveness(pod *corev1.Pod) (bool, error) {
	const (
		livenessTimeout  = 5 * time.Minute // give up after this long
		livenessInterval = 5 * time.Second // delay between probe attempts
	)

	deadline := time.NewTimer(livenessTimeout)
	defer deadline.Stop()

	// Stop the ticker on every return path so it does not keep firing after
	// the function has returned.
	ticker := time.NewTicker(livenessInterval)
	defer ticker.Stop()

	for {
		select {
		case <-deadline.C:
			// Derive the reported duration from the constant so the message
			// cannot drift from the actual timeout.
			rc.ReqLogger.Info("Timed out after " + livenessTimeout.String() + " of retries")
			return false, nil
		case <-ticker.C:
			// Call the Liveness endpoint; a failure just means "not yet",
			// so log it and wait for the next tick.
			if err := rc.NodeMgmtClient.CallLivenessEndpoint(pod); err != nil {
				rc.ReqLogger.Info("Liveness probe failing before starting pod " + pod.Name + " with error " + err.Error())
				continue
			}
			rc.ReqLogger.Info("Liveness probe succeeded for pod " + pod.Name)
			return true, nil
		}
	}
}

func (rc *ReconciliationContext) countReadyAndStarted() (int, int) {
ready := 0
started := 0
Expand Down
2 changes: 1 addition & 1 deletion tests/kustomize/kustomization.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# This is the default kustomize template for tests.
namespace: kustomize
namespace: test-webhook-validation

apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
Expand Down

0 comments on commit ff5373f

Please sign in to comment.