24 changes: 8 additions & 16 deletions lib/resourcebuilder/apps.go
@@ -82,32 +82,28 @@ func (b *builder) checkDeploymentHealth(ctx context.Context, deployment *appsv1.
 	}
 
 	iden := fmt.Sprintf("%s/%s", deployment.Namespace, deployment.Name)
-	d, err := b.appsClientv1.Deployments(deployment.Namespace).Get(ctx, deployment.Name, metav1.GetOptions{})
-	if err != nil {
-		return err
-	}
 
-	if d.DeletionTimestamp != nil {
+	if deployment.DeletionTimestamp != nil {
 		return fmt.Errorf("deployment %s is being deleted", iden)
 	}
 
 	var availableCondition *appsv1.DeploymentCondition
 	var progressingCondition *appsv1.DeploymentCondition
 	var replicaFailureCondition *appsv1.DeploymentCondition
-	for idx, dc := range d.Status.Conditions {
+	for idx, dc := range deployment.Status.Conditions {
 		switch dc.Type {
 		case appsv1.DeploymentProgressing:
-			progressingCondition = &d.Status.Conditions[idx]
+			progressingCondition = &deployment.Status.Conditions[idx]
 		case appsv1.DeploymentAvailable:
-			availableCondition = &d.Status.Conditions[idx]
+			availableCondition = &deployment.Status.Conditions[idx]
 		case appsv1.DeploymentReplicaFailure:
-			replicaFailureCondition = &d.Status.Conditions[idx]
+			replicaFailureCondition = &deployment.Status.Conditions[idx]
 		}
 	}
 
 	if replicaFailureCondition != nil && replicaFailureCondition.Status == corev1.ConditionTrue {
 		return &payload.UpdateError{
-			Nested:  fmt.Errorf("deployment %s has some pods failing; unavailable replicas=%d", iden, d.Status.UnavailableReplicas),
+			Nested:  fmt.Errorf("deployment %s has some pods failing; unavailable replicas=%d", iden, deployment.Status.UnavailableReplicas),
 			Reason:  "WorkloadNotProgressing",
 			Message: fmt.Sprintf("deployment %s has a replica failure %s: %s", iden, replicaFailureCondition.Reason, replicaFailureCondition.Message),
 			Name:    iden,
@@ -116,7 +112,7 @@ func (b *builder) checkDeploymentHealth(ctx context.Context, deployment *appsv1.
 
 	if availableCondition != nil && availableCondition.Status == corev1.ConditionFalse && progressingCondition != nil && progressingCondition.Status == corev1.ConditionFalse {
 		return &payload.UpdateError{
-			Nested:  fmt.Errorf("deployment %s is not available and not progressing; updated replicas=%d of %d, available replicas=%d of %d", iden, d.Status.UpdatedReplicas, d.Status.Replicas, d.Status.AvailableReplicas, d.Status.Replicas),
+			Nested:  fmt.Errorf("deployment %s is not available and not progressing; updated replicas=%d of %d, available replicas=%d of %d", iden, deployment.Status.UpdatedReplicas, deployment.Status.Replicas, deployment.Status.AvailableReplicas, deployment.Status.Replicas),
 			Reason:  "WorkloadNotAvailable",
 			Message: fmt.Sprintf("deployment %s is not available %s (%s) or progressing %s (%s)", iden, availableCondition.Reason, availableCondition.Message, progressingCondition.Reason, progressingCondition.Message),
 			Name:    iden,
@@ -168,12 +164,8 @@ func (b *builder) checkDaemonSetHealth(ctx context.Context, daemonset *appsv1.Da
 	}
 
 	iden := fmt.Sprintf("%s/%s", daemonset.Namespace, daemonset.Name)
-	d, err := b.appsClientv1.DaemonSets(daemonset.Namespace).Get(ctx, daemonset.Name, metav1.GetOptions{})
-	if err != nil {
-		return err
-	}
 
-	if d.DeletionTimestamp != nil {
+	if daemonset.DeletionTimestamp != nil {
 		return fmt.Errorf("daemonset %s is being deleted", iden)
 	}
 
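The net effect in apps.go is that checkDeploymentHealth and checkDaemonSetHealth now evaluate the object the caller already holds instead of issuing an extra Get against the API server. A minimal, self-contained sketch of that condition-scanning pattern (illustrative names only, not code from this PR; assumes the k8s.io/api module is available):

// Sketch only: findCondition and main are illustrative, not part of the PR.
package main

import (
	"fmt"

	appsv1 "k8s.io/api/apps/v1"
	corev1 "k8s.io/api/core/v1"
)

// findCondition returns a pointer into the Deployment's own status slice,
// mirroring how the new code indexes deployment.Status.Conditions[idx]
// rather than conditions on a freshly fetched copy.
func findCondition(d *appsv1.Deployment, t appsv1.DeploymentConditionType) *appsv1.DeploymentCondition {
	for idx, dc := range d.Status.Conditions {
		if dc.Type == t {
			return &d.Status.Conditions[idx]
		}
	}
	return nil
}

func main() {
	d := &appsv1.Deployment{}
	d.Namespace, d.Name = "example-ns", "example"
	d.Status.Conditions = []appsv1.DeploymentCondition{
		{Type: appsv1.DeploymentAvailable, Status: corev1.ConditionFalse, Reason: "MinimumReplicasUnavailable"},
	}

	if c := findCondition(d, appsv1.DeploymentAvailable); c != nil && c.Status == corev1.ConditionFalse {
		fmt.Printf("deployment %s/%s is not available: %s\n", d.Namespace, d.Name, c.Reason)
	}
}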
22 changes: 11 additions & 11 deletions lib/resourcebuilder/batch.go
@@ -15,7 +15,12 @@ import (
 // WaitForJobCompletion waits for job to complete.
 func WaitForJobCompletion(ctx context.Context, client batchclientv1.JobsGetter, job *batchv1.Job) error {
 	return wait.PollImmediateUntil(defaultObjectPollInterval, func() (bool, error) {
-		if done, err := checkJobHealth(ctx, client, job); err != nil && done {
+		j, err := client.Jobs(job.Namespace).Get(ctx, job.Name, metav1.GetOptions{})
+		if err != nil {
+			return false, fmt.Errorf("error getting Job %s: %v", job.Name, err)
+		}
+
+		if done, err := checkJobHealth(ctx, j); err != nil && done {
 			return false, err
 		} else if err != nil {
 			klog.Error(err)
@@ -33,29 +38,24 @@ func (b *builder) checkJobHealth(ctx context.Context, job *batchv1.Job) error {
 		return nil
 	}
 
-	_, err := checkJobHealth(ctx, b.batchClientv1, job)
+	_, err := checkJobHealth(ctx, job)
 	return err
 }
 
 // checkJobHealth returns an error if the job status is bad enough to block further manifest application.
-func checkJobHealth(ctx context.Context, client batchclientv1.JobsGetter, job *batchv1.Job) (bool, error) {
-	j, err := client.Jobs(job.Namespace).Get(ctx, job.Name, metav1.GetOptions{})
-	if err != nil {
-		return false, fmt.Errorf("error getting Job %s: %v", job.Name, err)
-	}
-
-	if j.Status.Succeeded > 0 {
+func checkJobHealth(ctx context.Context, job *batchv1.Job) (bool, error) {
+	if job.Status.Succeeded > 0 {
 		return true, nil
 	}
 
 	// Since we have filled in "activeDeadlineSeconds",
 	// the Job will 'Active == 0' if and only if it exceeds the deadline or if the update image could not be pulled.
 	// Failed jobs will be recreated in the next run.
-	if j.Status.Active == 0 {
+	if job.Status.Active == 0 {
 		klog.V(2).Infof("No active pods for job %s in namespace %s", job.Name, job.Namespace)
 		failed, reason, message := hasJobFailed(job)
 		// If there is more than one failed job pod then get the cause for failure
-		if j.Status.Failed > 0 {
+		if job.Status.Failed > 0 {
 			failureReason := "DeadlineExceeded"
 			failureMessage := "Job was active longer than specified deadline"
 			if failed {
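With the Get moved up into WaitForJobCompletion, checkJobHealth becomes a pure function of the *batchv1.Job it receives, which makes it easy to exercise against a constructed object. A rough sketch of that shape (assumes the k8s.io/api dependency; the helper name and simplified failure check are made up, not the PR's code):

// Sketch only: checkJobHealthSketch imitates the refactored helper's shape
// and omits the failure-reason details handled in the real code.
package main

import (
	"fmt"

	batchv1 "k8s.io/api/batch/v1"
)

// checkJobHealthSketch reads status straight off the Job it is handed,
// with no API client involved.
func checkJobHealthSketch(job *batchv1.Job) (bool, error) {
	if job.Status.Succeeded > 0 {
		return true, nil // job finished successfully
	}
	if job.Status.Active == 0 && job.Status.Failed > 0 {
		return true, fmt.Errorf("job %s/%s has failed pods and none active", job.Namespace, job.Name)
	}
	return false, nil // still running; the caller keeps polling
}

func main() {
	job := &batchv1.Job{}
	job.Namespace, job.Name = "example-ns", "example-job"
	job.Status.Succeeded = 1

	done, err := checkJobHealthSketch(job)
	fmt.Println(done, err) // true <nil>
}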