From 0437b7263e4744d2578c6d964f9d5a388ef54856 Mon Sep 17 00:00:00 2001 From: catsby Date: Thu, 28 Apr 2022 17:08:57 -0500 Subject: [PATCH 1/2] patch k8s stop task to only delete the job if it had pending pods --- builtin/k8s/task.go | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/builtin/k8s/task.go b/builtin/k8s/task.go index bd0a3b087c0..9999343054e 100644 --- a/builtin/k8s/task.go +++ b/builtin/k8s/task.go @@ -160,15 +160,6 @@ func (p *TaskLauncher) StopTask( ns = p.config.Namespace } - // Delete the job. This does *not* delete any running pods that the job - // created. - jobsClient := clientSet.BatchV1().Jobs(ns) - if err := jobsClient.Delete(ctx, ti.Id, metav1.DeleteOptions{}); err != nil { - if !errors.IsNotFound(err) { - return err - } - } - // List pods with this job label podsClient := clientSet.CoreV1().Pods(ns) pods, err := podsClient.List(ctx, metav1.ListOptions{ @@ -185,11 +176,27 @@ func (p *TaskLauncher) StopTask( return nil } - // Delete any pods stuck in pending + // Find any pods stuck in pending + var pendingPods []string for _, p := range pods.Items { if p.Status.Phase == corev1.PodPending { + pendingPods = append(pendingPods, p.Name) + } + } + + // If we've found pending/stuck pods, attempt to clean up + if len(pendingPods) > 0 { + // Delete the job. This does *not* delete any running pods that the job + // created. + jobsClient := clientSet.BatchV1().Jobs(ns) + if err := jobsClient.Delete(ctx, ti.Id, metav1.DeleteOptions{}); err != nil { + if !errors.IsNotFound(err) { + return err + } + } + for _, name := range pendingPods { log.Warn("job pod is in pending phase in StopTask operation, cancelling", "job_id", ti.Id) - if err := podsClient.Delete(ctx, p.Name, metav1.DeleteOptions{}); err != nil { + if err := podsClient.Delete(ctx, name, metav1.DeleteOptions{}); err != nil { if !errors.IsNotFound(err) { return err } From 544e03b8e5aaf72119a8c2f73d160a8b612be698 Mon Sep 17 00:00:00 2001 From: catsby Date: Thu, 28 Apr 2022 17:15:43 -0500 Subject: [PATCH 2/2] changelog 3299 --- .changelog/3299.txt | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 .changelog/3299.txt diff --git a/.changelog/3299.txt b/.changelog/3299.txt new file mode 100644 index 00000000000..4e927023b78 --- /dev/null +++ b/.changelog/3299.txt @@ -0,0 +1,3 @@ +```release-note:bug +server: fix issue cleaning up tasks in Kubernetes that completed successfully +```