From d26a7257390f8d5c3883c8ee0117f7f125f04301 Mon Sep 17 00:00:00 2001 From: cedric lamoriniere Date: Tue, 5 Sep 2017 17:26:22 +0200 Subject: [PATCH] Update job workload doc with backoff failure policy Add to the Jobs documentation how to use the new backoffLimit field that limits the number of Pod failures before considering the Job as failed. --- docs/concepts/workloads/controllers/job.yaml | 1 + .../workloads/controllers/jobs-run-to-completion.md | 7 +++++++ 2 files changed, 8 insertions(+) diff --git a/docs/concepts/workloads/controllers/job.yaml b/docs/concepts/workloads/controllers/job.yaml index ece4512a8acfc..eb8af28fb6930 100644 --- a/docs/concepts/workloads/controllers/job.yaml +++ b/docs/concepts/workloads/controllers/job.yaml @@ -12,4 +12,5 @@ spec: image: perl command: ["perl", "-Mbignum=bpi", "-wle", "print bpi(2000)"] restartPolicy: Never + backoffLimit: 4 diff --git a/docs/concepts/workloads/controllers/jobs-run-to-completion.md b/docs/concepts/workloads/controllers/jobs-run-to-completion.md index 1c5ceed3dfb4c..2f1ff5f51ac70 100644 --- a/docs/concepts/workloads/controllers/jobs-run-to-completion.md +++ b/docs/concepts/workloads/controllers/jobs-run-to-completion.md @@ -183,6 +183,12 @@ sometimes be started twice. If you do specify `.spec.parallelism` and `.spec.completions` both greater than 1, then there may be multiple pods running at once. Therefore, your pods must also be tolerant of concurrency. +### Pod Backoff Failure Policy + +There are situations where you want to fail a Job after some number of retries due to a logical error in configuration, etc. +To do so, set `.spec.backoffLimit` to specify the number of retries before considering a Job as failed. +The back-off limit is set by default to 6. Failed Pods associated with the Job are recreated by the Job controller with an exponential back-off delay (10s, 20s, 40s ...) 
capped at six minutes. The back-off count is reset if no new failed Pods appear before the Job's next status check. + ## Job Termination and Cleanup When a Job completes, no more Pods are created, but the Pods are not deleted either. Since they are terminated, @@ -217,6 +223,7 @@ spec: image: perl command: ["perl", "-Mbignum=bpi", "-wle", "print bpi(2000)"] restartPolicy: Never + backoffLimit: 5 ``` Note that both the Job Spec and the Pod Template Spec within the Job have a field with the same name.