@@ -2271,6 +2271,103 @@ func TestManagedBy_Reenabling(t *testing.T) {
22712271 })
22722272}
22732273
2274+ // TestImmediateJobRecreation verifies that the replacement Job creates the Pods
2275+ // quickly after re-creation, see https://github.com/kubernetes/kubernetes/issues/132042.
2276+ func TestImmediateJobRecreation (t * testing.T ) {
2277+ // set the backoff delay very high to make sure the test does not pass waiting long on asserts
2278+ t .Cleanup (setDurationDuringTest (& jobcontroller .DefaultJobPodFailureBackOff , 2 * wait .ForeverTestTimeout ))
2279+ closeFn , restConfig , clientSet , ns := setup (t , "recreate-job-immediately" )
2280+ t .Cleanup (closeFn )
2281+ ctx , cancel := startJobControllerAndWaitForCaches (t , restConfig )
2282+ t .Cleanup (cancel )
2283+
2284+ baseJob := batchv1.Job {
2285+ ObjectMeta : metav1.ObjectMeta {
2286+ Namespace : ns .Name ,
2287+ },
2288+ Spec : batchv1.JobSpec {
2289+ Completions : ptr.To [int32 ](1 ),
2290+ Parallelism : ptr.To [int32 ](1 ),
2291+ Template : v1.PodTemplateSpec {
2292+ Spec : v1.PodSpec {
2293+ Containers : []v1.Container {
2294+ {
2295+ Name : "main-container" ,
2296+ Image : "foo" ,
2297+ },
2298+ },
2299+ },
2300+ },
2301+ },
2302+ }
2303+ jobSpec := func (idx int ) batchv1.Job {
2304+ spec := baseJob .DeepCopy ()
2305+ spec .Name = fmt .Sprintf ("test-job-%d" , idx )
2306+ return * spec
2307+ }
2308+
2309+ var jobObjs []* batchv1.Job
2310+ // We create multiple Jobs to make the repro more likely. In particular, we need
2311+ // more Jobs than the number of Job controller workers to make it very unlikely
2312+ // that syncJob executes (and cleans the in-memory state) before the corresponding
2313+ // replacement Jobs are created.
2314+ for i := 0 ; i < 3 ; i ++ {
2315+ jobObj , err := createJobWithDefaults (ctx , clientSet , ns .Name , ptr .To (jobSpec (i )))
2316+ if err != nil {
2317+ t .Fatalf ("Error %v when creating the job %q" , err , klog .KObj (jobObj ))
2318+ }
2319+ jobObjs = append (jobObjs , jobObj )
2320+ }
2321+
2322+ for _ , jobObj := range jobObjs {
2323+ validateJobsPodsStatusOnly (ctx , t , clientSet , jobObj , podsByStatus {
2324+ Active : 1 ,
2325+ Ready : ptr.To [int32 ](0 ),
2326+ Terminating : ptr.To [int32 ](0 ),
2327+ })
2328+
2329+ if _ , err := setJobPodsPhase (ctx , clientSet , jobObj , v1 .PodFailed , 1 ); err != nil {
2330+ t .Fatalf ("Error %v when setting phase %s on the pod of job %v" , err , v1 .PodFailed , klog .KObj (jobObj ))
2331+ }
2332+
2333+ // Await to account for the failed Pod
2334+ validateJobsPodsStatusOnly (ctx , t , clientSet , jobObj , podsByStatus {
2335+ Failed : 1 ,
2336+ Ready : ptr.To [int32 ](0 ),
2337+ Terminating : ptr.To [int32 ](0 ),
2338+ })
2339+ }
2340+
2341+ for i := 0 ; i < len (jobObjs ); i ++ {
2342+ jobObj := jobObjs [i ]
2343+ jobClient := clientSet .BatchV1 ().Jobs (jobObj .Namespace )
2344+ if err := jobClient .Delete (ctx , jobObj .Name , metav1.DeleteOptions {
2345+ // Use propagationPolicy=background so that we don't need to wait for the job object to be gone.
2346+ PropagationPolicy : ptr .To (metav1 .DeletePropagationBackground ),
2347+ }); err != nil {
2348+ t .Fatalf ("Error %v when deleting the job %v" , err , klog .KObj (jobObj ))
2349+ }
2350+
2351+ // re-create the job immediately
2352+ jobObj , err := createJobWithDefaults (ctx , clientSet , ns .Name , ptr .To (jobSpec (i )))
2353+ if err != nil {
2354+ t .Fatalf ("Error %q while creating the job %q" , err , klog .KObj (jobObj ))
2355+ }
2356+ jobObjs [i ] = jobObj
2357+ }
2358+
2359+ // total timeout (3*5s) is less than 2*ForeverTestTimeout.
2360+ for _ , jobObj := range jobObjs {
2361+ // wait maks 5s for the Active=1. This assert verifies that the backoff
2362+ // delay is not applied to the replacement instance of the Job.
2363+ validateJobsPodsStatusOnlyWithTimeout (ctx , t , clientSet , jobObj , podsByStatus {
2364+ Active : 1 ,
2365+ Ready : ptr.To [int32 ](0 ),
2366+ Terminating : ptr.To [int32 ](0 ),
2367+ }, 5 * time .Second )
2368+ }
2369+ }
2370+
22742371// TestManagedBy_RecreatedJob verifies that the Job controller skips
22752372// reconciliation of a job with managedBy field, when this is a recreated job,
22762373// and there is still a pending sync queued for the previous job.
@@ -3965,6 +4062,29 @@ func TestSuspendJob(t *testing.T) {
39654062 }
39664063}
39674064
4065+ // TestSuspendJobWithZeroCompletions verifies the suspended Job with
4066+ // completions=0 is marked as Complete.
4067+ func TestSuspendJobWithZeroCompletions (t * testing.T ) {
4068+ closeFn , restConfig , clientSet , ns := setup (t , "suspended-with-zero-completions" )
4069+ t .Cleanup (closeFn )
4070+ ctx , cancel := startJobControllerAndWaitForCaches (t , restConfig )
4071+ t .Cleanup (func () {
4072+ cancel ()
4073+ })
4074+ jobObj , err := createJobWithDefaults (ctx , clientSet , ns .Name , & batchv1.Job {
4075+ Spec : batchv1.JobSpec {
4076+ Completions : ptr.To [int32 ](0 ),
4077+ Suspend : ptr .To (true ),
4078+ },
4079+ })
4080+ if err != nil {
4081+ t .Fatalf ("Failed to create Job: %v" , err )
4082+ }
4083+ for _ , condition := range []batchv1.JobConditionType {batchv1 .JobSuccessCriteriaMet , batchv1 .JobComplete } {
4084+ validateJobCondition (ctx , t , clientSet , jobObj , condition )
4085+ }
4086+ }
4087+
39684088func TestSuspendJobControllerRestart (t * testing.T ) {
39694089 closeFn , restConfig , clientSet , ns := setup (t , "suspend" )
39704090 t .Cleanup (closeFn )
0 commit comments