
Commit 138db52

Merge pull request #2367 from dusk125/rebase-v1.32.7
OCPBUGS-59534: Rebase v1.32.7 to 4.19
2 parents: 38c60a5 + 4c3192d

File tree: 8 files changed (+379, −72 lines)

CHANGELOG/CHANGELOG-1.32.md

Lines changed: 208 additions & 70 deletions
Large diffs are not rendered by default.

cmd/kubeadm/app/util/etcd/etcd.go

Lines changed: 9 additions & 0 deletions
@@ -576,6 +576,15 @@ func (c *Client) MemberPromote(learnerID uint64) error {
 	ctx, cancel := context.WithTimeout(context.Background(), etcdTimeout)
 	defer cancel()
 
+	isLearner, err := c.isLearner(learnerID)
+	if err != nil {
+		return false, err
+	}
+	if !isLearner {
+		klog.V(1).Infof("[etcd] Member %s was already promoted.", strconv.FormatUint(learnerID, 16))
+		return true, nil
+	}
+
 	_, err = cli.MemberPromote(ctx, learnerID)
 	if err == nil {
 		klog.V(1).Infof("[etcd] The learner was promoted as a voting member: %s", strconv.FormatUint(learnerID, 16))
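This guard makes promotion idempotent: the (bool, error) returns suggest the hunk lands inside a polling callback, which now reports success instead of re-promoting a member that is no longer a learner. A minimal, self-contained sketch of the check-then-promote pattern (the member type and names here are hypothetical, not kubeadm's API):

package main

import "fmt"

// member is a hypothetical stand-in for an etcd cluster member.
type member struct {
	id        uint64
	isLearner bool
}

// promoteIfLearner mirrors the added guard: when the member is already a
// voting member, the retry callback returns (true, nil) and stops polling.
func promoteIfLearner(m *member) (bool, error) {
	if !m.isLearner {
		fmt.Printf("[etcd] member %x was already promoted\n", m.id)
		return true, nil
	}
	m.isLearner = false // promotion succeeded
	fmt.Printf("[etcd] member %x promoted to voting member\n", m.id)
	return true, nil
}

func main() {
	m := &member{id: 0xab12, isLearner: true}
	promoteIfLearner(m) // first call promotes
	promoteIfLearner(m) // a retried call is now a no-op success
}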

openshift-hack/images/hyperkube/Dockerfile.rhel

Lines changed: 1 addition & 1 deletion
@@ -14,4 +14,4 @@ COPY --from=builder /tmp/build/* /usr/bin/
 LABEL io.k8s.display-name="OpenShift Kubernetes Server Commands" \
       io.k8s.description="OpenShift is a platform for developing, building, and deploying containerized applications." \
       io.openshift.tags="openshift,hyperkube" \
-      io.openshift.build.versions="kubernetes=1.32.6"
+      io.openshift.build.versions="kubernetes=1.32.7"

pkg/controller/job/job_controller.go

Lines changed: 6 additions & 0 deletions
@@ -537,6 +537,12 @@ func (jm *Controller) deleteJob(logger klog.Logger, obj interface{}) {
 		}
 	}
 	jm.enqueueLabelSelector(jobObj)
+
+	key := cache.MetaObjectToName(jobObj).String()
+	err := jm.podBackoffStore.removeBackoffRecord(key)
+	if err != nil {
+		utilruntime.HandleError(fmt.Errorf("error removing backoff record %w", err))
+	}
 }
 
 func (jm *Controller) enqueueLabelSelector(jobObj *batch.Job) {
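The hunk drops the Job's pod-failure backoff record when the Job object is deleted, so a Job re-created under the same namespace/name key does not inherit the old delay. A minimal sketch of such a keyed backoff store (a simplified, hypothetical reimplementation, not the controller's podBackoffStore):

package main

import (
	"fmt"
	"sync"
	"time"
)

// backoffStore tracks a pod-failure backoff per Job key ("namespace/name").
type backoffStore struct {
	mu      sync.Mutex
	records map[string]time.Duration
}

func newBackoffStore() *backoffStore {
	return &backoffStore{records: map[string]time.Duration{}}
}

func (s *backoffStore) recordFailure(key string, d time.Duration) {
	s.mu.Lock()
	defer s.mu.Unlock()
	s.records[key] = d
}

func (s *backoffStore) removeBackoffRecord(key string) error {
	s.mu.Lock()
	defer s.mu.Unlock()
	delete(s.records, key)
	return nil
}

func main() {
	store := newBackoffStore()
	key := "default/test-job" // the form produced by cache.MetaObjectToName(...).String()
	store.recordFailure(key, 10*time.Second)

	// Without this cleanup in deleteJob, a re-created Job with the same
	// namespace/name would inherit the stale backoff and start its Pods late.
	_ = store.removeBackoffRecord(key)
	fmt.Println("records left:", len(store.records))
}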

pkg/kubelet/images/image_gc_manager.go

Lines changed: 3 additions & 0 deletions
@@ -494,7 +494,10 @@ func (im *realImageGCManager) freeImage(ctx context.Context, image evictionInfo,
 	if isRuntimeClassInImageCriAPIEnabled {
 		imageKey = getImageTuple(image.id, image.runtimeHandlerUsedToPullImage)
 	}
+
+	im.imageRecordsLock.Lock()
 	delete(im.imageRecords, imageKey)
+	im.imageRecordsLock.Unlock()
 
 	metrics.ImageGarbageCollectedTotal.WithLabelValues(reason).Inc()
 	return err
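The fix brackets the map delete with imageRecordsLock so freeImage cannot race other goroutines mutating imageRecords; an unsynchronized write to a shared Go map can panic with "concurrent map writes". A minimal sketch of the locking pattern (hypothetical types, not the kubelet's):

package main

import (
	"fmt"
	"sync"
)

// imageCache stands in for a shared records map guarded by one mutex:
// every write must hold the same lock the readers use.
type imageCache struct {
	mu      sync.Mutex
	records map[string]int
}

func (c *imageCache) free(key string) {
	c.mu.Lock()
	delete(c.records, key) // safe: no concurrent map write is possible
	c.mu.Unlock()
}

func main() {
	c := &imageCache{records: map[string]int{"sha256:abc": 1}}
	var wg sync.WaitGroup
	for i := 0; i < 10; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			c.free("sha256:abc") // concurrent frees stay serialized
		}()
	}
	wg.Wait()
	fmt.Println("records:", len(c.records))
}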

pkg/registry/batch/job/strategy.go

Lines changed: 2 additions & 1 deletion
@@ -379,6 +379,7 @@ func getStatusValidationOptions(newJob, oldJob *batch.Job) batchvalidation.JobSt
 	isUncountedTerminatedPodsChanged := !apiequality.Semantic.DeepEqual(oldJob.Status.UncountedTerminatedPods, newJob.Status.UncountedTerminatedPods)
 	isReadyChanged := !ptr.Equal(oldJob.Status.Ready, newJob.Status.Ready)
 	isTerminatingChanged := !ptr.Equal(oldJob.Status.Terminating, newJob.Status.Terminating)
+	isSuspendedWithZeroCompletions := ptr.Equal(newJob.Spec.Suspend, ptr.To(true)) && ptr.Equal(newJob.Spec.Completions, ptr.To[int32](0))
 
 	return batchvalidation.JobStatusValidationOptions{
 		// We allow to decrease the counter for succeeded pods for jobs which
@@ -394,7 +395,7 @@ func getStatusValidationOptions(newJob, oldJob *batch.Job) batchvalidation.JobSt
 		RejectFailedJobWithoutFailureTarget:          isJobFailedChanged || isFailedIndexesChanged,
 		RejectCompleteJobWithoutSuccessCriteriaMet:   isJobCompleteChanged || isJobSuccessCriteriaMetChanged,
 		RejectFinishedJobWithActivePods:              isJobFinishedChanged || isActiveChanged,
-		RejectFinishedJobWithoutStartTime:            isJobFinishedChanged || isStartTimeChanged,
+		RejectFinishedJobWithoutStartTime:            (isJobFinishedChanged || isStartTimeChanged) && !isSuspendedWithZeroCompletions,
 		RejectFinishedJobWithUncountedTerminatedPods: isJobFinishedChanged || isUncountedTerminatedPodsChanged,
 		RejectStartTimeUpdateForUnsuspendedJob:       isStartTimeChanged,
 		RejectCompletionTimeBeforeStartTime:          isStartTimeChanged || isCompletionTimeChanged,
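The new isSuspendedWithZeroCompletions predicate relaxes status validation for a suspended Job with completions=0: such a Job finishes without ever starting Pods, so it may legitimately be marked Complete with no startTime. Both comparisons use ptr.Equal from k8s.io/utils/ptr, which treats two nils as equal and otherwise compares pointees; a small sketch of that behavior (assuming the standard k8s.io/utils/ptr module):

package main

import (
	"fmt"

	"k8s.io/utils/ptr"
)

func main() {
	suspend := ptr.To(true)
	completions := ptr.To[int32](0)

	// Mirrors the added predicate: true only when both fields are set
	// to exactly suspend=true and completions=0.
	isSuspendedWithZeroCompletions := ptr.Equal(suspend, ptr.To(true)) &&
		ptr.Equal(completions, ptr.To[int32](0))
	fmt.Println(isSuspendedWithZeroCompletions) // true

	// ptr.Equal dereferences non-nil pointers, so an unset (nil) field
	// never matches a concrete value.
	var unset *int32
	fmt.Println(ptr.Equal(unset, ptr.To[int32](0))) // false
}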

pkg/registry/batch/job/strategy_test.go

Lines changed: 30 additions & 0 deletions
@@ -3561,6 +3561,36 @@ func TestStatusStrategy_ValidateUpdate(t *testing.T) {
 				{Type: field.ErrorTypeInvalid, Field: "status.ready"},
 			},
 		},
+		"valid transition to Complete for suspended Job with completions=0; without startTime": {
+			enableJobManagedBy: true,
+			job: &batch.Job{
+				ObjectMeta: validObjectMeta,
+				Spec: batch.JobSpec{
+					Completions: ptr.To[int32](0),
+					Suspend:     ptr.To(true),
+				},
+			},
+			newJob: &batch.Job{
+				ObjectMeta: validObjectMeta,
+				Spec: batch.JobSpec{
+					Completions: ptr.To[int32](0),
+					Suspend:     ptr.To(true),
+				},
+				Status: batch.JobStatus{
+					CompletionTime: &now,
+					Conditions: []batch.JobCondition{
+						{
+							Type:   batch.JobSuccessCriteriaMet,
+							Status: api.ConditionTrue,
+						},
+						{
+							Type:   batch.JobComplete,
+							Status: api.ConditionTrue,
+						},
+					},
+				},
+			},
+		},
 	}
 	for name, tc := range cases {
 		t.Run(name, func(t *testing.T) {

test/integration/job/job_test.go

Lines changed: 120 additions & 0 deletions
@@ -2271,6 +2271,103 @@ func TestManagedBy_Reenabling(t *testing.T) {
 	})
 }
 
+// TestImmediateJobRecreation verifies that the replacement Job creates the Pods
+// quickly after re-creation, see https://github.com/kubernetes/kubernetes/issues/132042.
+func TestImmediateJobRecreation(t *testing.T) {
+	// Set the backoff delay very high so the test cannot pass by simply waiting long on asserts.
+	t.Cleanup(setDurationDuringTest(&jobcontroller.DefaultJobPodFailureBackOff, 2*wait.ForeverTestTimeout))
+	closeFn, restConfig, clientSet, ns := setup(t, "recreate-job-immediately")
+	t.Cleanup(closeFn)
+	ctx, cancel := startJobControllerAndWaitForCaches(t, restConfig)
+	t.Cleanup(cancel)
+
+	baseJob := batchv1.Job{
+		ObjectMeta: metav1.ObjectMeta{
+			Namespace: ns.Name,
+		},
+		Spec: batchv1.JobSpec{
+			Completions: ptr.To[int32](1),
+			Parallelism: ptr.To[int32](1),
+			Template: v1.PodTemplateSpec{
+				Spec: v1.PodSpec{
+					Containers: []v1.Container{
+						{
+							Name:  "main-container",
+							Image: "foo",
+						},
+					},
+				},
+			},
+		},
+	}
+	jobSpec := func(idx int) batchv1.Job {
+		spec := baseJob.DeepCopy()
+		spec.Name = fmt.Sprintf("test-job-%d", idx)
+		return *spec
+	}
+
+	var jobObjs []*batchv1.Job
+	// We create multiple Jobs to make the repro more likely. In particular, we need
+	// more Jobs than the number of Job controller workers to make it very unlikely
+	// that syncJob executes (and cleans the in-memory state) before the corresponding
+	// replacement Jobs are created.
+	for i := 0; i < 3; i++ {
+		jobObj, err := createJobWithDefaults(ctx, clientSet, ns.Name, ptr.To(jobSpec(i)))
+		if err != nil {
+			t.Fatalf("Error %v when creating the job %q", err, klog.KObj(jobObj))
+		}
+		jobObjs = append(jobObjs, jobObj)
+	}
+
+	for _, jobObj := range jobObjs {
+		validateJobsPodsStatusOnly(ctx, t, clientSet, jobObj, podsByStatus{
+			Active:      1,
+			Ready:       ptr.To[int32](0),
+			Terminating: ptr.To[int32](0),
+		})
+
+		if _, err := setJobPodsPhase(ctx, clientSet, jobObj, v1.PodFailed, 1); err != nil {
+			t.Fatalf("Error %v when setting phase %s on the pod of job %v", err, v1.PodFailed, klog.KObj(jobObj))
+		}
+
+		// Await to account for the failed Pod.
+		validateJobsPodsStatusOnly(ctx, t, clientSet, jobObj, podsByStatus{
+			Failed:      1,
+			Ready:       ptr.To[int32](0),
+			Terminating: ptr.To[int32](0),
+		})
+	}
+
+	for i := 0; i < len(jobObjs); i++ {
+		jobObj := jobObjs[i]
+		jobClient := clientSet.BatchV1().Jobs(jobObj.Namespace)
+		if err := jobClient.Delete(ctx, jobObj.Name, metav1.DeleteOptions{
+			// Use propagationPolicy=background so that we don't need to wait for the job object to be gone.
+			PropagationPolicy: ptr.To(metav1.DeletePropagationBackground),
+		}); err != nil {
+			t.Fatalf("Error %v when deleting the job %v", err, klog.KObj(jobObj))
+		}
+
+		// Re-create the job immediately.
+		jobObj, err := createJobWithDefaults(ctx, clientSet, ns.Name, ptr.To(jobSpec(i)))
+		if err != nil {
+			t.Fatalf("Error %v while creating the job %q", err, klog.KObj(jobObj))
+		}
+		jobObjs[i] = jobObj
+	}
+
+	// The total timeout (3*5s) is less than 2*ForeverTestTimeout.
+	for _, jobObj := range jobObjs {
+		// Wait max 5s for Active=1. This assert verifies that the backoff
+		// delay is not applied to the replacement instance of the Job.
+		validateJobsPodsStatusOnlyWithTimeout(ctx, t, clientSet, jobObj, podsByStatus{
+			Active:      1,
+			Ready:       ptr.To[int32](0),
+			Terminating: ptr.To[int32](0),
+		}, 5*time.Second)
+	}
+}
+
 // TestManagedBy_RecreatedJob verifies that the Job controller skips
 // reconciliation of a job with managedBy field, when this is a recreated job,
 // and there is still a pending sync queued for the previous job.
@@ -3965,6 +4062,29 @@ func TestSuspendJob(t *testing.T) {
 	}
 }
 
+// TestSuspendJobWithZeroCompletions verifies the suspended Job with
+// completions=0 is marked as Complete.
+func TestSuspendJobWithZeroCompletions(t *testing.T) {
+	closeFn, restConfig, clientSet, ns := setup(t, "suspended-with-zero-completions")
+	t.Cleanup(closeFn)
+	ctx, cancel := startJobControllerAndWaitForCaches(t, restConfig)
+	t.Cleanup(func() {
+		cancel()
+	})
+	jobObj, err := createJobWithDefaults(ctx, clientSet, ns.Name, &batchv1.Job{
+		Spec: batchv1.JobSpec{
+			Completions: ptr.To[int32](0),
+			Suspend:     ptr.To(true),
+		},
+	})
+	if err != nil {
+		t.Fatalf("Failed to create Job: %v", err)
+	}
+	for _, condition := range []batchv1.JobConditionType{batchv1.JobSuccessCriteriaMet, batchv1.JobComplete} {
+		validateJobCondition(ctx, t, clientSet, jobObj, condition)
+	}
+}
+
 func TestSuspendJobControllerRestart(t *testing.T) {
 	closeFn, restConfig, clientSet, ns := setup(t, "suspend")
 	t.Cleanup(closeFn)
