
Commit 74f7a44

Merge pull request kubernetes#133276 from macsko/stop_clearing_nnn_in_all_cases
KEP-5278 Stop clearing NominatedNodeName in all cases
2 parents 144d198 + aea0a3c commit 74f7a44

3 files changed: 224 additions and 78 deletions


pkg/scheduler/schedule_one.go

Lines changed: 17 additions & 9 deletions
@@ -136,7 +136,15 @@ func (sched *Scheduler) ScheduleOne(ctx context.Context) {
         }()
     }

-var clearNominatedNode = &framework.NominatingInfo{NominatingMode: framework.ModeOverride, NominatedNodeName: ""}
+// newFailureNominatingInfo returns the appropriate NominatingInfo for scheduling failures.
+// When NominatedNodeNameForExpectation feature is enabled, it returns nil (no clearing).
+// Otherwise, it returns NominatingInfo to clear the pod's nominated node.
+func (sched *Scheduler) newFailureNominatingInfo() *framework.NominatingInfo {
+    if sched.nominatedNodeNameForExpectationEnabled {
+        return nil
+    }
+    return &framework.NominatingInfo{NominatingMode: framework.ModeOverride, NominatedNodeName: ""}
+}

 // schedulingCycle tries to schedule a single Pod.
 func (sched *Scheduler) schedulingCycle(
@@ -156,13 +164,13 @@ func (sched *Scheduler) schedulingCycle(
     }()
     if err == ErrNoNodesAvailable {
         status := fwk.NewStatus(fwk.UnschedulableAndUnresolvable).WithError(err)
-        return ScheduleResult{nominatingInfo: clearNominatedNode}, podInfo, status
+        return ScheduleResult{nominatingInfo: sched.newFailureNominatingInfo()}, podInfo, status
     }

     fitError, ok := err.(*framework.FitError)
     if !ok {
         logger.Error(err, "Error selecting node for pod", "pod", klog.KObj(pod))
-        return ScheduleResult{nominatingInfo: clearNominatedNode}, podInfo, fwk.AsStatus(err)
+        return ScheduleResult{nominatingInfo: sched.newFailureNominatingInfo()}, podInfo, fwk.AsStatus(err)
     }

     // SchedulePod() may have failed because the pod would not fit on any host, so we try to
@@ -205,7 +213,7 @@ func (sched *Scheduler) schedulingCycle(
         // This relies on the fact that Error will check if the pod has been bound
         // to a node and if so will not add it back to the unscheduled pods queue
         // (otherwise this would cause an infinite loop).
-        return ScheduleResult{nominatingInfo: clearNominatedNode}, assumedPodInfo, fwk.AsStatus(err)
+        return ScheduleResult{nominatingInfo: sched.newFailureNominatingInfo()}, assumedPodInfo, fwk.AsStatus(err)
     }

     // Run the Reserve method of reserve plugins.
@@ -226,9 +234,9 @@ func (sched *Scheduler) schedulingCycle(
             }
             fitErr.Diagnosis.NodeToStatus.Set(scheduleResult.SuggestedHost, sts)
             fitErr.Diagnosis.AddPluginStatus(sts)
-            return ScheduleResult{nominatingInfo: clearNominatedNode}, assumedPodInfo, fwk.NewStatus(sts.Code()).WithError(fitErr)
+            return ScheduleResult{nominatingInfo: sched.newFailureNominatingInfo()}, assumedPodInfo, fwk.NewStatus(sts.Code()).WithError(fitErr)
         }
-        return ScheduleResult{nominatingInfo: clearNominatedNode}, assumedPodInfo, sts
+        return ScheduleResult{nominatingInfo: sched.newFailureNominatingInfo()}, assumedPodInfo, sts
     }

     // Run "permit" plugins.
@@ -250,10 +258,10 @@ func (sched *Scheduler) schedulingCycle(
             }
             fitErr.Diagnosis.NodeToStatus.Set(scheduleResult.SuggestedHost, runPermitStatus)
             fitErr.Diagnosis.AddPluginStatus(runPermitStatus)
-            return ScheduleResult{nominatingInfo: clearNominatedNode}, assumedPodInfo, fwk.NewStatus(runPermitStatus.Code()).WithError(fitErr)
+            return ScheduleResult{nominatingInfo: sched.newFailureNominatingInfo()}, assumedPodInfo, fwk.NewStatus(runPermitStatus.Code()).WithError(fitErr)
         }

-        return ScheduleResult{nominatingInfo: clearNominatedNode}, assumedPodInfo, runPermitStatus
+        return ScheduleResult{nominatingInfo: sched.newFailureNominatingInfo()}, assumedPodInfo, runPermitStatus
     }

     // At the end of a successful scheduling cycle, pop and move up Pods if needed.
@@ -385,7 +393,7 @@ func (sched *Scheduler) handleBindingCycleError(
         }
     }

-    sched.FailureHandler(ctx, fwk, podInfo, status, clearNominatedNode, start)
+    sched.FailureHandler(ctx, fwk, podInfo, status, sched.newFailureNominatingInfo(), start)
 }

 func (sched *Scheduler) frameworkForPod(pod *v1.Pod) (framework.Framework, error) {
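The helper added above changes what a scheduling failure does to a pod's NominatedNodeName status field. As a rough illustration (simplified stand-in types, not the real scheduler framework API), a nil NominatingInfo now means "leave the existing nomination untouched", while the old behavior always overrode it with an empty name:

package main

import "fmt"

// Simplified stand-ins for the scheduler framework types (illustration only).
type NominatingMode int

const ModeOverride NominatingMode = 1

type NominatingInfo struct {
    NominatingMode    NominatingMode
    NominatedNodeName string
}

type scheduler struct {
    nominatedNodeNameForExpectationEnabled bool
}

// newFailureNominatingInfo mirrors the helper added in this commit:
// nil means "do not touch the pod's nominated node name on failure".
func (s *scheduler) newFailureNominatingInfo() *NominatingInfo {
    if s.nominatedNodeNameForExpectationEnabled {
        return nil
    }
    return &NominatingInfo{NominatingMode: ModeOverride, NominatedNodeName: ""}
}

// applyFailure shows how a consumer would interpret the result.
func applyFailure(currentNNN string, ni *NominatingInfo) string {
    if ni == nil {
        return currentNNN // keep the existing nomination
    }
    return ni.NominatedNodeName // override (clears it when empty)
}

func main() {
    for _, gate := range []bool{true, false} {
        s := &scheduler{nominatedNodeNameForExpectationEnabled: gate}
        fmt.Printf("gate=%v -> NominatedNodeName after failure: %q\n",
            gate, applyFailure("existing-node", s.newFailureNominatingInfo()))
    }
}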

pkg/scheduler/schedule_one_test.go

Lines changed: 118 additions & 2 deletions
@@ -916,8 +916,62 @@ func TestSchedulerScheduleOne(t *testing.T) {
            mockScheduleResult: emptyScheduleResult,
            eventReason:        "FailedScheduling",
        },
+        {
+            name: "pod with existing nominated node name on scheduling error keeps nomination",
+            sendPod: func() *v1.Pod {
+                p := podWithID("foo", "")
+                p.Status.NominatedNodeName = "existing-node"
+                return p
+            }(),
+            injectSchedulingError: schedulingErr,
+            mockScheduleResult:    scheduleResultOk,
+            expectError:           schedulingErr,
+            expectErrorPod: func() *v1.Pod {
+                p := podWithID("foo", "")
+                p.Status.NominatedNodeName = "existing-node"
+                return p
+            }(),
+            expectPodInBackoffQ: func() *v1.Pod {
+                p := podWithID("foo", "")
+                p.Status.NominatedNodeName = "existing-node"
+                return p
+            }(),
+            // Depending on the timing, if asyncAPICallsEnabled, the NNN update might not be sent yet while checking the expectNominatedNodeName.
+            // So, asyncAPICallsEnabled is set to false.
+            asyncAPICallsEnabled:                   ptr.To(false),
+            nominatedNodeNameForExpectationEnabled: ptr.To(true),
+            expectNominatedNodeName:                "existing-node",
+            eventReason:                            "FailedScheduling",
+        },
+        {
+            name: "pod with existing nominated node name on scheduling error clears nomination",
+            sendPod: func() *v1.Pod {
+                p := podWithID("foo", "")
+                p.Status.NominatedNodeName = "existing-node"
+                return p
+            }(),
+            injectSchedulingError: schedulingErr,
+            mockScheduleResult:    scheduleResultOk,
+            expectError:           schedulingErr,
+            expectErrorPod: func() *v1.Pod {
+                p := podWithID("foo", "")
+                p.Status.NominatedNodeName = "existing-node"
+                return p
+            }(),
+            expectPodInBackoffQ: func() *v1.Pod {
+                p := podWithID("foo", "")
+                p.Status.NominatedNodeName = "existing-node"
+                return p
+            }(),
+            // Depending on the timing, if asyncAPICallsEnabled, the NNN update might not be sent yet while checking the expectNominatedNodeName.
+            // So, asyncAPICallsEnabled is set to false.
+            asyncAPICallsEnabled:                   ptr.To(false),
+            nominatedNodeNameForExpectationEnabled: ptr.To(false),
+            eventReason:                            "FailedScheduling",
+        },
    }

+   // Test with QueueingHints and NominatedNodeNameForExpectation feature gates
    for _, qHintEnabled := range []bool{true, false} {
        for _, item := range table {
            asyncAPICallsEnabled := []bool{true, false}
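The two table entries added above run the same failure path with NominatedNodeNameForExpectation flipped: with the gate on, the pod keeps its pre-existing nomination; with it off, the field is cleared as before. A condensed, hypothetical sketch of that expectation (plain testing package and a simplified gate-dependent helper, not the real scheduler) could look like:

package example

import "testing"

// failureNominatedNodeName models the observable outcome pinned down by the
// two test cases above: with the gate on, a failed pod keeps its existing
// nomination; with it off, the nomination is cleared.
func failureNominatedNodeName(gateEnabled bool, existing string) string {
    if gateEnabled {
        return existing
    }
    return ""
}

func TestFailureKeepsOrClearsNomination(t *testing.T) {
    cases := []struct {
        name        string
        gateEnabled bool
        want        string
    }{
        {name: "gate on keeps nomination", gateEnabled: true, want: "existing-node"},
        {name: "gate off clears nomination", gateEnabled: false, want: ""},
    }
    for _, tc := range cases {
        t.Run(tc.name, func(t *testing.T) {
            if got := failureNominatedNodeName(tc.gateEnabled, "existing-node"); got != tc.want {
                t.Errorf("got %q, want %q", got, tc.want)
            }
        })
    }
}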
@@ -930,8 +984,8 @@ func TestSchedulerScheduleOne(t *testing.T) {
                nominatedNodeNameForExpectationEnabled = []bool{*item.nominatedNodeNameForExpectationEnabled}
            }
            for _, nominatedNodeNameForExpectationEnabled := range nominatedNodeNameForExpectationEnabled {
-               if nominatedNodeNameForExpectationEnabled && !qHintEnabled {
-                   // If the QHint feature gate is disabled, NominatedNodeNameForExpectation cannot be enabled
+               if (asyncAPICallsEnabled || nominatedNodeNameForExpectationEnabled) && !qHintEnabled {
+                   // If the QHint feature gate is disabled, NominatedNodeNameForExpectation and SchedulerAsyncAPICalls cannot be enabled
                    // because that means users set the emilation version to 1.33 or later.
                    continue
                }
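The widened guard above skips feature-gate combinations that cannot occur in practice: SchedulerAsyncAPICalls and NominatedNodeNameForExpectation both imply an emulation version where queueing hints are enabled. A small standalone sketch of that filtering, treating the gates as plain booleans rather than real feature gates:

package main

import "fmt"

// enumerateGateCombos illustrates the guard added above: combinations where
// SchedulerAsyncAPICalls or NominatedNodeNameForExpectation is enabled while
// queueing hints are disabled are skipped.
func enumerateGateCombos() [][3]bool {
    var valid [][3]bool
    for _, qHint := range []bool{true, false} {
        for _, asyncAPICalls := range []bool{true, false} {
            for _, nnnForExpectation := range []bool{true, false} {
                if (asyncAPICalls || nnnForExpectation) && !qHint {
                    continue // invalid combination, mirrors the test's guard
                }
                valid = append(valid, [3]bool{qHint, asyncAPICalls, nnnForExpectation})
            }
        }
    }
    return valid
}

func main() {
    for _, c := range enumerateGateCombos() {
        fmt.Printf("qHint=%v asyncAPICalls=%v nnnForExpectation=%v\n", c[0], c[1], c[2])
    }
}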
@@ -946,6 +1000,8 @@ func TestSchedulerScheduleOne(t *testing.T) {
                var gotForgetPod *v1.Pod
                var gotAssumedPod *v1.Pod
                var gotBinding *v1.Binding
+               var gotNominatingInfo *framework.NominatingInfo
+
                client := clientsetfake.NewClientset(item.sendPod)
                informerFactory := informers.NewSharedInformerFactory(client, 0)
                client.PrependReactor("create", "pods", func(action clienttesting.Action) (bool, runtime.Object, error) {
@@ -1050,6 +1106,7 @@ func TestSchedulerScheduleOne(t *testing.T) {
                sched.FailureHandler = func(ctx context.Context, fwk framework.Framework, p *framework.QueuedPodInfo, status *fwk.Status, ni *framework.NominatingInfo, start time.Time) {
                    gotPod = p.Pod
                    gotError = status.AsError()
+                   gotNominatingInfo = ni

                    sched.handleSchedulingFailure(ctx, fwk, p, status, ni, start)
                }
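These two hunks thread a new gotNominatingInfo variable through the test: the FailureHandler stub records the NominatingInfo it receives before delegating to the real handler. A minimal sketch of that capture-then-delegate pattern (simplified types, not the scheduler's actual handler signature):

package main

import "fmt"

// Simplified stand-in for framework.NominatingInfo (illustration only).
type NominatingInfo struct {
    NominatedNodeName string
}

// Scheduler exposes a swappable failure handler, the hook the test replaces.
type Scheduler struct {
    FailureHandler func(ni *NominatingInfo)
}

func main() {
    var gotNominatingInfo *NominatingInfo

    realHandler := func(ni *NominatingInfo) {
        fmt.Println("handling scheduling failure")
    }

    sched := &Scheduler{}
    // Record the argument, then delegate to the real handler, mirroring the
    // gotNominatingInfo = ni line added in the diff above.
    sched.FailureHandler = func(ni *NominatingInfo) {
        gotNominatingInfo = ni
        realHandler(ni)
    }

    // Simulate a scheduling failure that clears the nomination.
    sched.FailureHandler(&NominatingInfo{NominatedNodeName: ""})
    fmt.Printf("captured nominatingInfo: %+v\n", gotNominatingInfo)
}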
@@ -1101,6 +1158,16 @@ func TestSchedulerScheduleOne(t *testing.T) {
                } else if item.expectError.Error() != gotError.Error() {
                    t.Errorf("Unexpected error. Wanted %v, got %v", item.expectError.Error(), gotError.Error())
                }
+               if item.expectError != nil {
+                   var expectedNominatingInfo *framework.NominatingInfo
+                   // Check nominatingInfo expectation based on feature gate
+                   if !nominatedNodeNameForExpectationEnabled {
+                       expectedNominatingInfo = &framework.NominatingInfo{NominatingMode: framework.ModeOverride, NominatedNodeName: ""}
+                   }
+                   if diff := cmp.Diff(expectedNominatingInfo, gotNominatingInfo); diff != "" {
+                       t.Errorf("Unexpected nominatingInfo (-want,+got):\n%s", diff)
+                   }
+               }
                if diff := cmp.Diff(item.expectBind, gotBinding); diff != "" {
                    t.Errorf("Unexpected binding (-want,+got):\n%s", diff)
                }
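The new assertion leaves the expected value nil whenever the gate is on, so cmp.Diff reports both an unexpected clear and a missing one. A self-contained sketch of the same nil-versus-pointer comparison, assuming the github.com/google/go-cmp/cmp module is available:

package example

import (
    "testing"

    "github.com/google/go-cmp/cmp"
)

// Simplified stand-in for framework.NominatingInfo (illustration only).
type NominatingInfo struct {
    NominatedNodeName string
}

// failureNominatingInfo models the value the test captures from the failure
// handler: nil with the gate on, an explicit "clear" otherwise.
func failureNominatingInfo(gateEnabled bool) *NominatingInfo {
    if gateEnabled {
        return nil
    }
    return &NominatingInfo{NominatedNodeName: ""}
}

// TestNominatingInfoExpectation mirrors the assertion above: the want value
// stays nil when the gate is enabled, so cmp.Diff flags any deviation.
func TestNominatingInfoExpectation(t *testing.T) {
    for _, gateEnabled := range []bool{true, false} {
        var want *NominatingInfo
        if !gateEnabled {
            want = &NominatingInfo{NominatedNodeName: ""}
        }
        got := failureNominatingInfo(gateEnabled)
        if diff := cmp.Diff(want, got); diff != "" {
            t.Errorf("Unexpected nominatingInfo (-want,+got):\n%s", diff)
        }
    }
}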
@@ -2172,6 +2239,55 @@ func TestUpdatePod(t *testing.T) {
            expectPatchRequest:       true,
            expectedPatchDataPattern: `{"status":{"nominatedNodeName":"node1"}}`,
        },
+        {
+            name: "Should not update nominated node name when nominatingInfo is nil",
+            currentPodConditions: []v1.PodCondition{
+                {
+                    Type:               "currentType",
+                    Status:             "currentStatus",
+                    LastProbeTime:      metav1.NewTime(time.Date(2020, 5, 13, 0, 0, 0, 0, time.UTC)),
+                    LastTransitionTime: metav1.NewTime(time.Date(2020, 5, 12, 0, 0, 0, 0, time.UTC)),
+                    Reason:             "currentReason",
+                    Message:            "currentMessage",
+                },
+            },
+            newPodCondition: &v1.PodCondition{
+                Type:               "currentType",
+                Status:             "newStatus",
+                LastProbeTime:      metav1.NewTime(time.Date(2020, 5, 13, 1, 1, 1, 1, time.UTC)),
+                LastTransitionTime: metav1.NewTime(time.Date(2020, 5, 12, 1, 1, 1, 1, time.UTC)),
+                Reason:             "newReason",
+                Message:            "newMessage",
+            },
+            currentNominatedNodeName: "existing-node",
+            newNominatingInfo:        nil,
+            expectPatchRequest:       true,
+            expectedPatchDataPattern: `{"status":{"\$setElementOrder/conditions":\[{"type":"currentType"}],"conditions":\[{"lastProbeTime":"2020-05-13T01:01:01Z","lastTransitionTime":".*","message":"newMessage","reason":"newReason","status":"newStatus","type":"currentType"}]}}`,
+        },
+        {
+            name: "Should not make patch request when nominatingInfo is nil and pod condition is unchanged",
+            currentPodConditions: []v1.PodCondition{
+                {
+                    Type:               "currentType",
+                    Status:             "currentStatus",
+                    LastProbeTime:      metav1.NewTime(time.Date(2020, 5, 13, 0, 0, 0, 0, time.UTC)),
+                    LastTransitionTime: metav1.NewTime(time.Date(2020, 5, 12, 0, 0, 0, 0, time.UTC)),
+                    Reason:             "currentReason",
+                    Message:            "currentMessage",
+                },
+            },
+            newPodCondition: &v1.PodCondition{
+                Type:               "currentType",
+                Status:             "currentStatus",
+                LastProbeTime:      metav1.NewTime(time.Date(2020, 5, 13, 0, 0, 0, 0, time.UTC)),
+                LastTransitionTime: metav1.NewTime(time.Date(2020, 5, 12, 0, 0, 0, 0, time.UTC)),
+                Reason:             "currentReason",
+                Message:            "currentMessage",
+            },
+            currentNominatedNodeName: "existing-node",
+            newNominatingInfo:        nil,
+            expectPatchRequest:       false,
+        },
    }
    for _, asyncAPICallsEnabled := range []bool{true, false} {
        for _, test := range tests {
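The two TestUpdatePod cases pin down the API-call side of the change: a nil nominatingInfo must never touch the nominated node name, and if the pod condition is also unchanged there should be no patch request at all. A simplified sketch of that decision (a hypothetical needsPatch helper, not the real updatePod code path):

package main

import "fmt"

// Simplified stand-in for framework.NominatingInfo (illustration only).
type NominatingInfo struct {
    NominatedNodeName string
}

// needsPatch sketches the behavior these test cases pin down: a nil
// nominatingInfo never changes the nominated node name, and if the pod
// condition is unchanged too, no patch request is sent at all.
func needsPatch(conditionChanged bool, ni *NominatingInfo, currentNNN string) (patch bool, newNNN string) {
    newNNN = currentNNN
    if ni != nil && ni.NominatedNodeName != currentNNN {
        newNNN = ni.NominatedNodeName
        patch = true
    }
    if conditionChanged {
        patch = true
    }
    return patch, newNNN
}

func main() {
    // nil nominatingInfo + changed condition: patch, but keep "existing-node".
    fmt.Println(needsPatch(true, nil, "existing-node"))
    // nil nominatingInfo + unchanged condition: no patch at all.
    fmt.Println(needsPatch(false, nil, "existing-node"))
}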
