Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 2 additions & 3 deletions pkg/scheduler/flavorassigner/flavorassigner.go
Original file line number Diff line number Diff line change
Expand Up @@ -602,8 +602,8 @@ func flavorSelector(spec *corev1.PodSpec, allowedKeys sets.Set[string]) nodeaffi
// could help), it returns a Status with reasons.
func (a *FlavorAssigner) fitsResourceQuota(log logr.Logger, fr resources.FlavorResource, val int64, rQuota cache.ResourceQuota) (granularMode, bool, *Status) {
var status Status
var borrow bool

borrow := a.cq.BorrowingWith(fr, val) && a.cq.HasParent()
available := a.cq.Available(fr)
maxCapacity := a.cq.PotentialAvailable(fr)

Expand All @@ -616,7 +616,7 @@ func (a *FlavorAssigner) fitsResourceQuota(log logr.Logger, fr resources.FlavorR

// Fit
if val <= available {
return fit, a.cq.ResourceNode.Usage[fr]+val > rQuota.Nominal, nil
return fit, borrow, nil
}

// Check if preemption is possible
Expand All @@ -628,7 +628,6 @@ func (a *FlavorAssigner) fitsResourceQuota(log logr.Logger, fr resources.FlavorR
}
} else if a.canPreemptWhileBorrowing() {
mode = preempt
borrow = true
}

status.append(fmt.Sprintf("insufficient unused quota for %s in flavor %s, %s more needed",
Expand Down
4 changes: 4 additions & 0 deletions pkg/scheduler/flavorassigner/flavorassigner_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -372,6 +372,7 @@ func TestAssignFlavors(t *testing.T) {
},
Count: 1,
}},
Borrowing: true,
Usage: resources.FlavorResourceQuantities{
{Flavor: "two", Resource: corev1.ResourceCPU}: 3_000,
{Flavor: "two", Resource: corev1.ResourceMemory}: 10 * utiltesting.Mi,
Expand Down Expand Up @@ -885,6 +886,7 @@ func TestAssignFlavors(t *testing.T) {
},
Count: 1,
}},
Borrowing: true,
Usage: resources.FlavorResourceQuantities{
{Flavor: "one", Resource: corev1.ResourceCPU}: 2_000,
},
Expand Down Expand Up @@ -966,6 +968,7 @@ func TestAssignFlavors(t *testing.T) {
},
Count: 1,
}},
Borrowing: true,
Usage: resources.FlavorResourceQuantities{
{Flavor: "one", Resource: corev1.ResourceCPU}: 2_000,
},
Expand Down Expand Up @@ -1816,6 +1819,7 @@ func TestAssignFlavors(t *testing.T) {
},
Count: 1,
}},
Borrowing: true,
Usage: resources.FlavorResourceQuantities{
{Flavor: "one", Resource: corev1.ResourceCPU}: 9_000,
{Flavor: "one", Resource: corev1.ResourcePods}: 1,
Expand Down
112 changes: 112 additions & 0 deletions pkg/scheduler/scheduler_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -816,6 +816,13 @@ func TestSchedule(t *testing.T) {
},
},
"multiple CQs need preemption": {
// legacy mode considers eng-alpha/pending to
// be left in queue, rather than inadmissible,
// due to legacy overlapping preemption skip
// logic. Since we are deleting legacy code in
// 0.10, we will disable this test for it,
// rather than duplicate it.
multiplePreemptions: MultiplePremptions,
additionalClusterQueues: []kueue.ClusterQueue{
*utiltesting.MakeClusterQueue("other-alpha").
Cohort("other").
Expand Down Expand Up @@ -2602,6 +2609,111 @@ func TestSchedule(t *testing.T) {
"eng-beta/b1": *utiltesting.MakeAdmission("other-beta").Assignment("gpu", "spot", "5").Obj(),
},
},
"workload requiring reclaimation prioritized over wl in another full cq": {
// Also see #3405.
//
// CQ2 is lending out capacity to its
// Cohort. It has a pending workload, WL2,
// that fits within nominal capacity, and a
// reclaim policy set to any.

// CQ1 is using half of its capacity, and is
// also lending out remaining capacity.

// CQ3 has no capacity of its own, and is
// borrowing 10 nominal capacity.

// With a pending workloads WL1 and WL2 queued
// in CQ1 and CQ2 respectively, we want to
// make sure that the WL2 is processed first,
// so that its preemption calculations are not
// invalidated by CQ1's WL1, which won't fit
// into its nominal capacity given the
// admitted Admitted-Workload-1.

// As WL1 has an earlier creation timestamp
// than WL2, there was a bug where it would
// process first, reserving capacity which
// invalidated WL2's preemption calculations,
// blocking it indefinitely from reclaiming
// its nominal capacity.
//
// We don't test legacy mode as it classifies
// inadmissible/left different, and we will
// delete that logic shortly.
multiplePreemptions: MultiplePremptions,
additionalClusterQueues: []kueue.ClusterQueue{
*utiltesting.MakeClusterQueue("CQ1").
Cohort("other").
ResourceGroup(
utiltesting.MakeFlavorQuotas("on-demand").Resource("gpu", "10").FlavorQuotas,
).
Obj(),
*utiltesting.MakeClusterQueue("CQ2").
Cohort("other").
Preemption(kueue.ClusterQueuePreemption{
ReclaimWithinCohort: kueue.PreemptionPolicyAny,
}).
ResourceGroup(
utiltesting.MakeFlavorQuotas("on-demand").Resource("gpu", "10").FlavorQuotas,
).
Obj(),
*utiltesting.MakeClusterQueue("CQ3").
Cohort("other").
ResourceGroup(
utiltesting.MakeFlavorQuotas("on-demand").Resource("gpu", "0").FlavorQuotas,
).
Obj(),
},
additionalLocalQueues: []kueue.LocalQueue{
*utiltesting.MakeLocalQueue("lq", "eng-alpha").ClusterQueue("CQ1").Obj(),
*utiltesting.MakeLocalQueue("lq", "eng-beta").ClusterQueue("CQ2").Obj(),
*utiltesting.MakeLocalQueue("lq", "eng-gamma").ClusterQueue("CQ3").Obj(),
},
workloads: []kueue.Workload{
*utiltesting.MakeWorkload("Admitted-Workload-1", "eng-alpha").
Queue("lq").
Request("gpu", "5").
SimpleReserveQuota("CQ1", "on-demand", now).
Obj(),
*utiltesting.MakeWorkload("WL1", "eng-alpha").
Creation(now).
Queue("lq").
Request("gpu", "10").
Obj(),
*utiltesting.MakeWorkload("WL2", "eng-beta").
Creation(now.Add(time.Second)).
Queue("lq").
Request("gpu", "10").
Obj(),
*utiltesting.MakeWorkload("Admitted-Workload-2", "eng-gamma").
Queue("lq").
Priority(0).
Request("gpu", "5").
SimpleReserveQuota("CQ3", "on-demand", now).
Obj(),
*utiltesting.MakeWorkload("Admitted-Workload-3", "eng-gamma").
Queue("lq").
Priority(1).
Request("gpu", "5").
SimpleReserveQuota("CQ3", "on-demand", now).
Obj(),
},
wantPreempted: sets.Set[string](
sets.NewString("eng-gamma/Admitted-Workload-2"),
),
wantLeft: map[string][]string{
"CQ2": {"eng-beta/WL2"},
},
wantInadmissibleLeft: map[string][]string{
"CQ1": {"eng-alpha/WL1"},
},
wantAssignments: map[string]kueue.Admission{
"eng-alpha/Admitted-Workload-1": *utiltesting.MakeAdmission("CQ1").Assignment("gpu", "on-demand", "5").Obj(),
"eng-gamma/Admitted-Workload-2": *utiltesting.MakeAdmission("CQ3").Assignment("gpu", "on-demand", "5").Obj(),
"eng-gamma/Admitted-Workload-3": *utiltesting.MakeAdmission("CQ3").Assignment("gpu", "on-demand", "5").Obj(),
},
},
}

for name, tc := range cases {
Expand Down
5 changes: 3 additions & 2 deletions test/integration/scheduler/scheduler_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1801,9 +1801,10 @@ var _ = ginkgo.Describe("Scheduler", func() {
g.Expect(k8sClient.Update(ctx, &cq)).Should(gomega.Succeed())
}, util.Timeout, util.Interval).Should(gomega.Succeed())

// pendingWl exceed nominal+borrowing quota and cannot preempt due to low priority.
// pendingWl exceed nominal+borrowing quota and cannot preempt as priority based
// premption within CQ is disabled.
pendingWl := testing.MakeWorkload("pending-wl", matchingNS.Name).Queue(strictFIFOLocalQueue.
Name).Request(corev1.ResourceCPU, "3").Priority(9).Obj()
Name).Request(corev1.ResourceCPU, "3").Priority(99).Obj()
gomega.Expect(k8sClient.Create(ctx, pendingWl)).Should(gomega.Succeed())

// borrowingWL can borrow shared resources, so it should be scheduled even if workloads
Expand Down