Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 31 additions & 0 deletions pkg/controller/jobframework/tas_validation.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ package jobframework

import (
"fmt"
"strconv"

metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
metavalidation "k8s.io/apimachinery/pkg/apis/meta/v1/validation"
Expand All @@ -31,6 +32,8 @@ func ValidateTASPodSetRequest(replicaPath *field.Path, replicaMetadata *metav1.O
requiredValue, requiredFound := replicaMetadata.Annotations[kueuealpha.PodSetRequiredTopologyAnnotation]
preferredValue, preferredFound := replicaMetadata.Annotations[kueuealpha.PodSetPreferredTopologyAnnotation]
_, unconstrainedFound := replicaMetadata.Annotations[kueuealpha.PodSetUnconstrainedTopologyAnnotation]
sliceRequiredValue, sliceRequiredFound := replicaMetadata.Annotations[kueuealpha.PodSetSliceRequiredTopologyAnnotation]
sliceSizeValue, sliceSizeFound := replicaMetadata.Annotations[kueuealpha.PodSetSliceSizeAnnotation]

// validate no more than 1 annotation
asInt := func(b bool) int {
Expand All @@ -57,5 +60,33 @@ func ValidateTASPodSetRequest(replicaPath *field.Path, replicaMetadata *metav1.O
if preferredFound {
allErrs = append(allErrs, metavalidation.ValidateLabelName(preferredValue, annotationsPath.Key(kueuealpha.PodSetPreferredTopologyAnnotation))...)
}
if sliceRequiredFound {
allErrs = append(allErrs, metavalidation.ValidateLabelName(sliceRequiredValue, annotationsPath.Key(kueuealpha.PodSetSliceRequiredTopologyAnnotation))...)
}

// validate slice size annotation
if sliceSizeFound {
val, err := strconv.Atoi(sliceSizeValue)
if err != nil {
allErrs = append(allErrs, field.Invalid(annotationsPath.Key(kueuealpha.PodSetSliceSizeAnnotation), sliceSizeValue, "must be a numeric value"))
} else if val < 1 {
allErrs = append(allErrs, field.Invalid(annotationsPath.Key(kueuealpha.PodSetSliceSizeAnnotation), sliceSizeValue, "must be greater than or equal to 1"))
}
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we validate that it divides into pod count with no remainder?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it's a good idea, but I think it'll get more complex, as we don't have easy access to number of pods in a PodSet in that stage (see PR description). I'll give it a look thought, maybe it's easy.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does it make the algorithm substantially harder? If not, then I would rather support also Jobs divided with reminder.

The API-wise it makes sense to me, that you may want to have a Job of size 1000 Pods split into racks of 16 nodes. It might get awkward to require a user to pad the Job to create 1008 Pods to split into racks equally.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think it will make algorithm much more complex. If we do not mind assuming that the last slice behaves like a full slice then it should work almost out of the box (corner case: if 1000 pods fit, but 1008 do not fit, then we also won't fit such workload)

Copy link
Contributor

@mimowo mimowo Jun 10, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it is fair for simplicity to allow it with the remark about its use in documentation, for example in the limitations.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Cool. I'll make sure to reflect it in the main PR and write a test for that.

There is still a caveat of checking if slice size do not exceed the number of pods in PodSet. However that will be a bigger change, so we can proceed with this PR while I work on adding that validation.


// validate slice annotations
if sliceRequiredFound {
if !requiredFound && !preferredFound {
allErrs = append(allErrs, field.Forbidden(annotationsPath.Key(kueuealpha.PodSetSliceRequiredTopologyAnnotation), "can be used with required or preferred topology only"))
}
if !sliceSizeFound {
allErrs = append(allErrs, field.Required(annotationsPath.Key(kueuealpha.PodSetSliceSizeAnnotation), "slice size is required if slice topology is requested"))
}
}

if !sliceRequiredFound && sliceSizeFound {
allErrs = append(allErrs, field.Forbidden(annotationsPath.Key(kueuealpha.PodSetSliceSizeAnnotation), fmt.Sprintf("cannot be set when '%s' is not present", kueuealpha.PodSetSliceRequiredTopologyAnnotation)))
}

return allErrs
}
106 changes: 106 additions & 0 deletions pkg/controller/jobs/job/job_webhook_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -277,6 +277,89 @@ func TestValidateCreate(t *testing.T) {
invalidLabelKeyMessage).WithOrigin("labelKey"),
},
},
{
name: "valid slice topology request",
job: testingutil.MakeJob("job", "default").
PodAnnotation(kueuealpha.PodSetRequiredTopologyAnnotation, "cloud.com/block").
PodAnnotation(kueuealpha.PodSetSliceRequiredTopologyAnnotation, "cloud.com/block").
PodAnnotation(kueuealpha.PodSetSliceSizeAnnotation, "1").
Obj(),
wantErr: nil,
},
{
name: "invalid topology request - unconstrained with slices defined",
job: testingutil.MakeJob("job", "default").
PodAnnotation(kueuealpha.PodSetUnconstrainedTopologyAnnotation, "true").
PodAnnotation(kueuealpha.PodSetSliceRequiredTopologyAnnotation, "cloud.com/block").
PodAnnotation(kueuealpha.PodSetSliceSizeAnnotation, "1").
Obj(),
wantErr: field.ErrorList{
field.Forbidden(replicaMetaPath.Child("annotations").Key("kueue.x-k8s.io/podset-slice-required-topology"), "can be used with required or preferred topology only"),
},
},
{
name: "invalid topology request - slice requested without slice size",
job: testingutil.MakeJob("job", "default").
PodAnnotation(kueuealpha.PodSetRequiredTopologyAnnotation, "cloud.com/block").
PodAnnotation(kueuealpha.PodSetSliceRequiredTopologyAnnotation, "cloud.com/block").
Obj(),
wantErr: field.ErrorList{
field.Required(replicaMetaPath.Child("annotations").Key("kueue.x-k8s.io/podset-slice-size"), "slice size is required if slice topology is requested"),
},
},
{
name: "invalid topology request - slice size is not a number",
job: testingutil.MakeJob("job", "default").
PodAnnotation(kueuealpha.PodSetRequiredTopologyAnnotation, "cloud.com/block").
PodAnnotation(kueuealpha.PodSetSliceRequiredTopologyAnnotation, "cloud.com/block").
PodAnnotation(kueuealpha.PodSetSliceSizeAnnotation, "not a number").
Obj(),
wantErr: field.ErrorList{
field.Invalid(replicaMetaPath.Child("annotations").Key("kueue.x-k8s.io/podset-slice-size"), "not a number", "must be a numeric value"),
},
},
{
name: "invalid topology request - slice size is negative",
job: testingutil.MakeJob("job", "default").
PodAnnotation(kueuealpha.PodSetRequiredTopologyAnnotation, "cloud.com/block").
PodAnnotation(kueuealpha.PodSetSliceRequiredTopologyAnnotation, "cloud.com/block").
PodAnnotation(kueuealpha.PodSetSliceSizeAnnotation, "-1").
Obj(),
wantErr: field.ErrorList{
field.Invalid(replicaMetaPath.Child("annotations").Key("kueue.x-k8s.io/podset-slice-size"), "-1", "must be greater than or equal to 1"),
},
},
{
name: "invalid topology request - slice size is zero",
job: testingutil.MakeJob("job", "default").
PodAnnotation(kueuealpha.PodSetRequiredTopologyAnnotation, "cloud.com/block").
PodAnnotation(kueuealpha.PodSetSliceRequiredTopologyAnnotation, "cloud.com/block").
PodAnnotation(kueuealpha.PodSetSliceSizeAnnotation, "0").
Obj(),
wantErr: field.ErrorList{
field.Invalid(replicaMetaPath.Child("annotations").Key("kueue.x-k8s.io/podset-slice-size"), "0", "must be greater than or equal to 1"),
},
},
{
name: "invalid topology request - slice size provided without slice topology",
job: testingutil.MakeJob("job", "default").
PodAnnotation(kueuealpha.PodSetRequiredTopologyAnnotation, "cloud.com/block").
PodAnnotation(kueuealpha.PodSetSliceSizeAnnotation, "1").
Obj(),
wantErr: field.ErrorList{
field.Forbidden(replicaMetaPath.Child("annotations").Key("kueue.x-k8s.io/podset-slice-size"), "cannot be set when 'kueue.x-k8s.io/podset-slice-required-topology' is not present"),
},
},
{
name: "invalid topology request - slice topology requested without podset topology",
job: testingutil.MakeJob("job", "default").
PodAnnotation(kueuealpha.PodSetSliceRequiredTopologyAnnotation, "cloud.com/block").
PodAnnotation(kueuealpha.PodSetSliceSizeAnnotation, "1").
Obj(),
wantErr: field.ErrorList{
field.Forbidden(replicaMetaPath.Child("annotations").Key("kueue.x-k8s.io/podset-slice-required-topology"), "can be used with required or preferred topology only"),
},
},
}

for _, tc := range testcases {
Expand Down Expand Up @@ -541,6 +624,29 @@ func TestValidateUpdate(t *testing.T) {
`must not contain more than one topology annotation: ["kueue.x-k8s.io/podset-required-topology", `+
`"kueue.x-k8s.io/podset-preferred-topology", "kueue.x-k8s.io/podset-unconstrained-topology"]`)},
},
{
name: "valid slice topology request",
oldJob: testingutil.MakeJob("job", "default").
Obj(),
newJob: testingutil.MakeJob("job", "default").
PodAnnotation(kueuealpha.PodSetRequiredTopologyAnnotation, "cloud.com/block").
PodAnnotation(kueuealpha.PodSetSliceRequiredTopologyAnnotation, "cloud.com/block").
PodAnnotation(kueuealpha.PodSetSliceSizeAnnotation, "1").
Obj(),
},
{
name: "attempt to set invalid slice topology request",
oldJob: testingutil.MakeJob("job", "default").
Obj(),
newJob: testingutil.MakeJob("job", "default").
PodAnnotation(kueuealpha.PodSetUnconstrainedTopologyAnnotation, "true").
PodAnnotation(kueuealpha.PodSetSliceRequiredTopologyAnnotation, "cloud.com/block").
PodAnnotation(kueuealpha.PodSetSliceSizeAnnotation, "1").
Obj(),
wantErr: field.ErrorList{
field.Forbidden(replicaMetaPath.Child("annotations").Key("kueue.x-k8s.io/podset-slice-required-topology"), "can be used with required or preferred topology only"),
},
},
}

for _, tc := range testcases {
Expand Down