Skip to content

Commit

Permalink
[YUNIKORN-1909] add gang scheduling with hugepages
Browse files Browse the repository at this point in the history
Signed-off-by: Frank Yang <[email protected]>
  • Loading branch information
FrankYang0529 committed Aug 25, 2023
1 parent 71b5fe9 commit 6fdd9c3
Show file tree
Hide file tree
Showing 2 changed files with 49 additions and 0 deletions.
5 changes: 5 additions & 0 deletions .github/workflows/pre-commit.yml
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,11 @@ jobs:
uses: actions/setup-go@v3
with:
go-version-file: .go_version
- name: Set hugepages
  # Reserve 1024 huge pages on the runner before the e2e step below runs.
  # The Verify_HugePage gang scheduling spec skips itself when no node
  # reports hugepages capacity, so without this step it would not run in CI.
  # The setting is appended to /etc/sysctl.conf, reloaded with `sysctl -p`,
  # and the effective value is then printed to the job log.
  run: |
    echo "vm.nr_hugepages = 1024" | sudo tee -a /etc/sysctl.conf
    sudo sysctl -p
    sudo sysctl -a | grep vm.nr_hugepages
- run: ./scripts/run-e2e-tests.sh -a "test" -n "yk8s" -v "kindest/node:${KIND_NODE_IMAGE}" ${KIND_EXTRA_ARGS}
env:
KIND_NODE_IMAGE: ${{ matrix.k8s }}
Expand Down
44 changes: 44 additions & 0 deletions test/e2e/gang_scheduling/gang_scheduling_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -544,6 +544,50 @@ var _ = Describe("", func() {
}),
)

// Test placeholder with hugepages
// 0. Skip unless at least one node reports non-zero hugepages-2Mi capacity
// 1. Deploy 1 job with hugepages-2Mi
// 2. Verify all pods running
It("Verify_HugePage", func() {
	hugepageKey := fmt.Sprintf("%s2Mi", v1.ResourceHugePagesPrefix)
	nodes, err := kClient.GetNodes()
	Ω(err).NotTo(HaveOccurred())

	// Look for any node whose capacity lists a non-zero amount of 2Mi hugepages.
	hasHugePages := false
	for i := range nodes.Items {
		if v, ok := nodes.Items[i].Status.Capacity[v1.ResourceName(hugepageKey)]; ok && v.Value() != 0 {
			hasHugePages = true
			break
		}
	}
	if !hasHugePages {
		ginkgo.Skip("Skip hugepages test as no node has hugepages")
	}

	// Add hugepages to a private copy of the request. minResource is declared
	// outside this spec, so writing the key into it directly would leave the
	// hugepages request in place for whatever uses the map afterwards.
	hugepageResource := make(map[string]resource.Quantity, len(minResource)+1)
	for name, quantity := range minResource {
		hugepageResource[name] = quantity
	}
	hugepageResource[hugepageKey] = resource.MustParse("100Mi")

	annotations := k8s.PodAnnotation{
		TaskGroupName: groupA,
		TaskGroups: []interfaces.TaskGroup{
			{Name: groupA, MinMember: int32(3), MinResource: hugepageResource},
		},
	}
	job := createJob(appID, hugepageResource, annotations, 3)

	By("Verify all job pods are running")
	jobRunErr := kClient.WaitForJobPods(ns, job.Name, int(*job.Spec.Parallelism), 2*time.Minute)
	Ω(jobRunErr).NotTo(HaveOccurred())

	checkAppStatus(appID, yunikorn.States().Application.Running)

	// Ensure placeholders are replaced and allocations count is correct
	appDaoInfo, appDaoInfoErr := restClient.GetAppInfo(configmanager.DefaultPartition, nsQueue, appID)
	Ω(appDaoInfoErr).NotTo(HaveOccurred())
	Ω(len(appDaoInfo.PlaceholderData)).To(Equal(1), "Placeholder count is not correct")
	checkPlaceholderData(appDaoInfo, groupA, 3, 3, 0)
	Ω(len(appDaoInfo.Allocations)).To(Equal(3), "Allocations count is not correct")
})

AfterEach(func() {
testDescription := ginkgo.CurrentSpecReport()
if testDescription.Failed() {
Expand Down

0 comments on commit 6fdd9c3

Please sign in to comment.