From 6fdd9c31b1561697bbf4887f17e2f659afbe74b9 Mon Sep 17 00:00:00 2001
From: Frank Yang
Date: Sat, 12 Aug 2023 14:05:21 +0800
Subject: [PATCH] [YUNIKORN-1909] add gang scheduling with hugepages

Signed-off-by: Frank Yang
---
 .github/workflows/pre-commit.yml              |  5 ++
 .../gang_scheduling/gang_scheduling_test.go   | 47 +++++++++++++++++++
 2 files changed, 52 insertions(+)

diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml
index af2c7d1e7..f08c6f730 100644
--- a/.github/workflows/pre-commit.yml
+++ b/.github/workflows/pre-commit.yml
@@ -49,6 +49,11 @@ jobs:
         uses: actions/setup-go@v3
         with:
           go-version-file: .go_version
+      - name: Set hugepages
+        run: |
+          echo "vm.nr_hugepages = 1024" | sudo tee -a /etc/sysctl.conf
+          sudo sysctl -p
+          sudo sysctl -a | grep vm.nr_hugepages
       - run: ./scripts/run-e2e-tests.sh -a "test" -n "yk8s" -v "kindest/node:${KIND_NODE_IMAGE}" ${KIND_EXTRA_ARGS}
         env:
           KIND_NODE_IMAGE: ${{ matrix.k8s }}
diff --git a/test/e2e/gang_scheduling/gang_scheduling_test.go b/test/e2e/gang_scheduling/gang_scheduling_test.go
index 5d8069b03..bc1143f21 100644
--- a/test/e2e/gang_scheduling/gang_scheduling_test.go
+++ b/test/e2e/gang_scheduling/gang_scheduling_test.go
@@ -544,6 +544,53 @@ var _ = Describe("", func() {
 		}),
 	)
 
+	// Test placeholder with hugepages
+	// 1. Deploy 1 job with hugepages-2Mi
+	// 2. Verify all pods running
+	It("Verify_HugePage", func() {
+		hugepageKey := fmt.Sprintf("%s2Mi", v1.ResourceHugePagesPrefix)
+		nodes, err := kClient.GetNodes()
+		Ω(err).NotTo(HaveOccurred())
+		hasHugePages := false
+		for _, node := range nodes.Items {
+			if v, ok := node.Status.Capacity[v1.ResourceName(hugepageKey)]; ok && v.Value() != 0 {
+				hasHugePages = true
+				break
+			}
+		}
+		if !hasHugePages {
+			ginkgo.Skip("Skip hugepages test as no node has hugepages")
+		}
+
+		// Request hugepages through a copy: minResource is declared outside this
+		// spec, so writing the key into it directly could outlive this spec.
+		hugepageResource := make(map[string]resource.Quantity, len(minResource)+1)
+		for name, quantity := range minResource {
+			hugepageResource[name] = quantity
+		}
+		hugepageResource[hugepageKey] = resource.MustParse("100Mi")
+		annotations := k8s.PodAnnotation{
+			TaskGroupName: groupA,
+			TaskGroups: []interfaces.TaskGroup{
+				{Name: groupA, MinMember: int32(3), MinResource: hugepageResource},
+			},
+		}
+		job := createJob(appID, hugepageResource, annotations, 3)
+
+		By("Verify all job pods are running")
+		jobRunErr := kClient.WaitForJobPods(ns, job.Name, int(*job.Spec.Parallelism), 2*time.Minute)
+		Ω(jobRunErr).NotTo(HaveOccurred())
+
+		checkAppStatus(appID, yunikorn.States().Application.Running)
+
+		// Ensure placeholders are replaced and allocations count is correct
+		appDaoInfo, appDaoInfoErr := restClient.GetAppInfo(configmanager.DefaultPartition, nsQueue, appID)
+		Ω(appDaoInfoErr).NotTo(HaveOccurred())
+		Ω(len(appDaoInfo.PlaceholderData)).To(Equal(1), "Placeholder count is not correct")
+		checkPlaceholderData(appDaoInfo, groupA, 3, 3, 0)
+		Ω(len(appDaoInfo.Allocations)).To(Equal(3), "Allocations count is not correct")
+	})
+
 	AfterEach(func() {
 		testDescription := ginkgo.CurrentSpecReport()
 		if testDescription.Failed() {