Skip to content

Commit 83c8bd7

Browse files
committed
remove shadowPodgroup in scheduler
1 parent 1a856a1 commit 83c8bd7

File tree

8 files changed

+80
-127
lines changed

8 files changed

+80
-127
lines changed

pkg/apis/helpers/helpers.go

+18
Original file line numberDiff line numberDiff line change
@@ -130,3 +130,21 @@ func DeleteConfigmap(job *vkv1.Job, kubeClients kubernetes.Interface, cmName str
130130

131131
return nil
132132
}
133+
134+
// GeneratePodgroupName generate podgroup name of normal pod
135+
func GeneratePodgroupName(pod *v1.Pod) string {
136+
pgName := vkbatchv1.PodgroupNamePrefix
137+
138+
if len(pod.OwnerReferences) != 0 {
139+
for _, ownerReference := range pod.OwnerReferences {
140+
if ownerReference.Controller != nil && *ownerReference.Controller == true {
141+
pgName += string(ownerReference.UID)
142+
return pgName
143+
}
144+
}
145+
}
146+
147+
pgName += string(pod.UID)
148+
149+
return pgName
150+
}

pkg/controllers/podgroup/pg_controller_handler.go

+1-19
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,6 @@ import (
2323
apierrors "k8s.io/apimachinery/pkg/api/errors"
2424
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
2525

26-
vkbatchv1 "volcano.sh/volcano/pkg/apis/batch/v1alpha1"
2726
"volcano.sh/volcano/pkg/apis/helpers"
2827
scheduling "volcano.sh/volcano/pkg/apis/scheduling/v1alpha2"
2928
)
@@ -71,7 +70,7 @@ func (cc *Controller) updatePodAnnotations(pod *v1.Pod, pgName string) error {
7170
}
7271

7372
func (cc *Controller) createNormalPodPGIfNotExist(pod *v1.Pod) error {
74-
pgName := generatePodgroupName(pod)
73+
pgName := helpers.GeneratePodgroupName(pod)
7574

7675
if _, err := cc.pgLister.PodGroups(pod.Namespace).Get(pgName); err != nil {
7776
if !apierrors.IsNotFound(err) {
@@ -101,23 +100,6 @@ func (cc *Controller) createNormalPodPGIfNotExist(pod *v1.Pod) error {
101100
return cc.updatePodAnnotations(pod, pgName)
102101
}
103102

104-
func generatePodgroupName(pod *v1.Pod) string {
105-
pgName := vkbatchv1.PodgroupNamePrefix
106-
107-
if len(pod.OwnerReferences) != 0 {
108-
for _, ownerReference := range pod.OwnerReferences {
109-
if ownerReference.Controller != nil && *ownerReference.Controller == true {
110-
pgName += string(ownerReference.UID)
111-
return pgName
112-
}
113-
}
114-
}
115-
116-
pgName += string(pod.UID)
117-
118-
return pgName
119-
}
120-
121103
func newPGOwnerReferences(pod *v1.Pod) []metav1.OwnerReference {
122104
if len(pod.OwnerReferences) != 0 {
123105
for _, ownerReference := range pod.OwnerReferences {

pkg/scheduler/api/pod_group_info.go

+2-2
Original file line numberDiff line numberDiff line change
@@ -44,11 +44,11 @@ const (
4444

4545
//PodGroupVersionV1Alpha2 represents PodGroupVersion of V1Alpha2
4646
PodGroupVersionV1Alpha2 string = "v1alpha2"
47-
// PodPending means the pod group has been accepted by the system, but scheduler can not allocate
47+
// PodGroupPending means the pod group has been accepted by the system, but scheduler can not allocate
4848
// enough resources to it.
4949
PodGroupPending PodGroupPhase = "Pending"
5050

51-
// PodRunning means `spec.minMember` pods of PodGroups has been in running phase.
51+
// PodGroupRunning means `spec.minMember` pods of PodGroups has been in running phase.
5252
PodGroupRunning PodGroupPhase = "Running"
5353

5454
// PodGroupUnknown means part of `spec.minMember` pods are running but the other part can not

pkg/scheduler/cache/cache.go

+31-44
Original file line numberDiff line numberDiff line change
@@ -507,24 +507,8 @@ func (sc *SchedulerCache) Evict(taskInfo *kbapi.TaskInfo, reason string) error {
507507
}
508508
}()
509509

510-
if !shadowPodGroup(job.PodGroup) {
511-
if job.PodGroup.Version == api.PodGroupVersionV1Alpha1 {
512-
pg, err := api.ConvertPodGroupInfoToV1alpha1(job.PodGroup)
513-
if err != nil {
514-
glog.Errorf("Error While converting api.PodGroup to v1alpha.PodGroup with error: %v", err)
515-
return err
516-
}
517-
sc.Recorder.Eventf(pg, v1.EventTypeNormal, "Evict", reason)
518-
} else if job.PodGroup.Version == api.PodGroupVersionV1Alpha2 {
519-
pg, err := api.ConvertPodGroupInfoToV1alpha2(job.PodGroup)
520-
if err != nil {
521-
glog.Errorf("Error While converting api.PodGroup to v2alpha.PodGroup with error: %v", err)
522-
return err
523-
}
524-
sc.Recorder.Eventf(pg, v1.EventTypeNormal, "Evict", reason)
525-
} else {
526-
return fmt.Errorf("Invalid PodGroup Version: %s", job.PodGroup.Version)
527-
}
510+
if err := sc.convertPodGroupInfo(job, v1.EventTypeNormal, "Evict", reason); err != nil {
511+
return err
528512
}
529513

530514
return nil
@@ -778,33 +762,16 @@ func (sc *SchedulerCache) RecordJobStatusEvent(job *kbapi.JobInfo) {
778762
baseErrorMessage = kbapi.AllNodeUnavailableMsg
779763
}
780764

781-
if !shadowPodGroup(job.PodGroup) {
782-
pgUnschedulable := job.PodGroup != nil &&
783-
(job.PodGroup.Status.Phase == api.PodGroupUnknown ||
784-
job.PodGroup.Status.Phase == api.PodGroupPending)
785-
pdbUnschedulabe := job.PDB != nil && len(job.TaskStatusIndex[api.Pending]) != 0
765+
pgUnschedulable := job.PodGroup != nil &&
766+
(job.PodGroup.Status.Phase == api.PodGroupUnknown ||
767+
job.PodGroup.Status.Phase == api.PodGroupPending)
768+
pdbUnschedulabe := job.PDB != nil && len(job.TaskStatusIndex[api.Pending]) != 0
786769

787-
// If pending or unschedulable, record unschedulable event.
788-
if pgUnschedulable || pdbUnschedulabe {
789-
msg := fmt.Sprintf("%v/%v tasks in gang unschedulable: %v", len(job.TaskStatusIndex[api.Pending]), len(job.Tasks), job.FitError())
790-
if job.PodGroup.Version == api.PodGroupVersionV1Alpha1 {
791-
podGroup, err := api.ConvertPodGroupInfoToV1alpha1(job.PodGroup)
792-
if err != nil {
793-
glog.Errorf("Error while converting PodGroup to v1alpha1.PodGroup with error: %v", err)
794-
}
795-
sc.Recorder.Eventf(podGroup, v1.EventTypeWarning,
796-
string(v1alpha1.PodGroupUnschedulableType), msg)
797-
}
770+
// If pending or unschedulable, record unschedulable event.
771+
if pgUnschedulable || pdbUnschedulabe {
772+
msg := fmt.Sprintf("%v/%v tasks in gang unschedulable: %v", len(job.TaskStatusIndex[api.Pending]), len(job.Tasks), job.FitError())
798773

799-
if job.PodGroup.Version == api.PodGroupVersionV1Alpha2 {
800-
podGroup, err := api.ConvertPodGroupInfoToV1alpha2(job.PodGroup)
801-
if err != nil {
802-
glog.Errorf("Error while converting PodGroup to v1alpha2.PodGroup with error: %v", err)
803-
}
804-
sc.Recorder.Eventf(podGroup, v1.EventTypeWarning,
805-
string(v1alpha1.PodGroupUnschedulableType), msg)
806-
}
807-
}
774+
sc.convertPodGroupInfo(job, v1.EventTypeWarning, string(v1alpha1.PodGroupUnschedulableType), msg)
808775
}
809776

810777
// Update podCondition for tasks Allocated and Pending before job discarded
@@ -825,7 +792,7 @@ func (sc *SchedulerCache) RecordJobStatusEvent(job *kbapi.JobInfo) {
825792

826793
// UpdateJobStatus update the status of job and its tasks.
827794
func (sc *SchedulerCache) UpdateJobStatus(job *kbapi.JobInfo, updatePG bool) (*kbapi.JobInfo, error) {
828-
if updatePG && !shadowPodGroup(job.PodGroup) {
795+
if updatePG {
829796
pg, err := sc.StatusUpdater.UpdatePodGroup(job.PodGroup)
830797
if err != nil {
831798
return nil, err
@@ -837,3 +804,23 @@ func (sc *SchedulerCache) UpdateJobStatus(job *kbapi.JobInfo, updatePG bool) (*k
837804

838805
return job, nil
839806
}
807+
808+
func (sc *SchedulerCache) convertPodGroupInfo(job *kbapi.JobInfo, eventtype, reason, message string) error {
809+
if job.PodGroup.Version == api.PodGroupVersionV1Alpha1 {
810+
pg, err := api.ConvertPodGroupInfoToV1alpha1(job.PodGroup)
811+
if err != nil {
812+
glog.Errorf("Error While converting api.PodGroup to v1alpha.PodGroup with error: %v", err)
813+
return err
814+
}
815+
sc.Recorder.Eventf(pg, eventtype, reason, message)
816+
} else if job.PodGroup.Version == api.PodGroupVersionV1Alpha2 {
817+
pg, err := api.ConvertPodGroupInfoToV1alpha2(job.PodGroup)
818+
if err != nil {
819+
glog.Errorf("Error While converting api.PodGroup to v2alpha.PodGroup with error: %v", err)
820+
return err
821+
}
822+
sc.Recorder.Eventf(pg, eventtype, reason, message)
823+
}
824+
825+
return fmt.Errorf("Invalid PodGroup Version: %s", job.PodGroup.Version)
826+
}

pkg/scheduler/cache/cache_test.go

+14-16
Original file line numberDiff line numberDiff line change
@@ -133,15 +133,15 @@ func TestAddPod(t *testing.T) {
133133
pod1 := buildPod("c1", "p1", "", v1.PodPending, buildResourceList("1000m", "1G"),
134134
[]metav1.OwnerReference{owner}, make(map[string]string))
135135
pi1 := api.NewTaskInfo(pod1)
136-
pi1.Job = "j1" // The job name is set by cache.
136+
pg1 := createShadowPodGroup(pod1)
137+
pi1.Job = getJobID(pg1) // The job name is set by cache.
137138
pod2 := buildPod("c1", "p2", "n1", v1.PodRunning, buildResourceList("1000m", "1G"),
138139
[]metav1.OwnerReference{owner}, make(map[string]string))
139140
pi2 := api.NewTaskInfo(pod2)
140-
pi2.Job = "j1" // The job name is set by cache.
141+
pg2 := createShadowPodGroup(pod2)
142+
pi2.Job = getJobID(pg2) // The job name is set by cache.
141143

142-
j1 := api.NewJobInfo(api.JobID("j1"), pi1, pi2)
143-
pg := createShadowPodGroup(pod1)
144-
j1.SetPodGroup(pg)
144+
j1 := api.NewJobInfo(api.JobID("c1/podgroup-j1"), pi1, pi2)
145145

146146
node1 := buildNode("n1", buildResourceList("2000m", "10G"))
147147
ni1 := api.NewNodeInfo(node1)
@@ -160,7 +160,7 @@ func TestAddPod(t *testing.T) {
160160
"n1": ni1,
161161
},
162162
Jobs: map[api.JobID]*api.JobInfo{
163-
"j1": j1,
163+
"c1/podgroup-j1": j1,
164164
},
165165
},
166166
},
@@ -196,23 +196,21 @@ func TestAddNode(t *testing.T) {
196196
pod1 := buildPod("c1", "p1", "", v1.PodPending, buildResourceList("1000m", "1G"),
197197
[]metav1.OwnerReference{owner1}, make(map[string]string))
198198
pi1 := api.NewTaskInfo(pod1)
199-
pi1.Job = "j1" // The job name is set by cache.
199+
pg1 := createShadowPodGroup(pod1)
200+
pi1.Job = getJobID(pg1) // The job name is set by cache.
200201

201202
pod2 := buildPod("c1", "p2", "n1", v1.PodRunning, buildResourceList("1000m", "1G"),
202203
[]metav1.OwnerReference{owner2}, make(map[string]string))
203204
pi2 := api.NewTaskInfo(pod2)
204-
pi2.Job = "j2" // The job name is set by cache.
205+
pg2 := createShadowPodGroup(pod2)
206+
pi2.Job = getJobID(pg2) // The job name is set by cache.
205207

206208
ni1 := api.NewNodeInfo(node1)
207209
ni1.AddTask(pi2)
208210

209-
j1 := api.NewJobInfo("j1")
210-
pg1 := createShadowPodGroup(pod1)
211-
j1.SetPodGroup(pg1)
211+
j1 := api.NewJobInfo("c1/podgroup-j1")
212212

213-
j2 := api.NewJobInfo("j2")
214-
pg2 := createShadowPodGroup(pod2)
215-
j2.SetPodGroup(pg2)
213+
j2 := api.NewJobInfo("c1/podgroup-j2")
216214

217215
j1.AddTaskInfo(pi1)
218216
j2.AddTaskInfo(pi2)
@@ -230,8 +228,8 @@ func TestAddNode(t *testing.T) {
230228
"n1": ni1,
231229
},
232230
Jobs: map[api.JobID]*api.JobInfo{
233-
"j1": j1,
234-
"j2": j2,
231+
"c1/podgroup-j1": j1,
232+
"c1/podgroup-j2": j2,
235233
},
236234
},
237235
},

pkg/scheduler/cache/event_handlers.go

+4-13
Original file line numberDiff line numberDiff line change
@@ -49,20 +49,11 @@ func (sc *SchedulerCache) getOrCreateJob(pi *kbapi.TaskInfo) *kbapi.JobInfo {
4949
return nil
5050
}
5151
pb := createShadowPodGroup(pi.Pod)
52-
pi.Job = kbapi.JobID(pb.Name)
53-
54-
if _, found := sc.Jobs[pi.Job]; !found {
55-
job := kbapi.NewJobInfo(pi.Job)
56-
job.SetPodGroup(pb)
57-
// Set default queue for shadow podgroup.
58-
job.Queue = kbapi.QueueID(sc.defaultQueue)
52+
pi.Job = getJobID(pb)
53+
}
5954

60-
sc.Jobs[pi.Job] = job
61-
}
62-
} else {
63-
if _, found := sc.Jobs[pi.Job]; !found {
64-
sc.Jobs[pi.Job] = kbapi.NewJobInfo(pi.Job)
65-
}
55+
if _, found := sc.Jobs[pi.Job]; !found {
56+
sc.Jobs[pi.Job] = kbapi.NewJobInfo(pi.Job)
6657
}
6758

6859
return sc.Jobs[pi.Job]

pkg/scheduler/cache/util.go

+4-27
Original file line numberDiff line numberDiff line change
@@ -17,43 +17,20 @@ limitations under the License.
1717
package cache
1818

1919
import (
20-
v1 "k8s.io/api/core/v1"
20+
"k8s.io/api/core/v1"
2121
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
2222

23-
"volcano.sh/volcano/pkg/apis/utils"
23+
"volcano.sh/volcano/pkg/apis/helpers"
2424
"volcano.sh/volcano/pkg/scheduler/api"
2525
)
2626

27-
const (
28-
shadowPodGroupKey = "volcano/shadow-pod-group"
29-
)
30-
31-
func shadowPodGroup(pg *api.PodGroup) bool {
32-
if pg == nil {
33-
return true
34-
}
35-
36-
_, found := pg.Annotations[shadowPodGroupKey]
37-
38-
return found
39-
}
40-
4127
func createShadowPodGroup(pod *v1.Pod) *api.PodGroup {
42-
jobID := api.JobID(utils.GetController(pod))
43-
if len(jobID) == 0 {
44-
jobID = api.JobID(pod.UID)
45-
}
28+
pgName := helpers.GeneratePodgroupName(pod)
4629

4730
return &api.PodGroup{
4831
ObjectMeta: metav1.ObjectMeta{
4932
Namespace: pod.Namespace,
50-
Name: string(jobID),
51-
Annotations: map[string]string{
52-
shadowPodGroupKey: string(jobID),
53-
},
54-
},
55-
Spec: api.PodGroupSpec{
56-
MinMember: 1,
33+
Name: pgName,
5734
},
5835
}
5936
}

test/e2e/predicates.go

+6-6
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,7 @@ var _ = Describe("Predicates E2E Test", func() {
8484
name: "na-job",
8585
tasks: []taskSpec{
8686
{
87-
img: "nginx",
87+
img: defaultNginxImage,
8888
req: slot,
8989
min: 1,
9090
rep: rep,
@@ -130,7 +130,7 @@ var _ = Describe("Predicates E2E Test", func() {
130130
name: "pa-job",
131131
tasks: []taskSpec{
132132
{
133-
img: "nginx",
133+
img: defaultNginxImage,
134134
req: slot,
135135
min: rep,
136136
rep: rep,
@@ -177,7 +177,7 @@ var _ = Describe("Predicates E2E Test", func() {
177177
name: "pa-job",
178178
tasks: []taskSpec{
179179
{
180-
img: "nginx",
180+
img: defaultNginxImage,
181181
req: slot,
182182
min: 2,
183183
rep: 2,
@@ -222,7 +222,7 @@ var _ = Describe("Predicates E2E Test", func() {
222222
name: "tt-job",
223223
tasks: []taskSpec{
224224
{
225-
img: "nginx",
225+
img: defaultNginxImage,
226226
req: oneCPU,
227227
min: 1,
228228
rep: 1,
@@ -270,7 +270,7 @@ var _ = Describe("Predicates E2E Test", func() {
270270
name: "tt-job",
271271
tasks: []taskSpec{
272272
{
273-
img: "nginx",
273+
img: defaultNginxImage,
274274
req: oneCPU,
275275
min: 1,
276276
rep: 1,
@@ -283,7 +283,7 @@ var _ = Describe("Predicates E2E Test", func() {
283283
name: "tt-job-no-toleration",
284284
tasks: []taskSpec{
285285
{
286-
img: "nginx",
286+
img: defaultNginxImage,
287287
req: oneCPU,
288288
min: 1,
289289
rep: 1,

0 commit comments

Comments
 (0)