From fd2811af15ae050cac7fe42f0d7ad292dff27412 Mon Sep 17 00:00:00 2001 From: Miciah Masters Date: Mon, 16 Sep 2019 17:38:41 -0400 Subject: [PATCH 1/3] UPSTREAM: 80004: Prefer to delete doubled-up pods of a ReplicaSet When scaling down a ReplicaSet, delete doubled-up replicas first, where a "doubled-up replica" is defined as one that is on the same node as an active replica belonging to a related ReplicaSet. ReplicaSets are considered "related" if they have a common controller (typically a Deployment). The intention of this change is to make a rolling update of a Deployment scale down the old ReplicaSet as it scales up the new ReplicaSet by deleting pods from the old ReplicaSet that are colocated with ready pods of the new ReplicaSet. This change in the behavior of rolling updates can be combined with pod affinity rules to preserve the locality of a Deployment's pods across a rollout. A specific scenario that benefits from this change is when a Deployment's pods are exposed by a Service that has type "LoadBalancer" and external traffic policy "Local". In this scenario, the load balancer uses health checks to determine whether it should forward traffic for the Service to a particular node. If the node has no local endpoints for the Service, the health check will fail for that node. Eventually, the load balancer will stop forwarding traffic to that node. In the meantime, the service proxy drops traffic for that Service. Thus, in order to reduce the risk of dropping traffic during a rolling update, it is desirable to preserve node locality of endpoints. * vendor/k8s.io/kubernetes/pkg/controller/controller_utils.go (ActivePodsWithRanks): New type to sort pods using a given ranking. * vendor/k8s.io/kubernetes/pkg/controller/controller_utils_test.go (TestSortingActivePodsWithRanks): New test for ActivePodsWithRanks. * vendor/k8s.io/kubernetes/pkg/controller/replicaset/replica_set.go (getReplicaSetsWithSameController): New method. Given a ReplicaSet, return all ReplicaSets that have the same owner. (manageReplicas): Call getIndirectlyRelatedPods, and pass its result to getPodsToDelete. (getIndirectlyRelatedPods): New method. Given a ReplicaSet, return all pods that are owned by any ReplicaSet with the same owner. (getPodsToDelete): Add an argument for related pods. Use related pods and the new getPodsRankedByRelatedPodsOnSameNode function to take into account whether the pod is doubled up when sorting pods for deletion. (getPodsRankedByRelatedPodsOnSameNode): New function. Return an ActivePodsWithRanks value that wraps the given slice of pods and computes ranks where each pod's rank is equal to the number of active related pods that are colocated on the same node. * vendor/k8s.io/kubernetes/pkg/controller/replicaset/replica_set_test.go (newReplicaSet): Set OwnerReferences on the ReplicaSet. (newPod): Set a unique UID on the pod. (byName): New type to sort pods by name. (TestRelatedPodsLookup): New test for getIndirectlyRelatedPods. (TestGetPodsToDelete): Augment the "various pod phases and conditions, diff = len(pods)" test case to ensure that scale-down still selects doubled-up pods if there are not enough other pods to scale down. Add a "various pod phases and conditions, diff = len(pods), relatedPods empty" test case to verify that getPodsToDelete works even if related pods could not be determined. Add a "ready and colocated with another ready pod vs not colocated, diff < len(pods)" test case to verify that a doubled-up pod gets preferred for deletion.
Augment the "various pod phases and conditions, diff < len(pods)" test case to ensure that not-ready pods are preferred over ready but doubled-up pods. * vendor/k8s.io/kubernetes/pkg/controller/replicaset/BUILD: Regenerate. * vendor/k8s.io/kubernetes/test/e2e/apps/deployment.go (testRollingUpdateDeploymentWithLocalTrafficLoadBalancer): New end-to-end test. Create a deployment with a rolling update strategy and affinity rules and a load balancer with "Local" external traffic policy, and verify that set of nodes with local endponts for the service remains unchanged during rollouts. (setAffinity): New helper, used by testRollingUpdateDeploymentWithLocalTrafficLoadBalancer. * vendor/k8s.io/kubernetes/test/e2e/apps/types.go (AgnhostImageName) (AgnhostImage): New constants for the agnhost image. * vendor/k8s.io/kubernetes/test/e2e/framework/service/jig.go (GetEndpointNodes): Factor building the set of node names out... (GetEndpointNodeNames): ...into this new method. --- .../pkg/controller/controller_utils.go | 88 +++++ .../pkg/controller/controller_utils_test.go | 90 ++++++ .../pkg/controller/replicaset/BUILD | 1 + .../pkg/controller/replicaset/replica_set.go | 102 +++++- .../controller/replicaset/replica_set_test.go | 305 +++++++++++++++--- .../kubernetes/test/e2e/apps/deployment.go | 153 +++++++++ .../k8s.io/kubernetes/test/e2e/apps/types.go | 4 + .../test/e2e/framework/service/jig.go | 21 +- 8 files changed, 702 insertions(+), 62 deletions(-) diff --git a/vendor/k8s.io/kubernetes/pkg/controller/controller_utils.go b/vendor/k8s.io/kubernetes/pkg/controller/controller_utils.go index 025bf9d5ace0..17fcf98f9a71 100644 --- a/vendor/k8s.io/kubernetes/pkg/controller/controller_utils.go +++ b/vendor/k8s.io/kubernetes/pkg/controller/controller_utils.go @@ -771,6 +771,94 @@ func (s ActivePods) Less(i, j int) bool { return false } +// ActivePodsWithRanks is a sortable list of pods and a list of corresponding +// ranks which will be considered during sorting. The two lists must have equal +// length. After sorting, the pods will be ordered as follows, applying each +// rule in turn until one matches: +// +// 1. If only one of the pods is assigned to a node, the pod that is not +// assigned comes before the pod that is. +// 2. If the pods' phases differ, a pending pod comes before a pod whose phase +// is unknown, and a pod whose phase is unknown comes before a running pod. +// 3. If exactly one of the pods is ready, the pod that is not ready comes +// before the ready pod. +// 4. If the pods' ranks differ, the pod with greater rank comes before the pod +// with lower rank. +// 5. If both pods are ready but have not been ready for the same amount of +// time, the pod that has been ready for a shorter amount of time comes +// before the pod that has been ready for longer. +// 6. If one pod has a container that has restarted more than any container in +// the other pod, the pod with the container with more restarts comes +// before the other pod. +// 7. If the pods' creation times differ, the pod that was created more recently +// comes before the older pod. +// +// If none of these rules matches, the second pod comes before the first pod. +// +// The intention of this ordering is to put pods that should be preferred for +// deletion first in the list. +type ActivePodsWithRanks struct { + // Pods is a list of pods. + Pods []*v1.Pod + + // Rank is a ranking of pods. 
This ranking is used during sorting when + // comparing two pods that are both scheduled, in the same phase, and + // having the same ready status. + Rank []int +} + +func (s ActivePodsWithRanks) Len() int { + return len(s.Pods) +} + +func (s ActivePodsWithRanks) Swap(i, j int) { + s.Pods[i], s.Pods[j] = s.Pods[j], s.Pods[i] + s.Rank[i], s.Rank[j] = s.Rank[j], s.Rank[i] +} + +// Less compares two pods with corresponding ranks and returns true if the first +// one should be preferred for deletion. +func (s ActivePodsWithRanks) Less(i, j int) bool { + // 1. Unassigned < assigned + // If only one of the pods is unassigned, the unassigned one is smaller + if s.Pods[i].Spec.NodeName != s.Pods[j].Spec.NodeName && (len(s.Pods[i].Spec.NodeName) == 0 || len(s.Pods[j].Spec.NodeName) == 0) { + return len(s.Pods[i].Spec.NodeName) == 0 + } + // 2. PodPending < PodUnknown < PodRunning + m := map[v1.PodPhase]int{v1.PodPending: 0, v1.PodUnknown: 1, v1.PodRunning: 2} + if m[s.Pods[i].Status.Phase] != m[s.Pods[j].Status.Phase] { + return m[s.Pods[i].Status.Phase] < m[s.Pods[j].Status.Phase] + } + // 3. Not ready < ready + // If only one of the pods is not ready, the not ready one is smaller + if podutil.IsPodReady(s.Pods[i]) != podutil.IsPodReady(s.Pods[j]) { + return !podutil.IsPodReady(s.Pods[i]) + } + // 4. Doubled up < not doubled up + // If one of the two pods is on the same node as one or more additional + // ready pods that belong to the same replicaset, whichever pod has more + // colocated ready pods is less + if s.Rank[i] != s.Rank[j] { + return s.Rank[i] > s.Rank[j] + } + // TODO: take availability into account when we push minReadySeconds information from deployment into pods, + // see https://github.com/kubernetes/kubernetes/issues/22065 + // 5. Been ready for empty time < less time < more time + // If both pods are ready, the latest ready one is smaller + if podutil.IsPodReady(s.Pods[i]) && podutil.IsPodReady(s.Pods[j]) && !podReadyTime(s.Pods[i]).Equal(podReadyTime(s.Pods[j])) { + return afterOrZero(podReadyTime(s.Pods[i]), podReadyTime(s.Pods[j])) + } + // 6. Pods with containers with higher restart counts < lower restart counts + if maxContainerRestarts(s.Pods[i]) != maxContainerRestarts(s.Pods[j]) { + return maxContainerRestarts(s.Pods[i]) > maxContainerRestarts(s.Pods[j]) + } + // 7. Empty creation time pods < newer pods < older pods + if !s.Pods[i].CreationTimestamp.Equal(&s.Pods[j].CreationTimestamp) { + return afterOrZero(&s.Pods[i].CreationTimestamp, &s.Pods[j].CreationTimestamp) + } + return false +} + // afterOrZero checks if time t1 is after time t2; if one of them // is zero, the zero time is seen as after non-zero time. 
func afterOrZero(t1, t2 *metav1.Time) bool { diff --git a/vendor/k8s.io/kubernetes/pkg/controller/controller_utils_test.go b/vendor/k8s.io/kubernetes/pkg/controller/controller_utils_test.go index 3bdda84a662a..837a10b22a8e 100644 --- a/vendor/k8s.io/kubernetes/pkg/controller/controller_utils_test.go +++ b/vendor/k8s.io/kubernetes/pkg/controller/controller_utils_test.go @@ -429,6 +429,96 @@ func TestSortingActivePods(t *testing.T) { } } +func TestSortingActivePodsWithRanks(t *testing.T) { + now := metav1.Now() + then := metav1.Time{Time: now.AddDate(0, -1, 0)} + zeroTime := metav1.Time{} + pod := func(podName, nodeName string, phase v1.PodPhase, ready bool, restarts int32, readySince metav1.Time, created metav1.Time) *v1.Pod { + var conditions []v1.PodCondition + var containerStatuses []v1.ContainerStatus + if ready { + conditions = []v1.PodCondition{{Type: v1.PodReady, Status: v1.ConditionTrue, LastTransitionTime: readySince}} + containerStatuses = []v1.ContainerStatus{{RestartCount: restarts}} + } + return &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + CreationTimestamp: created, + Name: podName, + }, + Spec: v1.PodSpec{NodeName: nodeName}, + Status: v1.PodStatus{ + Conditions: conditions, + ContainerStatuses: containerStatuses, + Phase: phase, + }, + } + } + var ( + unscheduledPod = pod("unscheduled", "", v1.PodPending, false, 0, zeroTime, zeroTime) + scheduledPendingPod = pod("pending", "node", v1.PodPending, false, 0, zeroTime, zeroTime) + unknownPhasePod = pod("unknown-phase", "node", v1.PodUnknown, false, 0, zeroTime, zeroTime) + runningNotReadyPod = pod("not-ready", "node", v1.PodRunning, false, 0, zeroTime, zeroTime) + runningReadyNoLastTransitionTimePod = pod("ready-no-last-transition-time", "node", v1.PodRunning, true, 0, zeroTime, zeroTime) + runningReadyNow = pod("ready-now", "node", v1.PodRunning, true, 0, now, now) + runningReadyThen = pod("ready-then", "node", v1.PodRunning, true, 0, then, then) + runningReadyNowHighRestarts = pod("ready-high-restarts", "node", v1.PodRunning, true, 9001, now, now) + runningReadyNowCreatedThen = pod("ready-now-created-then", "node", v1.PodRunning, true, 0, now, then) + ) + equalityTests := []*v1.Pod{ + unscheduledPod, + scheduledPendingPod, + unknownPhasePod, + runningNotReadyPod, + runningReadyNowCreatedThen, + runningReadyNow, + runningReadyThen, + runningReadyNowHighRestarts, + runningReadyNowCreatedThen, + } + for _, pod := range equalityTests { + podsWithRanks := ActivePodsWithRanks{ + Pods: []*v1.Pod{pod, pod}, + Rank: []int{1, 1}, + } + if podsWithRanks.Less(0, 1) || podsWithRanks.Less(1, 0) { + t.Errorf("expected pod %q not to be less than itself", pod.Name) + } + } + type podWithRank struct { + pod *v1.Pod + rank int + } + inequalityTests := []struct { + lesser, greater podWithRank + }{ + {podWithRank{unscheduledPod, 1}, podWithRank{scheduledPendingPod, 2}}, + {podWithRank{unscheduledPod, 2}, podWithRank{scheduledPendingPod, 1}}, + {podWithRank{scheduledPendingPod, 1}, podWithRank{unknownPhasePod, 2}}, + {podWithRank{unknownPhasePod, 1}, podWithRank{runningNotReadyPod, 2}}, + {podWithRank{runningNotReadyPod, 1}, podWithRank{runningReadyNoLastTransitionTimePod, 1}}, + {podWithRank{runningReadyNoLastTransitionTimePod, 1}, podWithRank{runningReadyNow, 1}}, + {podWithRank{runningReadyNow, 2}, podWithRank{runningReadyNoLastTransitionTimePod, 1}}, + {podWithRank{runningReadyNow, 1}, podWithRank{runningReadyThen, 1}}, + {podWithRank{runningReadyNow, 2}, podWithRank{runningReadyThen, 1}}, + {podWithRank{runningReadyNowHighRestarts, 1},
podWithRank{runningReadyNow, 1}}, + {podWithRank{runningReadyNow, 2}, podWithRank{runningReadyNowHighRestarts, 1}}, + {podWithRank{runningReadyNow, 1}, podWithRank{runningReadyNowCreatedThen, 1}}, + {podWithRank{runningReadyNowCreatedThen, 2}, podWithRank{runningReadyNow, 1}}, + } + for _, test := range inequalityTests { + podsWithRanks := ActivePodsWithRanks{ + Pods: []*v1.Pod{test.lesser.pod, test.greater.pod}, + Rank: []int{test.lesser.rank, test.greater.rank}, + } + if !podsWithRanks.Less(0, 1) { + t.Errorf("expected pod %q with rank %v to be less than %q with rank %v", podsWithRanks.Pods[0].Name, podsWithRanks.Rank[0], podsWithRanks.Pods[1].Name, podsWithRanks.Rank[1]) + } + if podsWithRanks.Less(1, 0) { + t.Errorf("expected pod %q with rank %v not to be less than %v with rank %v", podsWithRanks.Pods[1].Name, podsWithRanks.Rank[1], podsWithRanks.Pods[0].Name, podsWithRanks.Rank[0]) + } + } +} + func TestActiveReplicaSetsFiltering(t *testing.T) { var replicaSets []*apps.ReplicaSet replicaSets = append(replicaSets, newReplicaSet("zero", 0)) diff --git a/vendor/k8s.io/kubernetes/pkg/controller/replicaset/BUILD b/vendor/k8s.io/kubernetes/pkg/controller/replicaset/BUILD index c3c879f8b34b..00da56d365eb 100644 --- a/vendor/k8s.io/kubernetes/pkg/controller/replicaset/BUILD +++ b/vendor/k8s.io/kubernetes/pkg/controller/replicaset/BUILD @@ -24,6 +24,7 @@ go_library( "//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library", "//staging/src/k8s.io/apimachinery/pkg/labels:go_default_library", "//staging/src/k8s.io/apimachinery/pkg/runtime/schema:go_default_library", + "//staging/src/k8s.io/apimachinery/pkg/types:go_default_library", "//staging/src/k8s.io/apimachinery/pkg/util/runtime:go_default_library", "//staging/src/k8s.io/apimachinery/pkg/util/wait:go_default_library", "//staging/src/k8s.io/client-go/informers/apps/v1:go_default_library", diff --git a/vendor/k8s.io/kubernetes/pkg/controller/replicaset/replica_set.go b/vendor/k8s.io/kubernetes/pkg/controller/replicaset/replica_set.go index b61330afd579..1940062a61d0 100644 --- a/vendor/k8s.io/kubernetes/pkg/controller/replicaset/replica_set.go +++ b/vendor/k8s.io/kubernetes/pkg/controller/replicaset/replica_set.go @@ -41,6 +41,7 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/runtime/schema" + "k8s.io/apimachinery/pkg/types" utilruntime "k8s.io/apimachinery/pkg/util/runtime" "k8s.io/apimachinery/pkg/util/wait" appsinformers "k8s.io/client-go/informers/apps/v1" @@ -193,6 +194,43 @@ func (rsc *ReplicaSetController) Run(workers int, stopCh <-chan struct{}) { <-stopCh } +// getReplicaSetsWithSameController returns a list of ReplicaSets with the same +// owner as the given ReplicaSet. 
+func (rsc *ReplicaSetController) getReplicaSetsWithSameController(rs *apps.ReplicaSet) []*apps.ReplicaSet { + controllerRef := metav1.GetControllerOf(rs) + if controllerRef == nil { + utilruntime.HandleError(fmt.Errorf("ReplicaSet has no controller: %v", rs)) + return nil + } + + allRSs, err := rsc.rsLister.ReplicaSets(rs.Namespace).List(labels.Everything()) + if err != nil { + utilruntime.HandleError(err) + return nil + } + + var relatedRSs []*apps.ReplicaSet + for _, r := range allRSs { + if ref := metav1.GetControllerOf(r); ref != nil && ref.UID == controllerRef.UID { + relatedRSs = append(relatedRSs, r) + } + } + + if klog.V(2) { + var related string + if len(relatedRSs) > 0 { + var relatedNames []string + for _, r := range relatedRSs { + relatedNames = append(relatedNames, r.Name) + } + related = ": " + strings.Join(relatedNames, ", ") + } + klog.Infof("Found %d related %vs for %v %s/%s%s", len(relatedRSs), rsc.Kind, rsc.Kind, rs.Namespace, rs.Name, related) + } + + return relatedRSs +} + // getPodReplicaSets returns a list of ReplicaSets matching the given pod. func (rsc *ReplicaSetController) getPodReplicaSets(pod *v1.Pod) []*apps.ReplicaSet { rss, err := rsc.rsLister.GetPodReplicaSets(pod) @@ -515,8 +553,11 @@ func (rsc *ReplicaSetController) manageReplicas(filteredPods []*v1.Pod, rs *apps } klog.V(2).Infof("Too many replicas for %v %s/%s, need %d, deleting %d", rsc.Kind, rs.Namespace, rs.Name, *(rs.Spec.Replicas), diff) + relatedPods, err := rsc.getIndirectlyRelatedPods(rs) + utilruntime.HandleError(err) + // Choose which Pods to delete, preferring those in earlier phases of startup. - podsToDelete := getPodsToDelete(filteredPods, diff) + podsToDelete := getPodsToDelete(filteredPods, relatedPods, diff) // Snapshot the UIDs (ns/name) of the pods we're expecting to see // deleted, so we know to record their expectations exactly once either @@ -681,18 +722,67 @@ func slowStartBatch(count int, initialBatchSize int, fn func() error) (int, erro return successes, nil } -func getPodsToDelete(filteredPods []*v1.Pod, diff int) []*v1.Pod { +// getIndirectlyRelatedPods returns all pods that are owned by any ReplicaSet +// that is owned by the given ReplicaSet's owner. +func (rsc *ReplicaSetController) getIndirectlyRelatedPods(rs *apps.ReplicaSet) ([]*v1.Pod, error) { + var relatedPods []*v1.Pod + seen := make(map[types.UID]*apps.ReplicaSet) + for _, relatedRS := range rsc.getReplicaSetsWithSameController(rs) { + selector, err := metav1.LabelSelectorAsSelector(relatedRS.Spec.Selector) + if err != nil { + return nil, err + } + pods, err := rsc.podLister.Pods(relatedRS.Namespace).List(selector) + if err != nil { + return nil, err + } + for _, pod := range pods { + if otherRS, found := seen[pod.UID]; found { + klog.V(5).Infof("Pod %s/%s is owned by both %v %s/%s and %v %s/%s", pod.Namespace, pod.Name, rsc.Kind, otherRS.Namespace, otherRS.Name, rsc.Kind, relatedRS.Namespace, relatedRS.Name) + continue + } + seen[pod.UID] = relatedRS + relatedPods = append(relatedPods, pod) + } + } + if klog.V(4) { + var relatedNames []string + for _, related := range relatedPods { + relatedNames = append(relatedNames, related.Name) + } + klog.Infof("Found %d related pods for %v %s/%s: %v", len(relatedPods), rsc.Kind, rs.Namespace, rs.Name, strings.Join(relatedNames, ", ")) + } + return relatedPods, nil +} + +func getPodsToDelete(filteredPods, relatedPods []*v1.Pod, diff int) []*v1.Pod { // No need to sort pods if we are about to delete all of them. 
// diff will always be <= len(filteredPods), so no need to handle > case. if diff < len(filteredPods) { - // Sort the pods in the order such that not-ready < ready, unscheduled - // < scheduled, and pending < running. This ensures that we delete pods - // in the earlier stages whenever possible. - sort.Sort(controller.ActivePods(filteredPods)) + podsWithRanks := getPodsRankedByRelatedPodsOnSameNode(filteredPods, relatedPods) + sort.Sort(podsWithRanks) } return filteredPods[:diff] } +// getPodsRankedByRelatedPodsOnSameNode returns an ActivePodsWithRanks value +// that wraps podsToRank and assigns each pod a rank equal to the number of +// active pods in relatedPods that are colocated on the same node with the pod. +// relatedPods generally should be a superset of podsToRank. +func getPodsRankedByRelatedPodsOnSameNode(podsToRank, relatedPods []*v1.Pod) controller.ActivePodsWithRanks { + podsOnNode := make(map[string]int) + for _, pod := range relatedPods { + if controller.IsPodActive(pod) { + podsOnNode[pod.Spec.NodeName]++ + } + } + ranks := make([]int, len(podsToRank)) + for i, pod := range podsToRank { + ranks[i] = podsOnNode[pod.Spec.NodeName] + } + return controller.ActivePodsWithRanks{Pods: podsToRank, Rank: ranks} +} + func getPodKeys(pods []*v1.Pod) []string { podKeys := make([]string, 0, len(pods)) for _, pod := range pods { diff --git a/vendor/k8s.io/kubernetes/pkg/controller/replicaset/replica_set_test.go b/vendor/k8s.io/kubernetes/pkg/controller/replicaset/replica_set_test.go index 99847bc7a92d..4f6dafe3ebaa 100644 --- a/vendor/k8s.io/kubernetes/pkg/controller/replicaset/replica_set_test.go +++ b/vendor/k8s.io/kubernetes/pkg/controller/replicaset/replica_set_test.go @@ -23,6 +23,7 @@ import ( "net/http/httptest" "net/url" "reflect" + "sort" "strings" "sync" "testing" @@ -80,12 +81,16 @@ func skipListerFunc(verb string, url url.URL) bool { var alwaysReady = func() bool { return true } func newReplicaSet(replicas int, selectorMap map[string]string) *apps.ReplicaSet { + isController := true rs := &apps.ReplicaSet{ TypeMeta: metav1.TypeMeta{APIVersion: "v1", Kind: "ReplicaSet"}, ObjectMeta: metav1.ObjectMeta{ - UID: uuid.NewUUID(), - Name: "foobar", - Namespace: metav1.NamespaceDefault, + UID: uuid.NewUUID(), + Name: "foobar", + Namespace: metav1.NamespaceDefault, + OwnerReferences: []metav1.OwnerReference{ + {UID: "123", Controller: &isController}, + }, ResourceVersion: "18", }, Spec: apps.ReplicaSetSpec{ @@ -136,6 +141,7 @@ func newPod(name string, rs *apps.ReplicaSet, status v1.PodPhase, lastTransition } return &v1.Pod{ ObjectMeta: metav1.ObjectMeta{ + UID: uuid.NewUUID(), Name: name, Namespace: rs.Namespace, Labels: rs.Spec.Selector.MatchLabels, @@ -342,6 +348,68 @@ func TestSyncReplicaSetDormancy(t *testing.T) { fakeHandler.ValidateRequestCount(t, 2) } +func TestGetReplicaSetsWithSameController(t *testing.T) { + someRS := newReplicaSet(1, map[string]string{"foo": "bar"}) + someRS.Name = "rs1" + relatedRS := newReplicaSet(1, map[string]string{"foo": "baz"}) + relatedRS.Name = "rs2" + unrelatedRS := newReplicaSet(1, map[string]string{"foo": "quux"}) + unrelatedRS.Name = "rs3" + unrelatedRS.ObjectMeta.OwnerReferences[0].UID = "456" + pendingDeletionRS := newReplicaSet(1, map[string]string{"foo": "xyzzy"}) + pendingDeletionRS.Name = "rs4" + pendingDeletionRS.ObjectMeta.OwnerReferences[0].UID = "789" + now := metav1.Now() + pendingDeletionRS.DeletionTimestamp = &now + + stopCh := make(chan struct{}) + defer close(stopCh) + manager, informers := 
testNewReplicaSetControllerFromClient(clientset.NewForConfigOrDie(&restclient.Config{Host: "", ContentConfig: restclient.ContentConfig{GroupVersion: &schema.GroupVersion{Group: "", Version: "v1"}}}), stopCh, BurstReplicas) + testCases := []struct { + name string + rss []*apps.ReplicaSet + rs *apps.ReplicaSet + expectedRSs []*apps.ReplicaSet + }{ + { + name: "expect to get back a ReplicaSet that is pending deletion", + rss: []*apps.ReplicaSet{pendingDeletionRS, unrelatedRS}, + rs: pendingDeletionRS, + expectedRSs: []*apps.ReplicaSet{pendingDeletionRS}, + }, + { + name: "expect to get back only the given ReplicaSet if there is no related ReplicaSet", + rss: []*apps.ReplicaSet{someRS, unrelatedRS}, + rs: someRS, + expectedRSs: []*apps.ReplicaSet{someRS}, + }, + { + name: "expect to get back the given ReplicaSet as well as any related ReplicaSet but not an unrelated ReplicaSet", + rss: []*apps.ReplicaSet{someRS, relatedRS, unrelatedRS}, + rs: someRS, + expectedRSs: []*apps.ReplicaSet{someRS, relatedRS}, + }, + } + for _, c := range testCases { + for _, r := range c.rss { + informers.Apps().V1().ReplicaSets().Informer().GetIndexer().Add(r) + } + actualRSs := manager.getReplicaSetsWithSameController(c.rs) + var actualRSNames, expectedRSNames []string + for _, r := range actualRSs { + actualRSNames = append(actualRSNames, r.Name) + } + for _, r := range c.expectedRSs { + expectedRSNames = append(expectedRSNames, r.Name) + } + sort.Strings(actualRSNames) + sort.Strings(expectedRSNames) + if !reflect.DeepEqual(actualRSNames, expectedRSNames) { + t.Errorf("Got [%s]; expected [%s]", strings.Join(actualRSNames, ", "), strings.Join(expectedRSNames, ", ")) + } + } +} + func TestPodControllerLookup(t *testing.T) { stopCh := make(chan struct{}) defer close(stopCh) @@ -408,6 +476,87 @@ func TestPodControllerLookup(t *testing.T) { } } +// byName sorts pods by their names. 
+type byName []*v1.Pod + +func (pods byName) Len() int { return len(pods) } +func (pods byName) Swap(i, j int) { pods[i], pods[j] = pods[j], pods[i] } +func (pods byName) Less(i, j int) bool { return pods[i].Name < pods[j].Name } + +func TestRelatedPodsLookup(t *testing.T) { + someRS := newReplicaSet(1, map[string]string{"foo": "bar"}) + someRS.Name = "foo1" + relatedRS := newReplicaSet(1, map[string]string{"foo": "baz"}) + relatedRS.Name = "foo2" + unrelatedRS := newReplicaSet(1, map[string]string{"foo": "quux"}) + unrelatedRS.Name = "bar1" + unrelatedRS.ObjectMeta.OwnerReferences[0].UID = "456" + pendingDeletionRS := newReplicaSet(1, map[string]string{"foo": "xyzzy"}) + pendingDeletionRS.Name = "foo3" + pendingDeletionRS.ObjectMeta.OwnerReferences[0].UID = "789" + now := metav1.Now() + pendingDeletionRS.DeletionTimestamp = &now + pod1 := newPod("pod1", someRS, v1.PodRunning, nil, true) + pod2 := newPod("pod2", someRS, v1.PodRunning, nil, true) + pod3 := newPod("pod3", relatedRS, v1.PodRunning, nil, true) + pod4 := newPod("pod4", unrelatedRS, v1.PodRunning, nil, true) + + stopCh := make(chan struct{}) + defer close(stopCh) + manager, informers := testNewReplicaSetControllerFromClient(clientset.NewForConfigOrDie(&restclient.Config{Host: "", ContentConfig: restclient.ContentConfig{GroupVersion: &schema.GroupVersion{Group: "", Version: "v1"}}}), stopCh, BurstReplicas) + testCases := []struct { + name string + rss []*apps.ReplicaSet + pods []*v1.Pod + rs *apps.ReplicaSet + expectedPodNames []string + }{ + { + name: "expect to get a pod even if its owning ReplicaSet is pending deletion", + rss: []*apps.ReplicaSet{pendingDeletionRS, unrelatedRS}, + rs: pendingDeletionRS, + pods: []*v1.Pod{newPod("pod", pendingDeletionRS, v1.PodRunning, nil, true)}, + expectedPodNames: []string{"pod"}, + }, + { + name: "expect to get only the ReplicaSet's own pods if there is no related ReplicaSet", + rss: []*apps.ReplicaSet{someRS, unrelatedRS}, + rs: someRS, + pods: []*v1.Pod{pod1, pod2, pod4}, + expectedPodNames: []string{"pod1", "pod2"}, + }, + { + name: "expect to get own pods as well as any related ReplicaSet's but not an unrelated ReplicaSet's", + rss: []*apps.ReplicaSet{someRS, relatedRS, unrelatedRS}, + rs: someRS, + pods: []*v1.Pod{pod1, pod2, pod3, pod4}, + expectedPodNames: []string{"pod1", "pod2", "pod3"}, + }, + } + for _, c := range testCases { + for _, r := range c.rss { + informers.Apps().V1().ReplicaSets().Informer().GetIndexer().Add(r) + } + for _, pod := range c.pods { + informers.Core().V1().Pods().Informer().GetIndexer().Add(pod) + manager.addPod(pod) + } + actualPods, err := manager.getIndirectlyRelatedPods(c.rs) + if err != nil { + t.Errorf("Unexpected error from getIndirectlyRelatedPods: %v", err) + } + var actualPodNames []string + for _, pod := range actualPods { + actualPodNames = append(actualPodNames, pod.Name) + } + sort.Strings(actualPodNames) + sort.Strings(c.expectedPodNames) + if !reflect.DeepEqual(actualPodNames, c.expectedPodNames) { + t.Errorf("Got [%s]; expected [%s]", strings.Join(actualPodNames, ", "), strings.Join(c.expectedPodNames, ", ")) + } + } +} + func TestWatchControllers(t *testing.T) { fakeWatch := watch.NewFake() client := fake.NewSimpleClientset() @@ -1445,10 +1594,19 @@ func TestGetPodsToDelete(t *testing.T) { Status: v1.ConditionFalse, }, } - // a scheduled, running, ready pod - scheduledRunningReadyPod := newPod("scheduled-running-ready-pod", rs, v1.PodRunning, nil, true) - scheduledRunningReadyPod.Spec.NodeName = "fake-node" - 
scheduledRunningReadyPod.Status.Conditions = []v1.PodCondition{ + // a scheduled, running, ready pod on fake-node-1 + scheduledRunningReadyPodOnNode1 := newPod("scheduled-running-ready-pod-on-node-1", rs, v1.PodRunning, nil, true) + scheduledRunningReadyPodOnNode1.Spec.NodeName = "fake-node-1" + scheduledRunningReadyPodOnNode1.Status.Conditions = []v1.PodCondition{ + { + Type: v1.PodReady, + Status: v1.ConditionTrue, + }, + } + // a scheduled, running, ready pod on fake-node-2 + scheduledRunningReadyPodOnNode2 := newPod("scheduled-running-ready-pod-on-node-2", rs, v1.PodRunning, nil, true) + scheduledRunningReadyPodOnNode2.Spec.NodeName = "fake-node-2" + scheduledRunningReadyPodOnNode2.Status.Conditions = []v1.PodCondition{ { Type: v1.PodReady, Status: v1.ConditionTrue, @@ -1456,8 +1614,10 @@ func TestGetPodsToDelete(t *testing.T) { } tests := []struct { - name string - pods []*v1.Pod + name string + pods []*v1.Pod + // related defaults to pods if nil. + related []*v1.Pod diff int expectedPodsToDelete []*v1.Pod }{ @@ -1465,93 +1625,136 @@ func TestGetPodsToDelete(t *testing.T) { // an unscheduled, pending pod // a scheduled, pending pod // a scheduled, running, not-ready pod - // a scheduled, running, ready pod + // a scheduled, running, ready pod on same node as a related pod + // a scheduled, running, ready pod not on node with related pods // Note that a pending pod cannot be ready { - "len(pods) = 0 (i.e., diff = 0 too)", - []*v1.Pod{}, - 0, - []*v1.Pod{}, + name: "len(pods) = 0 (i.e., diff = 0 too)", + pods: []*v1.Pod{}, + diff: 0, + expectedPodsToDelete: []*v1.Pod{}, }, { - "diff = len(pods)", - []*v1.Pod{ + name: "diff = len(pods)", + pods: []*v1.Pod{ scheduledRunningNotReadyPod, - scheduledRunningReadyPod, + scheduledRunningReadyPodOnNode1, }, - 2, - []*v1.Pod{scheduledRunningNotReadyPod, scheduledRunningReadyPod}, + diff: 2, + expectedPodsToDelete: []*v1.Pod{scheduledRunningNotReadyPod, scheduledRunningReadyPodOnNode1}, }, { - "diff < len(pods)", - []*v1.Pod{ - scheduledRunningReadyPod, + name: "diff < len(pods)", + pods: []*v1.Pod{ + scheduledRunningReadyPodOnNode1, scheduledRunningNotReadyPod, }, - 1, - []*v1.Pod{scheduledRunningNotReadyPod}, + diff: 1, + expectedPodsToDelete: []*v1.Pod{scheduledRunningNotReadyPod}, }, { - "various pod phases and conditions, diff = len(pods)", - []*v1.Pod{ - scheduledRunningReadyPod, + name: "various pod phases and conditions, diff = len(pods)", + pods: []*v1.Pod{ + scheduledRunningReadyPodOnNode1, + scheduledRunningReadyPodOnNode1, + scheduledRunningReadyPodOnNode2, scheduledRunningNotReadyPod, scheduledPendingPod, unscheduledPendingPod, }, - 4, - []*v1.Pod{ - scheduledRunningReadyPod, + diff: 6, + expectedPodsToDelete: []*v1.Pod{ + scheduledRunningReadyPodOnNode1, + scheduledRunningReadyPodOnNode1, + scheduledRunningReadyPodOnNode2, scheduledRunningNotReadyPod, scheduledPendingPod, unscheduledPendingPod, }, }, { - "scheduled vs unscheduled, diff < len(pods)", - []*v1.Pod{ + name: "various pod phases and conditions, diff = len(pods), relatedPods empty", + pods: []*v1.Pod{ + scheduledRunningReadyPodOnNode1, + scheduledRunningReadyPodOnNode1, + scheduledRunningReadyPodOnNode2, + scheduledRunningNotReadyPod, scheduledPendingPod, unscheduledPendingPod, }, - 1, - []*v1.Pod{ + related: []*v1.Pod{}, + diff: 6, + expectedPodsToDelete: []*v1.Pod{ + scheduledRunningReadyPodOnNode1, + scheduledRunningReadyPodOnNode1, + scheduledRunningReadyPodOnNode2, + scheduledRunningNotReadyPod, + scheduledPendingPod, unscheduledPendingPod, }, }, { - "ready vs 
not-ready, diff < len(pods)", - []*v1.Pod{ - scheduledRunningReadyPod, + name: "scheduled vs unscheduled, diff < len(pods)", + pods: []*v1.Pod{ + scheduledPendingPod, + unscheduledPendingPod, + }, + diff: 1, + expectedPodsToDelete: []*v1.Pod{ + unscheduledPendingPod, + }, + }, + { + name: "ready vs not-ready, diff < len(pods)", + pods: []*v1.Pod{ + scheduledRunningReadyPodOnNode1, scheduledRunningNotReadyPod, scheduledRunningNotReadyPod, }, - 2, - []*v1.Pod{ + diff: 2, + expectedPodsToDelete: []*v1.Pod{ scheduledRunningNotReadyPod, scheduledRunningNotReadyPod, }, }, { - "pending vs running, diff < len(pods)", - []*v1.Pod{ + name: "ready and colocated with another ready pod vs not colocated, diff < len(pods)", + pods: []*v1.Pod{ + scheduledRunningReadyPodOnNode1, + scheduledRunningReadyPodOnNode2, + }, + related: []*v1.Pod{ + scheduledRunningReadyPodOnNode1, + scheduledRunningReadyPodOnNode2, + scheduledRunningReadyPodOnNode2, + }, + diff: 1, + expectedPodsToDelete: []*v1.Pod{ + scheduledRunningReadyPodOnNode2, + }, + }, + { + name: "pending vs running, diff < len(pods)", + pods: []*v1.Pod{ scheduledPendingPod, scheduledRunningNotReadyPod, }, - 1, - []*v1.Pod{ + diff: 1, + expectedPodsToDelete: []*v1.Pod{ scheduledPendingPod, }, }, { - "various pod phases and conditions, diff < len(pods)", - []*v1.Pod{ - scheduledRunningReadyPod, + name: "various pod phases and conditions, diff < len(pods)", + pods: []*v1.Pod{ + scheduledRunningReadyPodOnNode1, + scheduledRunningReadyPodOnNode2, scheduledRunningNotReadyPod, scheduledPendingPod, unscheduledPendingPod, }, - 3, - []*v1.Pod{ + diff: 3, + expectedPodsToDelete: []*v1.Pod{ unscheduledPendingPod, scheduledPendingPod, scheduledRunningNotReadyPod, @@ -1560,7 +1763,11 @@ func TestGetPodsToDelete(t *testing.T) { } for _, test := range tests { - podsToDelete := getPodsToDelete(test.pods, test.diff) + related := test.related + if related == nil { + related = test.pods + } + podsToDelete := getPodsToDelete(test.pods, related, test.diff) if len(podsToDelete) != len(test.expectedPodsToDelete) { t.Errorf("%s: unexpected pods to delete, expected %v, got %v", test.name, test.expectedPodsToDelete, podsToDelete) } diff --git a/vendor/k8s.io/kubernetes/test/e2e/apps/deployment.go b/vendor/k8s.io/kubernetes/test/e2e/apps/deployment.go index 1a354f20caa8..3f0cd05dc64c 100644 --- a/vendor/k8s.io/kubernetes/test/e2e/apps/deployment.go +++ b/vendor/k8s.io/kubernetes/test/e2e/apps/deployment.go @@ -42,6 +42,7 @@ import ( e2edeploy "k8s.io/kubernetes/test/e2e/framework/deployment" e2epod "k8s.io/kubernetes/test/e2e/framework/pod" "k8s.io/kubernetes/test/e2e/framework/replicaset" + e2eservice "k8s.io/kubernetes/test/e2e/framework/service" testutil "k8s.io/kubernetes/test/utils" utilpointer "k8s.io/utils/pointer" ) @@ -119,6 +120,10 @@ var _ = SIGDescribe("Deployment", func() { framework.ConformanceIt("deployment should support proportional scaling", func() { testProportionalScalingDeployment(f) }) + ginkgo.It("should not disrupt a cloud load-balancer's connectivity during rollout", func() { + framework.SkipUnlessProviderIs("aws", "azure", "gce", "gke") + testRollingUpdateDeploymentWithLocalTrafficLoadBalancer(f) + }) // TODO: add tests that cover deployment.Spec.MinReadySeconds once we solved clock-skew issues // See https://github.com/kubernetes/kubernetes/issues/29229 }) @@ -856,3 +861,151 @@ func orphanDeploymentReplicaSets(c clientset.Interface, d *appsv1.Deployment) er deleteOptions.Preconditions = metav1.NewUIDPreconditions(string(d.UID)) return 
c.AppsV1().Deployments(d.Namespace).Delete(d.Name, deleteOptions) } + +func testRollingUpdateDeploymentWithLocalTrafficLoadBalancer(f *framework.Framework) { + ns := f.Namespace.Name + c := f.ClientSet + + name := "test-rolling-update-with-lb" + framework.Logf("Creating Deployment %q", name) + podLabels := map[string]string{"name": name} + replicas := int32(3) + d := e2edeploy.NewDeployment(name, replicas, podLabels, AgnhostImageName, AgnhostImage, appsv1.RollingUpdateDeploymentStrategyType) + // NewDeployment assigned the same value to both d.Spec.Selector and + // d.Spec.Template.Labels, so mutating the one would mutate the other. + // Thus we need to set d.Spec.Template.Labels to a new value if we want + // to mutate it alone. + d.Spec.Template.Labels = map[string]string{ + "iteration": "0", + "name": name, + } + d.Spec.Template.Spec.Containers[0].Args = []string{"netexec", "--http-port=80", "--udp-port=80"} + // To ensure that a node that had a local endpoint prior to a rolling + // update continues to have a local endpoint throughout the rollout, we + // need an affinity policy that will cause pods to be scheduled on the + // same nodes as old pods, and we need the deployment to scale up a new + // pod before deleting an old pod. This affinity policy will define + // inter-pod affinity for pods of different rollouts and anti-affinity + // for pods of the same rollout, so it will need to be updated when + // performing a rollout. + setAffinity(d) + d.Spec.Strategy.RollingUpdate = &appsv1.RollingUpdateDeployment{ + MaxSurge: intOrStrP(1), + MaxUnavailable: intOrStrP(0), + } + deployment, err := c.AppsV1().Deployments(ns).Create(d) + framework.ExpectNoError(err) + err = e2edeploy.WaitForDeploymentComplete(c, deployment) + framework.ExpectNoError(err) + + framework.Logf("Creating a service %s with type=LoadBalancer and externalTrafficPolicy=Local in namespace %s", name, ns) + jig := e2eservice.NewTestJig(c, name) + jig.Labels = podLabels + service := jig.CreateLoadBalancerService(ns, name, e2eservice.LoadBalancerCreateTimeoutDefault, func(svc *v1.Service) { + svc.Spec.ExternalTrafficPolicy = v1.ServiceExternalTrafficPolicyTypeLocal + }) + + lbNameOrAddress := e2eservice.GetIngressPoint(&service.Status.LoadBalancer.Ingress[0]) + svcPort := int(service.Spec.Ports[0].Port) + + framework.Logf("Hitting the replica set's pods through the service's load balancer") + timeout := e2eservice.LoadBalancerLagTimeoutDefault + if framework.ProviderIs("aws") { + timeout = e2eservice.LoadBalancerLagTimeoutAWS + } + jig.TestReachableHTTP(lbNameOrAddress, svcPort, timeout) + + framework.Logf("Starting a goroutine to watch the service's endpoints in the background") + done := make(chan struct{}) + failed := make(chan struct{}) + defer close(done) + go func() { + defer ginkgo.GinkgoRecover() + expectedNodes := jig.GetEndpointNodeNames(service) + // The affinity policy should ensure that before an old pod is + // deleted, a new pod will have been created on the same node. + // Thus the set of nodes with local endpoints for the service + // should remain unchanged. 
+ wait.Until(func() { + actualNodes := jig.GetEndpointNodeNames(service) + if !actualNodes.Equal(expectedNodes) { + framework.Logf("The set of nodes with local endpoints changed; started with %v, now have %v", expectedNodes.List(), actualNodes.List()) + failed <- struct{}{} + } + }, framework.Poll, done) + }() + + framework.Logf("Triggering a rolling deployment several times") + for i := 1; i <= 3; i++ { + framework.Logf("Updating label deployment %q pod spec (iteration #%d)", name, i) + deployment, err = e2edeploy.UpdateDeploymentWithRetries(c, ns, d.Name, func(update *appsv1.Deployment) { + update.Spec.Template.Labels["iteration"] = fmt.Sprintf("%d", i) + setAffinity(update) + }) + framework.ExpectNoError(err) + + framework.Logf("Waiting for observed generation %d", deployment.Generation) + err = e2edeploy.WaitForObservedDeployment(c, ns, name, deployment.Generation) + framework.ExpectNoError(err) + + framework.Logf("Make sure deployment %q is complete", name) + err = e2edeploy.WaitForDeploymentCompleteAndCheckRolling(c, deployment) + framework.ExpectNoError(err) + } + + select { + case <-failed: + framework.Failf("Connectivity to the load balancer was interrupted") + case <-time.After(1 * time.Minute): + } +} + +func setAffinity(d *appsv1.Deployment) { + d.Spec.Template.Spec.Affinity = &v1.Affinity{ + PodAffinity: &v1.PodAffinity{ + PreferredDuringSchedulingIgnoredDuringExecution: []v1.WeightedPodAffinityTerm{ + { + Weight: int32(100), + PodAffinityTerm: v1.PodAffinityTerm{ + TopologyKey: "kubernetes.io/hostname", + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "name", + Operator: metav1.LabelSelectorOpIn, + Values: []string{d.Spec.Template.Labels["name"]}, + }, + { + Key: "iteration", + Operator: metav1.LabelSelectorOpNotIn, + Values: []string{d.Spec.Template.Labels["iteration"]}, + }, + }, + }, + }, + }, + }, + }, + PodAntiAffinity: &v1.PodAntiAffinity{ + RequiredDuringSchedulingIgnoredDuringExecution: []v1.PodAffinityTerm{ + { + TopologyKey: "kubernetes.io/hostname", + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "name", + Operator: metav1.LabelSelectorOpIn, + Values: []string{d.Spec.Template.Labels["name"]}, + }, + { + Key: "iteration", + Operator: metav1.LabelSelectorOpIn, + Values: []string{d.Spec.Template.Labels["iteration"]}, + }, + }, + }, + }, + }, + }, + } +} diff --git a/vendor/k8s.io/kubernetes/test/e2e/apps/types.go b/vendor/k8s.io/kubernetes/test/e2e/apps/types.go index ffe2d785742f..976b40259565 100644 --- a/vendor/k8s.io/kubernetes/test/e2e/apps/types.go +++ b/vendor/k8s.io/kubernetes/test/e2e/apps/types.go @@ -25,6 +25,7 @@ import ( const ( WebserverImageName = "httpd" RedisImageName = "redis" + AgnhostImageName = "agnhost" ) var ( @@ -48,4 +49,7 @@ var ( // RedisImage is the fully qualified URI to the Redis image RedisImage = imageutils.GetE2EImage(imageutils.Redis) + + // AgnhostImage is the fully qualified URI to the agnhost image. + AgnhostImage = imageutils.GetE2EImage(imageutils.Agnhost) ) diff --git a/vendor/k8s.io/kubernetes/test/e2e/framework/service/jig.go b/vendor/k8s.io/kubernetes/test/e2e/framework/service/jig.go index 3de36de06dcd..f74b693ccab6 100644 --- a/vendor/k8s.io/kubernetes/test/e2e/framework/service/jig.go +++ b/vendor/k8s.io/kubernetes/test/e2e/framework/service/jig.go @@ -271,6 +271,19 @@ func (j *TestJig) CreateLoadBalancerService(namespace, serviceName string, timeo // endpoints of the given Service are running. 
func (j *TestJig) GetEndpointNodes(svc *v1.Service) map[string][]string { nodes := j.GetNodes(MaxNodesForEndpointsTests) + epNodes := j.GetEndpointNodeNames(svc) + nodeMap := map[string][]string{} + for _, n := range nodes.Items { + if epNodes.Has(n.Name) { + nodeMap[n.Name] = e2enode.GetAddresses(&n, v1.NodeExternalIP) + } + } + return nodeMap +} + +// GetEndpointNodeNames returns a string set of node names on which the +// endpoints of the given Service are running. +func (j *TestJig) GetEndpointNodeNames(svc *v1.Service) sets.String { endpoints, err := j.Client.CoreV1().Endpoints(svc.Namespace).Get(svc.Name, metav1.GetOptions{}) if err != nil { framework.Failf("Get endpoints for service %s/%s failed (%s)", svc.Namespace, svc.Name, err) @@ -286,13 +299,7 @@ func (j *TestJig) GetEndpointNodes(svc *v1.Service) map[string][]string { } } } - nodeMap := map[string][]string{} - for _, n := range nodes.Items { - if epNodes.Has(n.Name) { - nodeMap[n.Name] = e2enode.GetAddresses(&n, v1.NodeExternalIP) - } - } - return nodeMap + return epNodes } // GetNodes returns the first maxNodesForTest nodes. Useful in large clusters From 6a89b2329174def6ba842932c72654ff8ed419d5 Mon Sep 17 00:00:00 2001 From: wojtekt Date: Wed, 30 Oct 2019 13:01:31 -0400 Subject: [PATCH 2/3] UPSTREAM: 84339: Fix deployment e2e test at scale --- .../kubernetes/test/e2e/apps/deployment.go | 54 ++++++++++--------- 1 file changed, 30 insertions(+), 24 deletions(-) diff --git a/vendor/k8s.io/kubernetes/test/e2e/apps/deployment.go b/vendor/k8s.io/kubernetes/test/e2e/apps/deployment.go index 3f0cd05dc64c..e8279f4cd162 100644 --- a/vendor/k8s.io/kubernetes/test/e2e/apps/deployment.go +++ b/vendor/k8s.io/kubernetes/test/e2e/apps/deployment.go @@ -888,7 +888,7 @@ func testRollingUpdateDeploymentWithLocalTrafficLoadBalancer(f *framework.Framew // inter-pod affinity for pods of different rollouts and anti-affinity // for pods of the same rollout, so it will need to be updated when // performing a rollout. - setAffinity(d) + setAffinities(d, false) d.Spec.Strategy.RollingUpdate = &appsv1.RollingUpdateDeployment{ MaxSurge: intOrStrP(1), MaxUnavailable: intOrStrP(0), @@ -940,7 +940,7 @@ func testRollingUpdateDeploymentWithLocalTrafficLoadBalancer(f *framework.Framew framework.Logf("Updating label deployment %q pod spec (iteration #%d)", name, i) deployment, err = e2edeploy.UpdateDeploymentWithRetries(c, ns, d.Name, func(update *appsv1.Deployment) { update.Spec.Template.Labels["iteration"] = fmt.Sprintf("%d", i) - setAffinity(update) + setAffinities(update, true) }) framework.ExpectNoError(err) @@ -960,33 +960,38 @@ func testRollingUpdateDeploymentWithLocalTrafficLoadBalancer(f *framework.Framew } } -func setAffinity(d *appsv1.Deployment) { - d.Spec.Template.Spec.Affinity = &v1.Affinity{ - PodAffinity: &v1.PodAffinity{ - PreferredDuringSchedulingIgnoredDuringExecution: []v1.WeightedPodAffinityTerm{ +// setAffinities sets PodAntiAffinity across pods from the same generation +// of the Deployment and, if explicitly requested, also affinity with pods +// from other generations. +// The affinity rules must be "Required" so that the test still has a +// chance to pass in large clusters, where the scheduler may not score +// all nodes if many of them are feasible.
+func setAffinities(d *appsv1.Deployment, setAffinity bool) { + affinity := &v1.Affinity{ + PodAntiAffinity: &v1.PodAntiAffinity{ + RequiredDuringSchedulingIgnoredDuringExecution: []v1.PodAffinityTerm{ { - Weight: int32(100), - PodAffinityTerm: v1.PodAffinityTerm{ - TopologyKey: "kubernetes.io/hostname", - LabelSelector: &metav1.LabelSelector{ - MatchExpressions: []metav1.LabelSelectorRequirement{ - { - Key: "name", - Operator: metav1.LabelSelectorOpIn, - Values: []string{d.Spec.Template.Labels["name"]}, - }, - { - Key: "iteration", - Operator: metav1.LabelSelectorOpNotIn, - Values: []string{d.Spec.Template.Labels["iteration"]}, - }, + TopologyKey: "kubernetes.io/hostname", + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "name", + Operator: metav1.LabelSelectorOpIn, + Values: []string{d.Spec.Template.Labels["name"]}, + }, + { + Key: "iteration", + Operator: metav1.LabelSelectorOpIn, + Values: []string{d.Spec.Template.Labels["iteration"]}, }, }, }, }, }, }, - PodAntiAffinity: &v1.PodAntiAffinity{ + } + if setAffinity { + affinity.PodAffinity = &v1.PodAffinity{ RequiredDuringSchedulingIgnoredDuringExecution: []v1.PodAffinityTerm{ { TopologyKey: "kubernetes.io/hostname", @@ -999,13 +1004,14 @@ func setAffinity(d *appsv1.Deployment) { }, { Key: "iteration", - Operator: metav1.LabelSelectorOpIn, + Operator: metav1.LabelSelectorOpNotIn, Values: []string{d.Spec.Template.Labels["iteration"]}, }, }, }, }, }, - }, + } } + d.Spec.Template.Spec.Affinity = affinity } From 5a4dac205f5c7a907632132c4cc6733254663219 Mon Sep 17 00:00:00 2001 From: Miciah Masters Date: Wed, 30 Oct 2019 13:48:47 -0400 Subject: [PATCH 3/3] UPSTREAM: 84568: test/e2e/apps: Skip or scale LB test per node count Skip the "Deployment should not disrupt a cloud load-balancer's connectivity during rollout" test if the number of nodes is less than 2; otherwise, set the deployment's replicas equal to the lesser of 5 and the number of nodes. The test would fail if there were fewer nodes than replicas, but the test needs at least 2 nodes, and the likelihood of failure absent the feature under test increases with the number of replicas, so it is desirable to set replicas to a higher value, within reason. Follow-up to commit 980b6406b24b41bc0ef061624385f1f420708819. * vendor/k8s.io/kubernetes/test/e2e/apps/deployment.go: Skip the load-balancer connectivity test unless there are at least 2 nodes. (testRollingUpdateDeploymentWithLocalTrafficLoadBalancer): Set replicas to the min of 5 and the number of nodes. 
--- vendor/k8s.io/kubernetes/test/e2e/apps/deployment.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/vendor/k8s.io/kubernetes/test/e2e/apps/deployment.go b/vendor/k8s.io/kubernetes/test/e2e/apps/deployment.go index e8279f4cd162..12a3dfeb0225 100644 --- a/vendor/k8s.io/kubernetes/test/e2e/apps/deployment.go +++ b/vendor/k8s.io/kubernetes/test/e2e/apps/deployment.go @@ -44,6 +44,7 @@ import ( "k8s.io/kubernetes/test/e2e/framework/replicaset" e2eservice "k8s.io/kubernetes/test/e2e/framework/service" testutil "k8s.io/kubernetes/test/utils" + "k8s.io/utils/integer" utilpointer "k8s.io/utils/pointer" ) @@ -121,6 +122,7 @@ var _ = SIGDescribe("Deployment", func() { testProportionalScalingDeployment(f) }) ginkgo.It("should not disrupt a cloud load-balancer's connectivity during rollout", func() { + framework.SkipUnlessNodeCountIsAtLeast(2) framework.SkipUnlessProviderIs("aws", "azure", "gce", "gke") testRollingUpdateDeploymentWithLocalTrafficLoadBalancer(f) }) @@ -869,7 +871,7 @@ func testRollingUpdateDeploymentWithLocalTrafficLoadBalancer(f *framework.Framew name := "test-rolling-update-with-lb" framework.Logf("Creating Deployment %q", name) podLabels := map[string]string{"name": name} - replicas := int32(3) + replicas := int32(integer.IntMin(5, framework.TestContext.CloudConfig.NumNodes)) d := e2edeploy.NewDeployment(name, replicas, podLabels, AgnhostImageName, AgnhostImage, appsv1.RollingUpdateDeploymentStrategyType) // NewDeployment assigned the same value to both d.Spec.Selector and // d.Spec.Template.Labels, so mutating the one would mutate the other.