Skip to content

Commit ea7bd81

Browse files
committed
Prefer using ready nodes and cloudprovider template nodes over unready/unschedulable nodes in scale-up
1 parent e0f9de7 commit ea7bd81

File tree

1 file changed

+41
-8
lines changed

1 file changed

+41
-8
lines changed

cluster-autoscaler/core/utils.go

Lines changed: 41 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ import (
2626
"k8s.io/autoscaler/cluster-autoscaler/clusterstate"
2727
"k8s.io/autoscaler/cluster-autoscaler/simulator"
2828
"k8s.io/autoscaler/cluster-autoscaler/utils/daemonset"
29+
kube_util "k8s.io/autoscaler/cluster-autoscaler/utils/kubernetes"
2930

3031
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
3132
api "k8s.io/kubernetes/pkg/api"
@@ -138,26 +139,41 @@ func createNodeNameToInfoMap(pods []*apiv1.Pod, nodes []*apiv1.Node) map[string]
138139
func GetNodeInfosForGroups(nodes []*apiv1.Node, cloudProvider cloudprovider.CloudProvider, kubeClient kube_client.Interface,
139140
daemonsets []*extensionsv1.DaemonSet, predicateChecker *simulator.PredicateChecker) (map[string]*schedulercache.NodeInfo, error) {
140141
result := make(map[string]*schedulercache.NodeInfo)
141-
for _, node := range nodes {
142+
143+
// processNode reports whether a node template was generated from the given node, and returns any error encountered.
144+
processNode := func(node *apiv1.Node) (bool, error) {
142145
nodeGroup, err := cloudProvider.NodeGroupForNode(node)
143146
if err != nil {
144-
return map[string]*schedulercache.NodeInfo{}, err
147+
return false, err
145148
}
146149
if nodeGroup == nil || reflect.ValueOf(nodeGroup).IsNil() {
147-
continue
150+
return false, nil
148151
}
149152
id := nodeGroup.Id()
150153
if _, found := result[id]; !found {
151154
// Build nodeInfo.
152155
nodeInfo, err := simulator.BuildNodeInfoForNode(node, kubeClient)
153156
if err != nil {
154-
return map[string]*schedulercache.NodeInfo{}, err
157+
return false, err
155158
}
156159
sanitizedNodeInfo, err := sanitizeNodeInfo(nodeInfo, id)
157160
if err != nil {
158-
return map[string]*schedulercache.NodeInfo{}, err
161+
return false, err
159162
}
160163
result[id] = sanitizedNodeInfo
164+
return true, nil
165+
}
166+
return false, nil
167+
}
168+
169+
for _, node := range nodes {
170+
// Unready/unschedulable nodes might have some information missing, so skip them here.
171+
if !kube_util.IsNodeReadyAndSchedulable(node) {
172+
continue
173+
}
174+
_, err := processNode(node)
175+
if err != nil {
176+
return map[string]*schedulercache.NodeInfo{}, err
161177
}
162178
}
163179
for _, nodeGroup := range cloudProvider.NodeGroups() {
@@ -170,14 +186,13 @@ func GetNodeInfosForGroups(nodes []*apiv1.Node, cloudProvider cloudprovider.Clou
170186
// working nodes in the node groups. By default CA tries to use a real-world example.
171187
baseNodeInfo, err := nodeGroup.TemplateNodeInfo()
172188
if err != nil {
173-
glog.Warningf("Unable to build template node for %s: %v", id, err)
174189
if err == cloudprovider.ErrNotImplemented {
175190
continue
176191
} else {
192+
glog.Errorf("Unable to build proper template node for %s: %v", id, err)
177193
return map[string]*schedulercache.NodeInfo{}, err
178194
}
179195
}
180-
181196
pods := daemonset.GetDaemonSetPodsForNode(baseNodeInfo, daemonsets, predicateChecker)
182197
pods = append(pods, baseNodeInfo.Pods()...)
183198
fullNodeInfo := schedulercache.NewNodeInfo(pods...)
@@ -188,6 +203,25 @@ func GetNodeInfosForGroups(nodes []*apiv1.Node, cloudProvider cloudprovider.Clou
188203
}
189204
result[id] = sanitizedNodeInfo
190205
}
206+
207+
// Last resort - unready/unschedulable nodes.
208+
for _, node := range nodes {
209+
// Allow unready/unschedulable nodes here.
210+
if !kube_util.IsNodeReadyAndSchedulable(node) {
211+
added, err := processNode(node)
212+
if err != nil {
213+
return map[string]*schedulercache.NodeInfo{}, err
214+
}
215+
nodeGroup, err := cloudProvider.NodeGroupForNode(node)
216+
if err != nil {
217+
return map[string]*schedulercache.NodeInfo{}, err
218+
}
219+
if added {
220+
glog.Warningf("Built template for %s based on unready/unschedulable node %s", nodeGroup.Id(), node.Name)
221+
}
222+
}
223+
}
224+
191225
return result, nil
192226
}
193227

@@ -201,7 +235,6 @@ func sanitizeNodeInfo(nodeInfo *schedulercache.NodeInfo, nodeGroupName string) (
201235
// Update nodename in pods.
202236
sanitizedPods := make([]*apiv1.Pod, 0)
203237
for _, pod := range nodeInfo.Pods() {
204-
205238
obj, err := api.Scheme.DeepCopy(pod)
206239
if err != nil {
207240
return nil, err

0 commit comments

Comments
 (0)