Merge pull request #18758 from pravisankar/fix-clear-nodenetwork
Automatic merge from submit-queue.

Bug 1550266 - Fix clearInitialNodeNetworkUnavailableCondition() in sdn master 

This change fixes two issues:
- Currently, clearing the NodeNetworkUnavailable node condition only works
if we succeed in updating the node status during the first iteration.
Subsequent retries do not work because:
  1. knode != node (knode is re-fetched from the API server)
  2. node.Status is updated in memory
  3. UpdateNodeStatus(knode) is called
  Step (3) has no effect because step (2) updated node.Status but not
  knode.Status (a minimal sketch of this mismatch follows this list).

- The node object passed to this method is a pointer to an item in the
informer cache and must not be modified directly.
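
To make the mismatch concrete, here is a minimal, self-contained Go sketch. The
types and functions (toyNode, fakeUpdateStatus) are invented stand-ins for
kapi.Node and UpdateNodeStatus, not the real OpenShift or client-go API; the
sketch only shows why mutating node while pushing knode loses the change, and
the shape of the fix (copy the informer object, then mutate the object that is
actually sent).

```go
package main

import "fmt"

// toyNode is an invented stand-in for kapi.Node; illustration only.
type toyNode struct {
	Name   string
	Reason string
}

// fakeUpdateStatus plays the role of UpdateNodeStatus(knode): whatever is in
// the object passed here is what the API server would receive.
func fakeUpdateStatus(n *toyNode) {
	fmt.Printf("API server receives %s: Reason=%q\n", n.Name, n.Reason)
}

func main() {
	// Object from the informer cache (should never be mutated directly).
	cached := &toyNode{Name: "node1", Reason: "NoRouteCreated"}

	// Fix, part 1: work on a copy of the informer object.
	node := *cached

	// A retry after a conflict re-fetches the node, so knode != &node.
	knode := &toyNode{Name: "node1", Reason: "NoRouteCreated"}

	// Old, buggy pattern: the condition is modified on node ...
	node.Reason = "RouteCreated"
	// ... but knode is what gets pushed, so the change is lost.
	fakeUpdateStatus(knode) // prints Reason="NoRouteCreated"

	// Fixed pattern: modify the re-fetched object that is actually pushed.
	knode.Reason = "RouteCreated"
	fakeUpdateStatus(knode) // prints Reason="RouteCreated"
}
```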

 Avoid NodeNetworkUnavailable condition check for every node status update

- We know that kubelet sets the NodeNetworkUnavailable condition when the node
is created/registered with the API server.
- So we only need to call clearInitialNodeNetworkUnavailableCondition() the
first time we see the node, not on every subsequent node status update event
(a simplified sketch of this ordering follows).
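
The sketch below illustrates only the ordering: the clear call is placed after
the early return, so repeated status-update events with an unchanged node IP
never reach it. sketchMaster and its string parameters are simplified,
hypothetical stand-ins for the real OsdnMaster handler, which also manages
subnets.

```go
package main

import "fmt"

// sketchMaster is a hypothetical, simplified stand-in for OsdnMaster.
type sketchMaster struct {
	hostSubnetNodeIPs map[string]string // node UID -> last seen node IP
}

func (m *sketchMaster) clearInitialNodeNetworkUnavailableCondition(name string) {
	fmt.Printf("clearing NodeNetworkUnavailable for %s\n", name)
}

func (m *sketchMaster) handleAddOrUpdateNode(uid, name, nodeIP string) {
	// Kubelet-driven status updates with an unchanged IP stop here.
	if oldIP, ok := m.hostSubnetNodeIPs[uid]; ok && oldIP == nodeIP {
		return
	}
	// Only reached the first time the node (or a new node IP) is seen.
	m.clearInitialNodeNetworkUnavailableCondition(name)
	m.hostSubnetNodeIPs[uid] = nodeIP
}

func main() {
	m := &sketchMaster{hostSubnetNodeIPs: map[string]string{}}
	m.handleAddOrUpdateNode("uid-1", "node1", "10.0.0.1") // first event: clears condition
	m.handleAddOrUpdateNode("uid-1", "node1", "10.0.0.1") // status update: skipped
}
```
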
openshift-merge-robot authored Mar 16, 2018
2 parents 8f99915 + 7d5f2ac commit 308bb2e
Showing 1 changed file with 21 additions and 29 deletions.
pkg/network/master/subnets.go
@@ -190,55 +190,46 @@ func (master *OsdnMaster) deleteNode(nodeName string) error {
 // TODO: make upstream kubelet more flexible with overlays and GCE so this
 // condition doesn't get added for network plugins that don't want it, and then
 // we can remove this function.
-func (master *OsdnMaster) clearInitialNodeNetworkUnavailableCondition(node *kapi.Node) {
+func (master *OsdnMaster) clearInitialNodeNetworkUnavailableCondition(origNode *kapi.Node) {
+	// Informer cache should not be mutated, so get a copy of the object
+	node := origNode.DeepCopy()
 	knode := node
 	cleared := false
 	resultErr := retry.RetryOnConflict(retry.DefaultBackoff, func() error {
 		var err error
 
 		if knode != node {
-			knode, err = master.kClient.Core().Nodes().Get(node.ObjectMeta.Name, metav1.GetOptions{})
+			knode, err = master.kClient.Core().Nodes().Get(node.Name, metav1.GetOptions{})
 			if err != nil {
 				return err
 			}
 		}
 
-		// Let caller modify knode's status, then push to api server.
-		_, condition := GetNodeCondition(&node.Status, kapi.NodeNetworkUnavailable)
-		if condition != nil && condition.Status != kapi.ConditionFalse && condition.Reason == "NoRouteCreated" {
-			condition.Status = kapi.ConditionFalse
-			condition.Reason = "RouteCreated"
-			condition.Message = "openshift-sdn cleared kubelet-set NoRouteCreated"
-			condition.LastTransitionTime = metav1.Now()
-			knode, err = master.kClient.Core().Nodes().UpdateStatus(knode)
-			if err == nil {
-				cleared = true
+		for i := range knode.Status.Conditions {
+			if knode.Status.Conditions[i].Type == kapi.NodeNetworkUnavailable {
+				condition := &knode.Status.Conditions[i]
+				if condition.Status != kapi.ConditionFalse && condition.Reason == "NoRouteCreated" {
+					condition.Status = kapi.ConditionFalse
+					condition.Reason = "RouteCreated"
+					condition.Message = "openshift-sdn cleared kubelet-set NoRouteCreated"
+					condition.LastTransitionTime = metav1.Now()
+
+					if knode, err = master.kClient.Core().Nodes().UpdateStatus(knode); err == nil {
+						cleared = true
+					}
+				}
+				break
 			}
 		}
 		return err
 	})
 	if resultErr != nil {
 		utilruntime.HandleError(fmt.Errorf("status update failed for local node: %v", resultErr))
 	} else if cleared {
-		glog.Infof("Cleared node NetworkUnavailable/NoRouteCreated condition for %s", node.ObjectMeta.Name)
+		glog.Infof("Cleared node NetworkUnavailable/NoRouteCreated condition for %s", node.Name)
 	}
 }
 
-// TODO remove this and switch to external
-// GetNodeCondition extracts the provided condition from the given status and returns that.
-// Returns nil and -1 if the condition is not present, and the index of the located condition.
-func GetNodeCondition(status *kapi.NodeStatus, conditionType kapi.NodeConditionType) (int, *kapi.NodeCondition) {
-	if status == nil {
-		return -1, nil
-	}
-	for i := range status.Conditions {
-		if status.Conditions[i].Type == conditionType {
-			return i, &status.Conditions[i]
-		}
-	}
-	return -1, nil
-}
-
 func (master *OsdnMaster) watchNodes() {
 	funcs := common.InformerFuncs(&kapi.Node{}, master.handleAddOrUpdateNode, master.handleDeleteNode)
 	master.kubeInformers.Core().InternalVersion().Nodes().Informer().AddEventHandler(funcs)
@@ -258,14 +249,15 @@ func (master *OsdnMaster) handleAddOrUpdateNode(obj, _ interface{}, eventType wa
 		utilruntime.HandleError(fmt.Errorf("Node IP is not set for node %s, skipping %s event, node: %v", node.Name, eventType, node))
 		return
 	}
-	master.clearInitialNodeNetworkUnavailableCondition(node)
 
 	if oldNodeIP, ok := master.hostSubnetNodeIPs[node.UID]; ok && (nodeIP == oldNodeIP) {
 		return
 	}
 	// Node status is frequently updated by kubelet, so log only if the above condition is not met
 	glog.V(5).Infof("Watch %s event for Node %q", eventType, node.Name)
 
+	master.clearInitialNodeNetworkUnavailableCondition(node)
+
 	usedNodeIP, err := master.addNode(node.Name, string(node.UID), nodeIP, nil)
 	if err != nil {
 		utilruntime.HandleError(fmt.Errorf("Error creating subnet for node %s, ip %s: %v", node.Name, nodeIP, err))
