From ebda62b67296fa5a21fb06ffd8035e83d488774f Mon Sep 17 00:00:00 2001 From: Ravi Sankar Penta Date: Mon, 26 Feb 2018 15:02:00 -0800 Subject: [PATCH 1/2] Fix clearInitialNodeNetworkUnavailableCondition() in sdn master This change fixes these 2 issues: - Currently, clearing NodeNetworkUnavailable node condition only works if we are successful in updating the node status during the first iteration. Subsequent retries will not work because: 1. knode != node 2. node.Status is updated in memory 3. UpdateNodeStatus(knode) (3) will have no effect as in step (2) node.Status is updated but not knode.Status - Node object passed to this method is pointer to an item in the informer cache and it should not be modified directly. --- pkg/network/master/subnets.go | 47 ++++++++++++++--------------------- 1 file changed, 19 insertions(+), 28 deletions(-) diff --git a/pkg/network/master/subnets.go b/pkg/network/master/subnets.go index 2e077cfb5e50..5044f716c62c 100644 --- a/pkg/network/master/subnets.go +++ b/pkg/network/master/subnets.go @@ -190,29 +190,35 @@ func (master *OsdnMaster) deleteNode(nodeName string) error { // TODO: make upstream kubelet more flexible with overlays and GCE so this // condition doesn't get added for network plugins that don't want it, and then // we can remove this function. -func (master *OsdnMaster) clearInitialNodeNetworkUnavailableCondition(node *kapi.Node) { +func (master *OsdnMaster) clearInitialNodeNetworkUnavailableCondition(origNode *kapi.Node) { + // Informer cache should not be mutated, so get a copy of the object + node := origNode.DeepCopy() knode := node cleared := false resultErr := retry.RetryOnConflict(retry.DefaultBackoff, func() error { var err error if knode != node { - knode, err = master.kClient.Core().Nodes().Get(node.ObjectMeta.Name, metav1.GetOptions{}) + knode, err = master.kClient.Core().Nodes().Get(node.Name, metav1.GetOptions{}) if err != nil { return err } } - // Let caller modify knode's status, then push to api server. - _, condition := GetNodeCondition(&node.Status, kapi.NodeNetworkUnavailable) - if condition != nil && condition.Status != kapi.ConditionFalse && condition.Reason == "NoRouteCreated" { - condition.Status = kapi.ConditionFalse - condition.Reason = "RouteCreated" - condition.Message = "openshift-sdn cleared kubelet-set NoRouteCreated" - condition.LastTransitionTime = metav1.Now() - knode, err = master.kClient.Core().Nodes().UpdateStatus(knode) - if err == nil { - cleared = true + for i := range knode.Status.Conditions { + if knode.Status.Conditions[i].Type == kapi.NodeNetworkUnavailable { + condition := &knode.Status.Conditions[i] + if condition.Status != kapi.ConditionFalse && condition.Reason == "NoRouteCreated" { + condition.Status = kapi.ConditionFalse + condition.Reason = "RouteCreated" + condition.Message = "openshift-sdn cleared kubelet-set NoRouteCreated" + condition.LastTransitionTime = metav1.Now() + + if knode, err = master.kClient.Core().Nodes().UpdateStatus(knode); err == nil { + cleared = true + } + } + break } } return err @@ -220,23 +226,8 @@ func (master *OsdnMaster) clearInitialNodeNetworkUnavailableCondition(node *kapi if resultErr != nil { utilruntime.HandleError(fmt.Errorf("status update failed for local node: %v", resultErr)) } else if cleared { - glog.Infof("Cleared node NetworkUnavailable/NoRouteCreated condition for %s", node.ObjectMeta.Name) - } -} - -// TODO remove this and switch to external -// GetNodeCondition extracts the provided condition from the given status and returns that. -// Returns nil and -1 if the condition is not present, and the index of the located condition. -func GetNodeCondition(status *kapi.NodeStatus, conditionType kapi.NodeConditionType) (int, *kapi.NodeCondition) { - if status == nil { - return -1, nil - } - for i := range status.Conditions { - if status.Conditions[i].Type == conditionType { - return i, &status.Conditions[i] - } + glog.Infof("Cleared node NetworkUnavailable/NoRouteCreated condition for %s", node.Name) } - return -1, nil } func (master *OsdnMaster) watchNodes() { From 7d5f2acac5eb0c0fd853bbf8efade73862150d3b Mon Sep 17 00:00:00 2001 From: Ravi Sankar Penta Date: Mon, 26 Feb 2018 15:24:33 -0800 Subject: [PATCH 2/2] Avoid NodeNetworkUnavailable condition check for every node status update - We know that kubelet sets NodeNetworkUnavailable condition when the node is created/registered with api server. - So we only need to call clearInitialNodeNetworkUnavailableCondition() for the first time and not during subsequent node status update events. --- pkg/network/master/subnets.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pkg/network/master/subnets.go b/pkg/network/master/subnets.go index 5044f716c62c..65c26c104e81 100644 --- a/pkg/network/master/subnets.go +++ b/pkg/network/master/subnets.go @@ -249,7 +249,6 @@ func (master *OsdnMaster) handleAddOrUpdateNode(obj, _ interface{}, eventType wa utilruntime.HandleError(fmt.Errorf("Node IP is not set for node %s, skipping %s event, node: %v", node.Name, eventType, node)) return } - master.clearInitialNodeNetworkUnavailableCondition(node) if oldNodeIP, ok := master.hostSubnetNodeIPs[node.UID]; ok && (nodeIP == oldNodeIP) { return @@ -257,6 +256,8 @@ func (master *OsdnMaster) handleAddOrUpdateNode(obj, _ interface{}, eventType wa // Node status is frequently updated by kubelet, so log only if the above condition is not met glog.V(5).Infof("Watch %s event for Node %q", eventType, node.Name) + master.clearInitialNodeNetworkUnavailableCondition(node) + usedNodeIP, err := master.addNode(node.Name, string(node.UID), nodeIP, nil) if err != nil { utilruntime.HandleError(fmt.Errorf("Error creating subnet for node %s, ip %s: %v", node.Name, nodeIP, err))