Skip to content

Commit

Permalink
nfd-master: get node object only once when updating node
Browse files Browse the repository at this point in the history
Prevent excess queries of node objects from the Kubernetes apiserver.
This significantly speeds up node updates (and reduces the load on the
apiserver) as the client-side throttling (which is good) does not bite
us that hard.
  • Loading branch information
marquiz committed Apr 3, 2024
1 parent fcf819a commit c999440
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 44 deletions.
12 changes: 2 additions & 10 deletions pkg/nfd-master/nfd-master-internal_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,7 @@ func TestUpdateNodeObject(t *testing.T) {
fakeMaster := newFakeMaster(fakeCli)

Convey("When I successfully update the node with feature labels", func() {
err := fakeMaster.updateNodeObject(testNodeName, featureLabels, featureAnnotations, featureExtResources, nil)
err := fakeMaster.updateNodeObject(testNode, featureLabels, featureAnnotations, featureExtResources, nil)
Convey("Error is nil", func() {
So(err, ShouldBeNil)
})
Expand Down Expand Up @@ -185,19 +185,11 @@ func TestUpdateNodeObject(t *testing.T) {
})
})

Convey("When I fail to get a node while updating feature labels", func() {
err := fakeMaster.updateNodeObject("non-existent-node", featureLabels, featureAnnotations, featureExtResources, nil)

Convey("Error is produced", func() {
So(err, ShouldBeError)
})
})

Convey("When I fail to patch a node", func() {
fakeCli.CoreV1().(*fakecorev1client.FakeCoreV1).PrependReactor("patch", "nodes", func(action clienttesting.Action) (handled bool, ret runtime.Object, err error) {
return true, &v1.Node{}, errors.New("Fake error when patching node")
})
err := fakeMaster.updateNodeObject(testNodeName, nil, featureAnnotations, ExtendedResources{"": ""}, nil)
err := fakeMaster.updateNodeObject(testNode, nil, featureAnnotations, ExtendedResources{"": ""}, nil)

Convey("Error is produced", func() {
So(err, ShouldBeError)
Expand Down
58 changes: 26 additions & 32 deletions pkg/nfd-master/nfd-master.go
Original file line number Diff line number Diff line change
Expand Up @@ -503,7 +503,7 @@ func (m *nfdMaster) prune() error {
klog.InfoS("pruning node...", "nodeName", node.Name)

// Prune labels and extended resources
err := m.updateNodeObject(node.Name, Labels{}, Annotations{}, ExtendedResources{}, []corev1.Taint{})
err := m.updateNodeObject(&node, Labels{}, Annotations{}, ExtendedResources{}, []corev1.Taint{})

Check warning on line 506 in pkg/nfd-master/nfd-master.go

View check run for this annotation

Codecov / codecov/patch

pkg/nfd-master/nfd-master.go#L506

Added line #L506 was not covered by tests
if err != nil {
nodeUpdateFailures.Inc()
return fmt.Errorf("failed to prune node %q: %v", node.Name, err)
Expand Down Expand Up @@ -692,8 +692,13 @@ func (m *nfdMaster) SetLabels(c context.Context, r *pb.SetLabelsRequest) (*pb.Se
klog.InfoS("gRPC SetLabels request received", "nodeName", r.NodeName)
}
if !m.config.NoPublish {
// Fetch the node object.
node, err := m.getNode(r.NodeName)
if err != nil {
return &pb.SetLabelsReply{}, err
}

Check warning on line 699 in pkg/nfd-master/nfd-master.go

View check run for this annotation

Codecov / codecov/patch

pkg/nfd-master/nfd-master.go#L698-L699

Added lines #L698 - L699 were not covered by tests
// Create labels et al
if err := m.refreshNodeFeatures(r.NodeName, r.GetLabels(), r.GetFeatures()); err != nil {
if err := m.refreshNodeFeatures(node, r.GetLabels(), r.GetFeatures()); err != nil {
nodeUpdateFailures.Inc()
return &pb.SetLabelsReply{}, err
}
Expand All @@ -716,15 +721,15 @@ func (m *nfdMaster) nfdAPIUpdateAllNodes() error {
return nil
}

func (m *nfdMaster) nfdAPIUpdateOneNode(nodeName string) error {
func (m *nfdMaster) nfdAPIUpdateOneNode(node *corev1.Node) error {

Check warning on line 724 in pkg/nfd-master/nfd-master.go

View check run for this annotation

Codecov / codecov/patch

pkg/nfd-master/nfd-master.go#L724

Added line #L724 was not covered by tests
if m.nfdController == nil || m.nfdController.featureLister == nil {
return nil
}

sel := k8sLabels.SelectorFromSet(k8sLabels.Set{nfdv1alpha1.NodeFeatureObjNodeNameLabel: nodeName})
sel := k8sLabels.SelectorFromSet(k8sLabels.Set{nfdv1alpha1.NodeFeatureObjNodeNameLabel: node.Name})

Check warning on line 729 in pkg/nfd-master/nfd-master.go

View check run for this annotation

Codecov / codecov/patch

pkg/nfd-master/nfd-master.go#L729

Added line #L729 was not covered by tests
objs, err := m.nfdController.featureLister.List(sel)
if err != nil {
return fmt.Errorf("failed to get NodeFeature resources for node %q: %w", nodeName, err)
return fmt.Errorf("failed to get NodeFeature resources for node %q: %w", node.Name, err)

Check warning on line 732 in pkg/nfd-master/nfd-master.go

View check run for this annotation

Codecov / codecov/patch

pkg/nfd-master/nfd-master.go#L732

Added line #L732 was not covered by tests
}

// Sort our objects
Expand All @@ -748,7 +753,7 @@ func (m *nfdMaster) nfdAPIUpdateOneNode(nodeName string) error {
return nil
}

klog.V(1).InfoS("processing of node initiated by NodeFeature API", "nodeName", nodeName)
klog.V(1).InfoS("processing of node initiated by NodeFeature API", "nodeName", node.Name)

Check warning on line 756 in pkg/nfd-master/nfd-master.go

View check run for this annotation

Codecov / codecov/patch

pkg/nfd-master/nfd-master.go#L756

Added line #L756 was not covered by tests

features := nfdv1alpha1.NewNodeFeatureSpec()

Expand All @@ -775,7 +780,7 @@ func (m *nfdMaster) nfdAPIUpdateOneNode(nodeName string) error {
// Update node labels et al. This may also mean removing all NFD-owned
// labels (et al.), for example in the case no NodeFeature objects are
// present.
if err := m.refreshNodeFeatures(nodeName, features.Labels, &features.Features); err != nil {
if err := m.refreshNodeFeatures(node, features.Labels, &features.Features); err != nil {

Check warning on line 783 in pkg/nfd-master/nfd-master.go

View check run for this annotation

Codecov / codecov/patch

pkg/nfd-master/nfd-master.go#L783

Added line #L783 was not covered by tests
return err
}

Expand Down Expand Up @@ -820,14 +825,14 @@ func filterExtendedResource(name, value string, features *nfdv1alpha1.Features)
return filteredValue, nil
}

func (m *nfdMaster) refreshNodeFeatures(nodeName string, labels map[string]string, features *nfdv1alpha1.Features) error {
func (m *nfdMaster) refreshNodeFeatures(node *corev1.Node, labels map[string]string, features *nfdv1alpha1.Features) error {
if m.config.AutoDefaultNs {
labels = addNsToMapKeys(labels, nfdv1alpha1.FeatureLabelNs)
} else if labels == nil {
labels = make(map[string]string)
}

crLabels, crAnnotations, crExtendedResources, crTaints := m.processNodeFeatureRule(nodeName, features)
crLabels, crAnnotations, crExtendedResources, crTaints := m.processNodeFeatureRule(node.Name, features)

// Mix in CR-originated labels
maps.Copy(labels, crLabels)
Expand All @@ -849,9 +854,9 @@ func (m *nfdMaster) refreshNodeFeatures(nodeName string, labels map[string]strin
taints = filterTaints(crTaints)
}

err := m.updateNodeObject(nodeName, labels, annotations, extendedResources, taints)
err := m.updateNodeObject(node, labels, annotations, extendedResources, taints)
if err != nil {
klog.ErrorS(err, "failed to update node", "nodeName", nodeName)
klog.ErrorS(err, "failed to update node", "nodeName", node.Name)
return err
}

Expand All @@ -861,14 +866,9 @@ func (m *nfdMaster) refreshNodeFeatures(nodeName string, labels map[string]strin
// setTaints sets node taints and annotations based on the taints passed via
// nodeFeatureRule custom resorce. If empty list of taints is passed, currently
// NFD owned taints and annotations are removed from the node.
func (m *nfdMaster) setTaints(taints []corev1.Taint, nodeName string) error {
// Fetch the node object.
node, err := m.getNode(nodeName)
if err != nil {
return err
}

func (m *nfdMaster) setTaints(taints []corev1.Taint, node *corev1.Node) error {
// De-serialize the taints annotation into corev1.Taint type for comparision below.
var err error
oldTaints := []corev1.Taint{}
if val, ok := node.Annotations[nfdv1alpha1.NodeTaintsAnnotation]; ok {
sts := strings.Split(val, ",")
Expand Down Expand Up @@ -905,11 +905,11 @@ func (m *nfdMaster) setTaints(taints []corev1.Taint, nodeName string) error {
}

if taintsUpdated {
err = controller.PatchNodeTaints(context.TODO(), m.k8sClient, nodeName, node, newNode)
err := controller.PatchNodeTaints(context.TODO(), m.k8sClient, node.Name, node, newNode)

Check warning on line 908 in pkg/nfd-master/nfd-master.go

View check run for this annotation

Codecov / codecov/patch

pkg/nfd-master/nfd-master.go#L908

Added line #L908 was not covered by tests
if err != nil {
return fmt.Errorf("failed to patch the node %v", node.Name)
}
klog.InfoS("updated node taints", "nodeName", nodeName)
klog.InfoS("updated node taints", "nodeName", node.Name)

Check warning on line 912 in pkg/nfd-master/nfd-master.go

View check run for this annotation

Codecov / codecov/patch

pkg/nfd-master/nfd-master.go#L912

Added line #L912 was not covered by tests
}

// Update node annotation that holds the taints managed by us
Expand All @@ -930,7 +930,7 @@ func (m *nfdMaster) setTaints(taints []corev1.Taint, nodeName string) error {
if err != nil {
return fmt.Errorf("error while patching node object: %w", err)
}
klog.V(1).InfoS("patched node annotations for taints", "nodeName", nodeName)
klog.V(1).InfoS("patched node annotations for taints", "nodeName", node.Name)

Check warning on line 933 in pkg/nfd-master/nfd-master.go

View check run for this annotation

Codecov / codecov/patch

pkg/nfd-master/nfd-master.go#L933

Added line #L933 was not covered by tests
}
return nil
}
Expand Down Expand Up @@ -1024,13 +1024,7 @@ func (m *nfdMaster) processNodeFeatureRule(nodeName string, features *nfdv1alpha
// updateNodeObject ensures the Kubernetes node object is up to date,
// creating new labels and extended resources where necessary and removing
// outdated ones. Also updates the corresponding annotations.
func (m *nfdMaster) updateNodeObject(nodeName string, labels Labels, featureAnnotations Annotations, extendedResources ExtendedResources, taints []corev1.Taint) error {
// Get the worker node object
node, err := m.getNode(nodeName)
if err != nil {
return err
}

func (m *nfdMaster) updateNodeObject(node *corev1.Node, labels Labels, featureAnnotations Annotations, extendedResources ExtendedResources, taints []corev1.Taint) error {
annotations := make(Annotations)

// Store names of labels in an annotation
Expand Down Expand Up @@ -1083,7 +1077,7 @@ func (m *nfdMaster) updateNodeObject(nodeName string, labels Labels, featureAnno

// patch node status with extended resource changes
statusPatches := m.createExtendedResourcePatches(node, extendedResources)
err = m.patchNodeStatus(node.Name, statusPatches)
err := m.patchNodeStatus(node.Name, statusPatches)
if err != nil {
return fmt.Errorf("error while patching extended resources: %w", err)
}
Expand All @@ -1096,13 +1090,13 @@ func (m *nfdMaster) updateNodeObject(nodeName string, labels Labels, featureAnno

if len(patches) > 0 || len(statusPatches) > 0 {
nodeUpdates.Inc()
klog.InfoS("node updated", "nodeName", nodeName)
klog.InfoS("node updated", "nodeName", node.Name)
} else {
klog.V(1).InfoS("no updates to node", "nodeName", nodeName)
klog.V(1).InfoS("no updates to node", "nodeName", node.Name)

Check warning on line 1095 in pkg/nfd-master/nfd-master.go

View check run for this annotation

Codecov / codecov/patch

pkg/nfd-master/nfd-master.go#L1095

Added line #L1095 was not covered by tests
}

// Set taints
err = m.setTaints(taints, node.Name)
err = m.setTaints(taints, node)
if err != nil {
return err
}
Expand Down
4 changes: 2 additions & 2 deletions pkg/nfd-master/node-updater-pool.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,9 +53,9 @@ func (u *nodeUpdaterPool) processNodeUpdateRequest(queue workqueue.RateLimitingI
nodeUpdateRequests.Inc()

// Check if node exists
if _, err := u.nfdMaster.getNode(nodeName); apierrors.IsNotFound(err) {
if node, err := u.nfdMaster.getNode(nodeName); apierrors.IsNotFound(err) {
klog.InfoS("node not found, skip update", "nodeName", nodeName)
} else if err := u.nfdMaster.nfdAPIUpdateOneNode(nodeName); err != nil {
} else if err := u.nfdMaster.nfdAPIUpdateOneNode(node); err != nil {
if n := queue.NumRequeues(nodeName); n < 15 {
klog.InfoS("retrying node update", "nodeName", nodeName, "lastError", err, "numRetries", n)
} else {
Expand Down

0 comments on commit c999440

Please sign in to comment.