Skip drain on Single Node deployment #2457
pkg/controller/node/node_controller_test.go:

```diff
@@ -22,14 +22,18 @@ import (
 	"k8s.io/client-go/tools/record"

-	apicfgv1 "github.com/openshift/api/config/v1"
+	configv1 "github.com/openshift/api/config/v1"
 	fakeconfigv1client "github.com/openshift/client-go/config/clientset/versioned/fake"
 	configv1informer "github.com/openshift/client-go/config/informers/externalversions"
 	mcfgv1 "github.com/openshift/machine-config-operator/pkg/apis/machineconfiguration.openshift.io/v1"
+	ctrlcommon "github.com/openshift/machine-config-operator/pkg/controller/common"
+	daemonconsts "github.com/openshift/machine-config-operator/pkg/daemon/constants"
 	"github.com/openshift/machine-config-operator/pkg/generated/clientset/versioned/fake"
 	informers "github.com/openshift/machine-config-operator/pkg/generated/informers/externalversions"
+	"github.com/openshift/machine-config-operator/pkg/version"
 	"github.com/openshift/machine-config-operator/test/helpers"
 	"github.com/stretchr/testify/assert"
+	utilrand "k8s.io/apimachinery/pkg/util/rand"
 )

 var (
```
```diff
@@ -44,6 +48,7 @@ type fixture struct {
 	kubeclient      *k8sfake.Clientset
 	schedulerClient *fakeconfigv1client.Clientset

+	ccLister   []*mcfgv1.ControllerConfig
 	mcpLister  []*mcfgv1.MachineConfigPool
 	nodeLister []*corev1.Node
```
```diff
@@ -72,9 +77,10 @@ func (f *fixture) newController() *Controller {
 	i := informers.NewSharedInformerFactory(f.client, noResyncPeriodFunc())
 	k8sI := kubeinformers.NewSharedInformerFactory(f.kubeclient, noResyncPeriodFunc())
 	ci := configv1informer.NewSharedInformerFactory(f.schedulerClient, noResyncPeriodFunc())
-	c := New(i.Machineconfiguration().V1().MachineConfigPools(), k8sI.Core().V1().Nodes(),
+	c := New(i.Machineconfiguration().V1().ControllerConfigs(), i.Machineconfiguration().V1().MachineConfigPools(), k8sI.Core().V1().Nodes(),
 		ci.Config().V1().Schedulers(), f.kubeclient, f.client)

+	c.ccListerSynced = alwaysReady
 	c.mcpListerSynced = alwaysReady
 	c.nodeListerSynced = alwaysReady
 	c.schedulerListerSynced = alwaysReady
```
```diff
@@ -87,6 +93,9 @@ func (f *fixture) newController() *Controller {
 	k8sI.Start(stopCh)
 	k8sI.WaitForCacheSync(stopCh)

+	for _, c := range f.ccLister {
+		i.Machineconfiguration().V1().ControllerConfigs().Informer().GetIndexer().Add(c)
+	}
 	for _, c := range f.mcpLister {
 		i.Machineconfiguration().V1().MachineConfigPools().Informer().GetIndexer().Add(c)
 	}
```
```diff
@@ -97,6 +106,7 @@ func (f *fixture) newController() *Controller {
 	for _, c := range f.schedulerLister {
 		ci.Config().V1().Schedulers().Informer().GetIndexer().Add(c)
 	}

 	return c
 }
```
```diff
@@ -148,6 +158,20 @@ func (f *fixture) runController(pool string, expectError bool) {
 	}
 }

+func newControllerConfig(name string, topology configv1.TopologyMode) *mcfgv1.ControllerConfig {
+	return &mcfgv1.ControllerConfig{
+		TypeMeta:   metav1.TypeMeta{APIVersion: mcfgv1.SchemeGroupVersion.String()},
+		ObjectMeta: metav1.ObjectMeta{Annotations: map[string]string{daemonconsts.GeneratedByVersionAnnotationKey: version.Raw}, Name: name, UID: types.UID(utilrand.String(5))},
+		Spec: mcfgv1.ControllerConfigSpec{
+			Infra: &configv1.Infrastructure{
+				Status: configv1.InfrastructureStatus{
+					ControlPlaneTopology: topology,
+				},
+			},
+		},
+	}
+}
+
 // checkAction verifies that expected and actual actions are equal and both have
 // same attached resources
 func checkAction(expected, actual core.Action, t *testing.T) {
```
```diff
@@ -188,6 +212,8 @@ func filterInformerActions(actions []core.Action) []core.Action {
 	if len(action.GetNamespace()) == 0 &&
 		(action.Matches("list", "machineconfigpools") ||
 			action.Matches("watch", "machineconfigpools") ||
+			action.Matches("list", "controllerconfigs") ||
+			action.Matches("watch", "controllerconfigs") ||
 			action.Matches("list", "nodes") ||
 			action.Matches("watch", "nodes")) {
 		continue
```
```diff
@@ -758,14 +784,17 @@ func TestSetDesiredMachineConfigAnnotation(t *testing.T) {

 func TestShouldMakeProgress(t *testing.T) {
 	f := newFixture(t)
+	cc := newControllerConfig(ctrlcommon.ControllerConfigName, configv1.TopologyMode(""))
 	mcp := helpers.NewMachineConfigPool("test-cluster-infra", nil, helpers.InfraSelector, "v1")
 	mcpWorker := helpers.NewMachineConfigPool("worker", nil, helpers.WorkerSelector, "v1")
 	mcp.Spec.MaxUnavailable = intStrPtr(intstr.FromInt(1))

 	nodes := []*corev1.Node{
 		newNodeWithLabel("node-0", "v1", "v1", map[string]string{"node-role/worker": "", "node-role/infra": ""}),
 		newNodeWithLabel("node-1", "v0", "v0", map[string]string{"node-role/worker": "", "node-role/infra": ""}),
 	}

+	f.ccLister = append(f.ccLister, cc)
 	f.mcpLister = append(f.mcpLister, mcp, mcpWorker)
 	f.objects = append(f.objects, mcp, mcpWorker)
 	f.nodeLister = append(f.nodeLister, nodes...)
```
```diff
@@ -799,8 +828,11 @@

 func TestEmptyCurrentMachineConfig(t *testing.T) {
 	f := newFixture(t)
+	cc := newControllerConfig(ctrlcommon.ControllerConfigName, configv1.TopologyMode(""))
 	mcp := helpers.NewMachineConfigPool("test-cluster-master", nil, helpers.MasterSelector, "")
 	mcp.Spec.MaxUnavailable = intStrPtr(intstr.FromInt(1))

+	f.ccLister = append(f.ccLister, cc)
 	f.mcpLister = append(f.mcpLister, mcp)
 	f.objects = append(f.objects, mcp)
 	f.run(getKey(mcp, t))
```
```diff
@@ -834,6 +866,7 @@ func TestPaused(t *testing.T) {

 func TestShouldUpdateStatusOnlyUpdated(t *testing.T) {
 	f := newFixture(t)
+	cc := newControllerConfig(ctrlcommon.ControllerConfigName, configv1.TopologyMode(""))
 	mcp := helpers.NewMachineConfigPool("test-cluster-infra", nil, helpers.InfraSelector, "v1")
 	mcpWorker := helpers.NewMachineConfigPool("worker", nil, helpers.WorkerSelector, "v1")
 	mcp.Spec.MaxUnavailable = intStrPtr(intstr.FromInt(1))
```
```diff
@@ -842,6 +875,7 @@ func TestShouldUpdateStatusOnlyUpdated(t *testing.T) {
 		newNodeWithLabel("node-1", "v1", "v1", map[string]string{"node-role/worker": "", "node-role/infra": ""}),
 	}

+	f.ccLister = append(f.ccLister, cc)
 	f.mcpLister = append(f.mcpLister, mcp, mcpWorker)
 	f.objects = append(f.objects, mcp, mcpWorker)
 	f.nodeLister = append(f.nodeLister, nodes...)
```
```diff
@@ -859,6 +893,7 @@ func TestShouldUpdateStatusOnlyUpdated(t *testing.T) {

 func TestShouldUpdateStatusOnlyNoProgress(t *testing.T) {
 	f := newFixture(t)
+	cc := newControllerConfig(ctrlcommon.ControllerConfigName, configv1.TopologyMode(""))
 	mcp := helpers.NewMachineConfigPool("test-cluster-infra", nil, helpers.InfraSelector, "v1")
 	mcpWorker := helpers.NewMachineConfigPool("worker", nil, helpers.WorkerSelector, "v1")
 	mcp.Spec.MaxUnavailable = intStrPtr(intstr.FromInt(1))
```
```diff
@@ -867,6 +902,7 @@ func TestShouldUpdateStatusOnlyNoProgress(t *testing.T) {
 		newNodeWithLabel("node-1", "v0", "v1", map[string]string{"node-role/worker": "", "node-role/infra": ""}),
 	}

+	f.ccLister = append(f.ccLister, cc)
 	f.mcpLister = append(f.mcpLister, mcp, mcpWorker)
 	f.objects = append(f.objects, mcp, mcpWorker)
 	f.nodeLister = append(f.nodeLister, nodes...)
```
```diff
@@ -884,6 +920,7 @@ func TestShouldUpdateStatusOnlyNoProgress(t *testing.T) {

 func TestShouldDoNothing(t *testing.T) {
 	f := newFixture(t)
+	cc := newControllerConfig(ctrlcommon.ControllerConfigName, configv1.TopologyMode(""))
 	mcp := helpers.NewMachineConfigPool("test-cluster-infra", nil, helpers.InfraSelector, "v1")
 	mcpWorker := helpers.NewMachineConfigPool("worker", nil, helpers.WorkerSelector, "v1")
 	mcp.Spec.MaxUnavailable = intStrPtr(intstr.FromInt(1))
```
```diff
@@ -894,6 +931,7 @@ func TestShouldDoNothing(t *testing.T) {
 	status := calculateStatus(mcp, nodes)
 	mcp.Status = status

+	f.ccLister = append(f.ccLister, cc)
 	f.mcpLister = append(f.mcpLister, mcp, mcpWorker)
 	f.objects = append(f.objects, mcp, mcpWorker)
 	f.nodeLister = append(f.nodeLister, nodes...)
```
```diff
@@ -904,6 +942,46 @@ func TestShouldDoNothing(t *testing.T) {
 	f.run(getKey(mcp, t))
 }

+func TestControlPlaneTopology(t *testing.T) {
```
Contributor: Sorry, I'm a bit confused by what this is trying to test. Is it checking to see if the ControllerConfig topology propagates to node annotations? Where is that being checked?

Contributor (author): Here, we are currently checking that with a valid controlPlaneTopology, i.e. …, …. Later on, I am thinking of extending the test to also perform ….

Contributor: Hmm, sorry for the dumb question, but I'm still not sure how we are testing that. If an action change happens as a bug, which line of code would return the error? I see we do the same for the above ….

Contributor (author): When run() in the test calls runController(), the node controller's syncHandler(pool) gets called (https://github.com/openshift/machine-config-operator/blob/master/pkg/controller/node/node_controller_test.go#L114). When this runs, various actions can be generated depending on which operations were performed on the node. In the test we filter out list and watch actions (https://github.com/openshift/machine-config-operator/blob/master/pkg/controller/node/node_controller_test.go#L121), since those actions don't really update anything. Other actions, like patch, are checked later during checkAction().
```diff
+	f := newFixture(t)
+	cc := newControllerConfig(ctrlcommon.ControllerConfigName, configv1.SingleReplicaTopologyMode)
+	mcp := helpers.NewMachineConfigPool("test-cluster-infra", nil, helpers.InfraSelector, "v1")
+	mcpWorker := helpers.NewMachineConfigPool("worker", nil, helpers.WorkerSelector, "v1")
+	mcp.Spec.MaxUnavailable = intStrPtr(intstr.FromInt(1))
+	annotations := map[string]string{daemonconsts.ClusterControlPlaneTopologyAnnotationKey: "SingleReplica"}
+
+	nodes := []*corev1.Node{
+		newNodeWithLabel("node-0", "v1", "v1", map[string]string{"node-role/worker": "", "node-role/infra": ""}),
+		newNodeWithLabel("node-1", "v1", "v1", map[string]string{"node-role/worker": "", "node-role/infra": ""}),
+	}
+
+	for _, node := range nodes {
+		addNodeAnnotations(node, annotations)
+	}
+	status := calculateStatus(mcp, nodes)
+	mcp.Status = status
+
+	f.ccLister = append(f.ccLister, cc)
+	f.mcpLister = append(f.mcpLister, mcp, mcpWorker)
+	f.objects = append(f.objects, mcp, mcpWorker)
+	f.nodeLister = append(f.nodeLister, nodes...)
+	for idx := range nodes {
+		f.kubeobjects = append(f.kubeobjects, nodes[idx])
+	}
+
+	f.run(getKey(mcp, t))
+}
+
+// adds annotation to the node
+func addNodeAnnotations(node *corev1.Node, annotations map[string]string) {
+	if node.Annotations == nil {
+		node.Annotations = map[string]string{}
+	}
+	for k, v := range annotations {
+		node.Annotations[k] = v
+	}
+}
+
 func getKey(config *mcfgv1.MachineConfigPool, t *testing.T) string {
 	key, err := cache.DeletionHandlingMetaNamespaceKeyFunc(config)
 	if err != nil {
```
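As the author explains in the thread above, the assertion surface of these tests is the stream of actions recorded by the fake clients: run() drives syncHandler(pool), filterInformerActions strips list/watch noise, and every remaining action must match an expected one. A minimal sketch of that checking loop follows; filterInformerActions and checkAction are real helpers from this file, while the surrounding function and its name are illustrative:

```go
// Illustrative sketch of the fixture's action validation, not the verbatim
// fixture code. Any mutation the controller makes beyond the expected set
// fails the test.
func validateActions(t *testing.T, expected, recorded []core.Action) {
	actions := filterInformerActions(recorded) // drop informer list/watch noise
	for i, action := range actions {
		if len(expected) < i+1 {
			// An unexpected mutation (e.g. a patch produced by cordoning or
			// draining a single-node cluster) has no expected counterpart.
			t.Errorf("%d unexpected actions: %+v", len(actions)-len(expected), actions[i:])
			break
		}
		checkAction(expected[i], action, t)
	}
	if len(expected) > len(actions) {
		t.Errorf("%d additional expected actions: %+v", len(expected)-len(actions), expected[len(actions):])
	}
}
```

So TestControlPlaneTopology passes exactly when syncing a SingleReplica pool produces no unexpected mutations.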
The second changed file carries the daemon-side changes (likely pkg/daemon/daemon.go, given the Daemon methods below):
```diff
@@ -39,6 +39,7 @@ import (
 	"k8s.io/client-go/util/workqueue"
 	"k8s.io/kubectl/pkg/drain"

+	configv1 "github.com/openshift/api/config/v1"
 	"github.com/openshift/machine-config-operator/lib/resourceread"
 	mcfgv1 "github.com/openshift/machine-config-operator/pkg/apis/machineconfiguration.openshift.io/v1"
 	ctrlcommon "github.com/openshift/machine-config-operator/pkg/controller/common"
```
```diff
@@ -1010,7 +1011,7 @@ func (dn *Daemon) checkStateOnFirstRun() error {
 	// take a stab at that and re-run the drain+reboot routine
 	if state.pendingConfig != nil && bootID == dn.bootID {
 		dn.logSystem("drain interrupted, retrying")
-		if err := dn.drain(); err != nil {
+		if err := dn.performDrain(); err != nil {
 			return err
 		}
 		if err := dn.finalizeBeforeReboot(state.pendingConfig); err != nil {
```
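The new performDrain wrapper is referenced here but its body is not part of the quoted hunks. Given the PR title and the topology helpers added at the bottom of this file, a plausible sketch of its shape (an assumption, not the verbatim implementation; only dn.drain, dn.logSystem, isSingleNodeTopology, and getControlPlaneTopology appear in this diff):

```go
// Hedged sketch: performDrain is referenced but not shown in these hunks.
// The shape below is inferred from the PR title and the helpers added below.
func (dn *Daemon) performDrain() error {
	if isSingleNodeTopology(dn.getControlPlaneTopology()) {
		// On a single-node control plane there is nowhere to reschedule the
		// workloads, so draining would only block the update; skip it.
		dn.logSystem("Drain not required, skipping")
		return nil
	}
	// Multi-node clusters keep the pre-existing drain behavior.
	return dn.drain()
}
```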
```diff
@@ -1150,7 +1151,7 @@ func (dn *Daemon) updateConfigAndState(state *stateAndConfigs) (bool, error) {
 	// Great, we've successfully rebooted for the desired config,
 	// let's mark it done!
 	glog.Infof("Completing pending config %s", state.pendingConfig.GetName())
-	if err := dn.completeUpdate(dn.node, state.pendingConfig.GetName()); err != nil {
+	if err := dn.completeUpdate(state.pendingConfig.GetName()); err != nil {
 		MCDUpdateState.WithLabelValues("", err.Error()).SetToCurrentTime()
 		return inDesiredConfig, err
 	}
```
```diff
@@ -1282,8 +1283,8 @@ func (dn *Daemon) prepUpdateFromCluster() (*mcfgv1.MachineConfig, *mcfgv1.Machin
 // completeUpdate marks the node as schedulable again, then deletes the
 // "transient state" file, which signifies that all of those prior steps have
 // been completed.
-func (dn *Daemon) completeUpdate(node *corev1.Node, desiredConfigName string) error {
-	if err := drain.RunCordonOrUncordon(dn.drainer, node, false); err != nil {
+func (dn *Daemon) completeUpdate(desiredConfigName string) error {
+	if err := dn.cordonOrUncordonNode(false); err != nil {
 		return err
 	}
```
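cordonOrUncordonNode is likewise new and not shown in the quoted hunks; the replaced call suggests a thin wrapper that supplies dn.drainer and dn.node itself. A hypothetical sketch:

```go
// Hypothetical shape of the new wrapper; not shown in these hunks. The
// replaced drain.RunCordonOrUncordon(dn.drainer, node, false) call suggests
// the wrapper closes over the daemon's own drainer and node.
func (dn *Daemon) cordonOrUncordonNode(desired bool) error {
	// desired=true cordons the node; desired=false marks it schedulable again.
	return drain.RunCordonOrUncordon(dn.drainer, dn.node, desired)
}
```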
```diff
@@ -1617,3 +1618,23 @@ func (dn *Daemon) senseAndLoadOnceFrom(onceFrom string) (interface{}, onceFromOr
 	return nil, onceFromUnknownConfig, fmt.Errorf("unable to decipher onceFrom config type: %v", err)
 }
+
+func isSingleNodeTopology(topology configv1.TopologyMode) bool {
+	return topology == configv1.SingleReplicaTopologyMode
+}
+
+// getControlPlaneTopology reads from the node annotation and returns the
+// controlPlaneTopology value set in the cluster. If the annotation value
+// is unrecognized, we consider it a highly available cluster.
+func (dn *Daemon) getControlPlaneTopology() configv1.TopologyMode {
+	controlPlaneTopology := dn.node.Annotations[constants.ClusterControlPlaneTopologyAnnotationKey]
+	switch configv1.TopologyMode(controlPlaneTopology) {
+	case configv1.SingleReplicaTopologyMode:
+		return configv1.SingleReplicaTopologyMode
+	case configv1.HighlyAvailableTopologyMode:
+		return configv1.HighlyAvailableTopologyMode
+	default:
+		// for any unhandled case, default to HighlyAvailableTopologyMode
+		return configv1.HighlyAvailableTopologyMode
+	}
+}
```
Contributor: Where is controlPlaneTopology coming from? I see in the commit message: "ControllerConfig now understands ControlPlaneTopology: TopologyMode set in the cluster." But I don't see how we're syncing that into the controlPlaneTopology field in the ControllerConfig; maybe I'm missing something.

Contributor (author): syncMachineConfigController() calls resourceapply.ApplyControllerConfig, which calls resourcemerge.EnsureControllerConfig, which calls ensureControllerConfigSpec(); there we check whether the Infra object has been modified and update the ControllerConfig if it changed. The Infra object is where the controlPlaneTopology value lives, along with other data: https://github.com/openshift/api/blob/master/config/v1/types_infrastructure.go#L86. MCO was already reading and updating the Infra content, so adding the controlPlaneTopology field to the CRD populates its value as well.

Contributor (author): And the operator reads the Infrastructure object from the cluster at https://github.com/openshift/machine-config-operator/blob/master/pkg/operator/sync.go#L220 and syncs the ControllerConfigSpec. There are too many things to follow up...

Contributor: Ahh, ok, thanks for the explanation! Right, it gets synced as part of the infra field. Not the clearest of code paths 🤦
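The code path the author describes bottoms out in a resourcemerge-style comparison. A simplified sketch of that step, where the function name follows the comment above but the body is illustrative of the pattern rather than the exact implementation:

```go
import "k8s.io/apimachinery/pkg/api/equality"

// Simplified sketch of ensureControllerConfigSpec as described above. Because
// the whole Infra object is deep-compared and copied, the new
// Infra.Status.ControlPlaneTopology field propagates without any extra code.
func ensureControllerConfigSpec(modified *bool, existing *mcfgv1.ControllerConfigSpec, required mcfgv1.ControllerConfigSpec) {
	if !equality.Semantic.DeepEqual(existing.Infra, required.Infra) {
		*modified = true
		existing.Infra = required.Infra
	}
}
```

That is why no topology-specific sync code appears in this PR: the operator copies the whole Infra status into the ControllerConfig, the node controller stamps the value onto nodes as the ClusterControlPlaneTopologyAnnotationKey annotation exercised by TestControlPlaneTopology, and the daemon reads it back in getControlPlaneTopology.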