-
Notifications
You must be signed in to change notification settings - Fork 466
daemon: selectively reboot based on diffs of applied MCs #2259
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
84362bf
81476d4
45a1030
e443af7
85ac6c0
ffacb2d
0103ee1
e25a613
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -48,6 +48,16 @@ const ( | |
| fipsFile = "/proc/sys/crypto/fips_enabled" | ||
| extensionsRepo = "/etc/yum.repos.d/coreos-extensions.repo" | ||
| osImageContentBaseDir = "/run/mco-machine-os-content/" | ||
|
|
||
| // These are the actions for a node to take after applying config changes. (e.g. a new machineconfig is applied) | ||
| // "None" means no special action needs to be taken. A drain will still happen. | ||
| // This currently happens when ssh keys or pull secret (/var/lib/kubelet/config.json) is changed | ||
| postConfigChangeActionNone = "none" | ||
| // Rebooting is still the default scenario for any other change | ||
| postConfigChangeActionReboot = "reboot" | ||
| // Crio reload will happen when /etc/containers/registries.conf is changed. This will cause | ||
| // a "systemctl reload crio" | ||
| postConfigChangeActionReloadCrio = "reload crio" | ||
| ) | ||
|
|
||
| func writeFileAtomicallyWithDefaults(fpath string, b []byte) error { | ||
|
|
@@ -93,9 +103,70 @@ func getNodeRef(node *corev1.Node) *corev1.ObjectReference { | |
| } | ||
| } | ||
|
|
||
| // finalizeAndReboot is the last step in an update(), and it can also | ||
| // be called as a special case for the "bootstrap pivot". | ||
| func (dn *Daemon) finalizeAndReboot(newConfig *mcfgv1.MachineConfig) (retErr error) { | ||
| func reloadService(name string) error { | ||
| _, err := runGetOut("systemctl", "reload", name) | ||
| return err | ||
| } | ||
|
|
||
| // performPostConfigChangeAction takes action based on what postConfigChangeAction has been asked. | ||
| // For non-reboot action, it applies configuration, updates node's config and state. | ||
| // In the end uncordon node to schedule workload. | ||
| // If at any point an error occurs, we reboot the node so that node has correct configuration. | ||
| func (dn *Daemon) performPostConfigChangeAction(postConfigChangeActions []string, configName string) error { | ||
|
||
| if ctrlcommon.InSlice(postConfigChangeActionReboot, postConfigChangeActions) { | ||
| dn.logSystem("Rebooting node") | ||
| return dn.reboot(fmt.Sprintf("Node will reboot into config %s", configName)) | ||
| } | ||
|
|
||
| if ctrlcommon.InSlice(postConfigChangeActionNone, postConfigChangeActions) { | ||
| if dn.recorder != nil { | ||
| dn.recorder.Eventf(getNodeRef(dn.node), corev1.EventTypeNormal, "SkipReboot", "Config changes do not require reboot.") | ||
| } | ||
| dn.logSystem("Node has Desired Config %s, skipping reboot", configName) | ||
| } | ||
|
|
||
| if ctrlcommon.InSlice(postConfigChangeActionReloadCrio, postConfigChangeActions) { | ||
| serviceName := "crio" | ||
|
|
||
| if err := reloadService(serviceName); err != nil { | ||
| if dn.recorder != nil { | ||
| dn.recorder.Eventf(getNodeRef(dn.node), corev1.EventTypeWarning, "FailedServiceReload", fmt.Sprintf("Reloading %s service failed. Error: %v", serviceName, err)) | ||
| } | ||
| dn.logSystem("Reloading %s configuration failed, node will reboot instead. Error: %v", serviceName, err) | ||
kikisdeliveryservice marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| dn.reboot(fmt.Sprintf("Node will reboot into config %s", configName)) | ||
| } | ||
|
|
||
| if dn.recorder != nil { | ||
| dn.recorder.Eventf(getNodeRef(dn.node), corev1.EventTypeNormal, "SkipReboot", "Config changes do not require reboot. Service %s was reloaded.", serviceName) | ||
| } | ||
| dn.logSystem("%s config reloaded successfully! Desired config %s has been applied, skipping reboot", serviceName, configName) | ||
| } | ||
|
|
||
| // We are here, which means reboot was not needed to apply the configuration. | ||
|
|
||
| // Get current state of node, in case of an error reboot | ||
| state, err := dn.getStateAndConfigs(configName) | ||
| if err != nil { | ||
| glog.Errorf("Error processing state and configs, node will reboot instead. Error: %v", err) | ||
| return dn.reboot(fmt.Sprintf("Node will reboot into config %s", configName)) | ||
| } | ||
|
|
||
| var inDesiredConfig bool | ||
| if inDesiredConfig, err = dn.updateConfigAndState(state); err != nil { | ||
| glog.Errorf("Setting node's state to Done failed, node will reboot instead. Error: %v", err) | ||
| return dn.reboot(fmt.Sprintf("Node will reboot into config %s", configName)) | ||
| } | ||
| if inDesiredConfig { | ||
| return nil | ||
| } | ||
|
|
||
| // currentConfig != desiredConfig, kick off an update | ||
| return dn.triggerUpdateWithMachineConfig(state.currentConfig, state.desiredConfig) | ||
| } | ||
|
|
||
| // finalizeBeforeReboot is the last step in an update() and then we take appropriate postConfigChangeAction. | ||
| // It can also be called as a special case for the "bootstrap pivot". | ||
| func (dn *Daemon) finalizeBeforeReboot(newConfig *mcfgv1.MachineConfig) (retErr error) { | ||
| if out, err := dn.storePendingState(newConfig, 1); err != nil { | ||
| return errors.Wrapf(err, "failed to log pending config: %s", string(out)) | ||
| } | ||
|
|
@@ -114,8 +185,7 @@ func (dn *Daemon) finalizeAndReboot(newConfig *mcfgv1.MachineConfig) (retErr err | |
| dn.recorder.Eventf(getNodeRef(dn.node), corev1.EventTypeNormal, "PendingConfig", fmt.Sprintf("Written pending config %s", newConfig.GetName())) | ||
| } | ||
|
|
||
| // reboot. this function shouldn't actually return. | ||
| return dn.reboot(fmt.Sprintf("Node will reboot into config %v", newConfig.GetName())) | ||
| return nil | ||
| } | ||
|
|
||
| func (dn *Daemon) drain() error { | ||
|
|
@@ -394,6 +464,101 @@ func (dn *Daemon) applyOSChanges(oldConfig, newConfig *mcfgv1.MachineConfig) (re | |
|
|
||
| } | ||
|
|
||
| func calculatePostConfigChangeActionFromFileDiffs(oldIgnConfig, newIgnConfig ign3types.Config) (actions []string) { | ||
| filesPostConfigChangeActionNone := []string{ | ||
| "/var/lib/kubelet/config.json", | ||
| } | ||
| filesPostConfigChangeActionReloadCrio := []string{ | ||
| "/etc/containers/registries.conf", | ||
| } | ||
|
|
||
| oldFileSet := make(map[string]ign3types.File) | ||
| for _, f := range oldIgnConfig.Storage.Files { | ||
| oldFileSet[f.Path] = f | ||
| } | ||
| newFileSet := make(map[string]ign3types.File) | ||
| for _, f := range newIgnConfig.Storage.Files { | ||
| newFileSet[f.Path] = f | ||
| } | ||
| diffFileSet := []string{} | ||
|
|
||
| // First check if any files were removed | ||
| for path := range oldFileSet { | ||
| _, ok := newFileSet[path] | ||
| if !ok { | ||
| // debug: remove | ||
| glog.Infof("File diff: %v was deleted", path) | ||
| diffFileSet = append(diffFileSet, path) | ||
| } | ||
| } | ||
|
|
||
| // Now check if any files were added/changed | ||
| for path, newFile := range newFileSet { | ||
| oldFile, ok := oldFileSet[path] | ||
| if !ok { | ||
| // debug: remove | ||
| glog.Infof("File diff: %v was added", path) | ||
| diffFileSet = append(diffFileSet, path) | ||
| } else if !reflect.DeepEqual(oldFile, newFile) { | ||
| // debug: remove | ||
| glog.Infof("File diff: detected change to %v", newFile.Path) | ||
| diffFileSet = append(diffFileSet, path) | ||
| } | ||
| } | ||
|
|
||
| // Now calculate action | ||
| for _, k := range diffFileSet { | ||
| if ctrlcommon.InSlice(k, filesPostConfigChangeActionNone) { | ||
| continue | ||
| } else if ctrlcommon.InSlice(k, filesPostConfigChangeActionReloadCrio) { | ||
| actions = []string{postConfigChangeActionReloadCrio} | ||
| continue | ||
| } else { | ||
| actions = []string{postConfigChangeActionReboot} | ||
| break | ||
| } | ||
| } | ||
|
|
||
| if len(actions) == 0 { | ||
| actions = []string{postConfigChangeActionNone} | ||
| } | ||
| return | ||
| } | ||
|
|
||
| func calculatePostConfigChangeAction(oldConfig, newConfig *mcfgv1.MachineConfig) ([]string, error) { | ||
| // If a machine-config-daemon-force file is present, it means the user wants to | ||
| // move to desired state without additional validation. We will reboot the node in | ||
| // this case regardless of what MachineConfig diff is. | ||
| if _, err := os.Stat(constants.MachineConfigDaemonForceFile); err == nil { | ||
| if err := os.Remove(constants.MachineConfigDaemonForceFile); err != nil { | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. So this does change the behaviour of how the forcefile works slightly, but I guess its probably for the better, since this way it stays on the system until an update happens. Off the top of my head I don't think this should cause a problem. So +1 |
||
| return []string{}, errors.Wrap(err, "failed to remove force validation file") | ||
| } | ||
| glog.Infof("Setting post config change action to postConfigChangeActionReboot; %s present", constants.MachineConfigDaemonForceFile) | ||
| return []string{postConfigChangeActionReboot}, nil | ||
| } | ||
|
|
||
| diff, err := newMachineConfigDiff(oldConfig, newConfig) | ||
| if err != nil { | ||
| return []string{}, err | ||
| } | ||
| if diff.osUpdate || diff.kargs || diff.fips || diff.units || diff.kernelType || diff.extensions { | ||
| // must reboot | ||
| return []string{postConfigChangeActionReboot}, nil | ||
| } | ||
|
|
||
| oldIgnConfig, err := ctrlcommon.ParseAndConvertConfig(oldConfig.Spec.Config.Raw) | ||
| if err != nil { | ||
| return []string{}, err | ||
| } | ||
| newIgnConfig, err := ctrlcommon.ParseAndConvertConfig(newConfig.Spec.Config.Raw) | ||
| if err != nil { | ||
| return []string{}, err | ||
| } | ||
|
|
||
| // We don't actually have to consider ssh keys changes, which is the only section of passwd that is allowed to change | ||
| return calculatePostConfigChangeActionFromFileDiffs(oldIgnConfig, newIgnConfig), nil | ||
| } | ||
|
|
||
| // update the node to the provided node configuration. | ||
| func (dn *Daemon) update(oldConfig, newConfig *mcfgv1.MachineConfig) (retErr error) { | ||
| oldConfig = canonicalizeEmptyMC(oldConfig) | ||
|
|
@@ -440,6 +605,11 @@ func (dn *Daemon) update(oldConfig, newConfig *mcfgv1.MachineConfig) (retErr err | |
|
|
||
| dn.logSystem("Starting update from %s to %s: %+v", oldConfigName, newConfigName, diff) | ||
|
|
||
| actions, err := calculatePostConfigChangeAction(oldConfig, newConfig) | ||
| if err != nil { | ||
| return err | ||
| } | ||
|
|
||
| if err := dn.drain(); err != nil { | ||
| return err | ||
| } | ||
|
|
@@ -515,7 +685,11 @@ func (dn *Daemon) update(oldConfig, newConfig *mcfgv1.MachineConfig) (retErr err | |
| glog.Info("Updated kernel tuning arguments") | ||
| } | ||
|
|
||
| return dn.finalizeAndReboot(newConfig) | ||
| if err := dn.finalizeBeforeReboot(newConfig); err != nil { | ||
| return err | ||
| } | ||
|
|
||
| return dn.performPostConfigChangeAction(actions, newConfig.GetName()) | ||
| } | ||
|
|
||
| // removeRollback removes the rpm-ostree rollback deployment. It | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.