diff --git a/pkg/daemon/daemon.go b/pkg/daemon/daemon.go index 42336a6e25..4e244efb1a 100644 --- a/pkg/daemon/daemon.go +++ b/pkg/daemon/daemon.go @@ -1015,7 +1015,10 @@ func (dn *Daemon) checkStateOnFirstRun() error { if err := dn.drain(); err != nil { return err } - return dn.finalizeAndReboot(state.pendingConfig) + if err := dn.finalizeBeforeReboot(state.pendingConfig); err != nil { + return err + } + return dn.reboot(fmt.Sprintf("Node will reboot into config %v", state.pendingConfig.GetName())) } if err := dn.detectEarlySSHAccessesFromBoot(); err != nil { @@ -1043,7 +1046,10 @@ func (dn *Daemon) checkStateOnFirstRun() error { if err := os.RemoveAll(osImageContentDir); err != nil { return err } - return dn.finalizeAndReboot(state.currentConfig) + if err := dn.finalizeBeforeReboot(state.currentConfig); err != nil { + return err + } + return dn.reboot(fmt.Sprintf("Node will reboot into config %v", state.currentConfig.GetName())) } glog.Info("No bootstrap pivot required; unlinking bootstrap node annotations") @@ -1097,28 +1103,39 @@ func (dn *Daemon) checkStateOnFirstRun() error { glog.Info("Validated on-disk state") } else { glog.Infof("Skipping on-disk validation; %s present", constants.MachineConfigDaemonForceFile) - if err := os.Remove(constants.MachineConfigDaemonForceFile); err != nil { - return errors.Wrap(err, "failed to remove force validation file") - } return dn.triggerUpdateWithMachineConfig(state.currentConfig, state.desiredConfig) } - // We've validated our state. In the case where we had a pendingConfig, - // make that now currentConfig. We update the node annotation, delete the - // state file, etc. - // - // However, it may be the case that desiredConfig changed while we - // were coming up, so we next look at that before uncordoning the node (so - // we don't uncordon and then immediately re-cordon) + // We've validated state. Now, ensure that node is in desired state + var inDesiredConfig bool + if inDesiredConfig, err = dn.updateConfigAndState(state); err != nil { + return err + } + if inDesiredConfig { + return nil + } + + if dn.recorder != nil { + dn.recorder.Eventf(getNodeRef(dn.node), corev1.EventTypeNormal, "BootResync", fmt.Sprintf("Booting node %s, currentConfig %s, desiredConfig %s", dn.node.Name, state.currentConfig.GetName(), state.desiredConfig.GetName())) + } + // currentConfig != desiredConfig, and we're not booting up into the desiredConfig. + // Kick off an update. + return dn.triggerUpdateWithMachineConfig(state.currentConfig, state.desiredConfig) +} + +// updateConfigAndState updates node to desired state, labels nodes as done and uncordon +func (dn *Daemon) updateConfigAndState(state *stateAndConfigs) (bool, error) { + // In the case where we had a pendingConfig, make that now currentConfig. + // We update the node annotation, delete the state file, etc. if state.pendingConfig != nil { if dn.recorder != nil { dn.recorder.Eventf(getNodeRef(dn.node), corev1.EventTypeNormal, "NodeDone", fmt.Sprintf("Setting node %s, currentConfig %s to Done", dn.node.Name, state.pendingConfig.GetName())) } if err := dn.nodeWriter.SetDone(dn.kubeClient.CoreV1().Nodes(), dn.nodeLister, dn.name, state.pendingConfig.GetName()); err != nil { - return errors.Wrap(err, "error setting node's state to Done") + return true, errors.Wrap(err, "error setting node's state to Done") } if out, err := dn.storePendingState(state.pendingConfig, 0); err != nil { - return errors.Wrapf(err, "failed to reset pending config: %s", string(out)) + return true, errors.Wrapf(err, "failed to reset pending config: %s", string(out)) } state.currentConfig = state.pendingConfig @@ -1126,10 +1143,13 @@ func (dn *Daemon) checkStateOnFirstRun() error { if state.bootstrapping { if err := dn.storeCurrentConfigOnDisk(state.currentConfig); err != nil { - return err + return false, err } } + // In case of node reboot, it may be the case that desiredConfig changed while we + // were coming up, so we next look at that before uncordoning the node (so + // we don't uncordon and then immediately re-cordon) inDesiredConfig := state.currentConfig.GetName() == state.desiredConfig.GetName() if inDesiredConfig { if state.pendingConfig != nil { @@ -1138,7 +1158,7 @@ func (dn *Daemon) checkStateOnFirstRun() error { glog.Infof("Completing pending config %s", state.pendingConfig.GetName()) if err := dn.completeUpdate(dn.node, state.pendingConfig.GetName()); err != nil { MCDUpdateState.WithLabelValues("", err.Error()).SetToCurrentTime() - return err + return inDesiredConfig, err } } // If we're degraded here, it means we got an error likely on startup and we retried. @@ -1147,22 +1167,16 @@ func (dn *Daemon) checkStateOnFirstRun() error { if err := dn.nodeWriter.SetDone(dn.kubeClient.CoreV1().Nodes(), dn.nodeLister, dn.name, state.currentConfig.GetName()); err != nil { errLabelStr := fmt.Sprintf("error setting node's state to Done: %v", err) MCDUpdateState.WithLabelValues("", errLabelStr).SetToCurrentTime() - return errors.Wrap(err, "error setting node's state to Done") + return inDesiredConfig, errors.Wrap(err, "error setting node's state to Done") } } glog.Infof("In desired config %s", state.currentConfig.GetName()) MCDUpdateState.WithLabelValues(state.currentConfig.GetName(), "").SetToCurrentTime() - - // All good! - return nil - } - if dn.recorder != nil { - dn.recorder.Eventf(getNodeRef(dn.node), corev1.EventTypeNormal, "BootResync", fmt.Sprintf("Booting node %s, currentConfig %s, desiredConfig %s", dn.node.Name, state.currentConfig.GetName(), state.desiredConfig.GetName())) } - // currentConfig != desiredConfig, and we're not booting up into the desiredConfig. - // Kick off an update. - return dn.triggerUpdateWithMachineConfig(state.currentConfig, state.desiredConfig) + + // No errors have occurred. Returns true if currentConfig == desiredConfig, false otherwise (needs update) + return inDesiredConfig, nil } // runOnceFromMachineConfig utilizes a parsed machineConfig and executes in onceFrom diff --git a/pkg/daemon/update.go b/pkg/daemon/update.go index 15ff2512fa..85cd114205 100644 --- a/pkg/daemon/update.go +++ b/pkg/daemon/update.go @@ -48,6 +48,16 @@ const ( fipsFile = "/proc/sys/crypto/fips_enabled" extensionsRepo = "/etc/yum.repos.d/coreos-extensions.repo" osImageContentBaseDir = "/run/mco-machine-os-content/" + + // These are the actions for a node to take after applying config changes. (e.g. a new machineconfig is applied) + // "None" means no special action needs to be taken. A drain will still happen. + // This currently happens when ssh keys or pull secret (/var/lib/kubelet/config.json) is changed + postConfigChangeActionNone = "none" + // Rebooting is still the default scenario for any other change + postConfigChangeActionReboot = "reboot" + // Crio reload will happen when /etc/containers/registries.conf is changed. This will cause + // a "systemctl reload crio" + postConfigChangeActionReloadCrio = "reload crio" ) func writeFileAtomicallyWithDefaults(fpath string, b []byte) error { @@ -93,9 +103,70 @@ func getNodeRef(node *corev1.Node) *corev1.ObjectReference { } } -// finalizeAndReboot is the last step in an update(), and it can also -// be called as a special case for the "bootstrap pivot". -func (dn *Daemon) finalizeAndReboot(newConfig *mcfgv1.MachineConfig) (retErr error) { +func reloadService(name string) error { + _, err := runGetOut("systemctl", "reload", name) + return err +} + +// performPostConfigChangeAction takes action based on what postConfigChangeAction has been asked. +// For non-reboot action, it applies configuration, updates node's config and state. +// In the end uncordon node to schedule workload. +// If at any point an error occurs, we reboot the node so that node has correct configuration. +func (dn *Daemon) performPostConfigChangeAction(postConfigChangeActions []string, configName string) error { + if ctrlcommon.InSlice(postConfigChangeActionReboot, postConfigChangeActions) { + dn.logSystem("Rebooting node") + return dn.reboot(fmt.Sprintf("Node will reboot into config %s", configName)) + } + + if ctrlcommon.InSlice(postConfigChangeActionNone, postConfigChangeActions) { + if dn.recorder != nil { + dn.recorder.Eventf(getNodeRef(dn.node), corev1.EventTypeNormal, "SkipReboot", "Config changes do not require reboot.") + } + dn.logSystem("Node has Desired Config %s, skipping reboot", configName) + } + + if ctrlcommon.InSlice(postConfigChangeActionReloadCrio, postConfigChangeActions) { + serviceName := "crio" + + if err := reloadService(serviceName); err != nil { + if dn.recorder != nil { + dn.recorder.Eventf(getNodeRef(dn.node), corev1.EventTypeWarning, "FailedServiceReload", fmt.Sprintf("Reloading %s service failed. Error: %v", serviceName, err)) + } + dn.logSystem("Reloading %s configuration failed, node will reboot instead. Error: %v", serviceName, err) + dn.reboot(fmt.Sprintf("Node will reboot into config %s", configName)) + } + + if dn.recorder != nil { + dn.recorder.Eventf(getNodeRef(dn.node), corev1.EventTypeNormal, "SkipReboot", "Config changes do not require reboot. Service %s was reloaded.", serviceName) + } + dn.logSystem("%s config reloaded successfully! Desired config %s has been applied, skipping reboot", serviceName, configName) + } + + // We are here, which means reboot was not needed to apply the configuration. + + // Get current state of node, in case of an error reboot + state, err := dn.getStateAndConfigs(configName) + if err != nil { + glog.Errorf("Error processing state and configs, node will reboot instead. Error: %v", err) + return dn.reboot(fmt.Sprintf("Node will reboot into config %s", configName)) + } + + var inDesiredConfig bool + if inDesiredConfig, err = dn.updateConfigAndState(state); err != nil { + glog.Errorf("Setting node's state to Done failed, node will reboot instead. Error: %v", err) + return dn.reboot(fmt.Sprintf("Node will reboot into config %s", configName)) + } + if inDesiredConfig { + return nil + } + + // currentConfig != desiredConfig, kick off an update + return dn.triggerUpdateWithMachineConfig(state.currentConfig, state.desiredConfig) +} + +// finalizeBeforeReboot is the last step in an update() and then we take appropriate postConfigChangeAction. +// It can also be called as a special case for the "bootstrap pivot". +func (dn *Daemon) finalizeBeforeReboot(newConfig *mcfgv1.MachineConfig) (retErr error) { if out, err := dn.storePendingState(newConfig, 1); err != nil { return errors.Wrapf(err, "failed to log pending config: %s", string(out)) } @@ -114,8 +185,7 @@ func (dn *Daemon) finalizeAndReboot(newConfig *mcfgv1.MachineConfig) (retErr err dn.recorder.Eventf(getNodeRef(dn.node), corev1.EventTypeNormal, "PendingConfig", fmt.Sprintf("Written pending config %s", newConfig.GetName())) } - // reboot. this function shouldn't actually return. - return dn.reboot(fmt.Sprintf("Node will reboot into config %v", newConfig.GetName())) + return nil } func (dn *Daemon) drain() error { @@ -394,6 +464,101 @@ func (dn *Daemon) applyOSChanges(oldConfig, newConfig *mcfgv1.MachineConfig) (re } +func calculatePostConfigChangeActionFromFileDiffs(oldIgnConfig, newIgnConfig ign3types.Config) (actions []string) { + filesPostConfigChangeActionNone := []string{ + "/var/lib/kubelet/config.json", + } + filesPostConfigChangeActionReloadCrio := []string{ + "/etc/containers/registries.conf", + } + + oldFileSet := make(map[string]ign3types.File) + for _, f := range oldIgnConfig.Storage.Files { + oldFileSet[f.Path] = f + } + newFileSet := make(map[string]ign3types.File) + for _, f := range newIgnConfig.Storage.Files { + newFileSet[f.Path] = f + } + diffFileSet := []string{} + + // First check if any files were removed + for path := range oldFileSet { + _, ok := newFileSet[path] + if !ok { + // debug: remove + glog.Infof("File diff: %v was deleted", path) + diffFileSet = append(diffFileSet, path) + } + } + + // Now check if any files were added/changed + for path, newFile := range newFileSet { + oldFile, ok := oldFileSet[path] + if !ok { + // debug: remove + glog.Infof("File diff: %v was added", path) + diffFileSet = append(diffFileSet, path) + } else if !reflect.DeepEqual(oldFile, newFile) { + // debug: remove + glog.Infof("File diff: detected change to %v", newFile.Path) + diffFileSet = append(diffFileSet, path) + } + } + + // Now calculate action + for _, k := range diffFileSet { + if ctrlcommon.InSlice(k, filesPostConfigChangeActionNone) { + continue + } else if ctrlcommon.InSlice(k, filesPostConfigChangeActionReloadCrio) { + actions = []string{postConfigChangeActionReloadCrio} + continue + } else { + actions = []string{postConfigChangeActionReboot} + break + } + } + + if len(actions) == 0 { + actions = []string{postConfigChangeActionNone} + } + return +} + +func calculatePostConfigChangeAction(oldConfig, newConfig *mcfgv1.MachineConfig) ([]string, error) { + // If a machine-config-daemon-force file is present, it means the user wants to + // move to desired state without additional validation. We will reboot the node in + // this case regardless of what MachineConfig diff is. + if _, err := os.Stat(constants.MachineConfigDaemonForceFile); err == nil { + if err := os.Remove(constants.MachineConfigDaemonForceFile); err != nil { + return []string{}, errors.Wrap(err, "failed to remove force validation file") + } + glog.Infof("Setting post config change action to postConfigChangeActionReboot; %s present", constants.MachineConfigDaemonForceFile) + return []string{postConfigChangeActionReboot}, nil + } + + diff, err := newMachineConfigDiff(oldConfig, newConfig) + if err != nil { + return []string{}, err + } + if diff.osUpdate || diff.kargs || diff.fips || diff.units || diff.kernelType || diff.extensions { + // must reboot + return []string{postConfigChangeActionReboot}, nil + } + + oldIgnConfig, err := ctrlcommon.ParseAndConvertConfig(oldConfig.Spec.Config.Raw) + if err != nil { + return []string{}, err + } + newIgnConfig, err := ctrlcommon.ParseAndConvertConfig(newConfig.Spec.Config.Raw) + if err != nil { + return []string{}, err + } + + // We don't actually have to consider ssh keys changes, which is the only section of passwd that is allowed to change + return calculatePostConfigChangeActionFromFileDiffs(oldIgnConfig, newIgnConfig), nil +} + // update the node to the provided node configuration. func (dn *Daemon) update(oldConfig, newConfig *mcfgv1.MachineConfig) (retErr error) { oldConfig = canonicalizeEmptyMC(oldConfig) @@ -440,6 +605,11 @@ func (dn *Daemon) update(oldConfig, newConfig *mcfgv1.MachineConfig) (retErr err dn.logSystem("Starting update from %s to %s: %+v", oldConfigName, newConfigName, diff) + actions, err := calculatePostConfigChangeAction(oldConfig, newConfig) + if err != nil { + return err + } + if err := dn.drain(); err != nil { return err } @@ -515,7 +685,11 @@ func (dn *Daemon) update(oldConfig, newConfig *mcfgv1.MachineConfig) (retErr err glog.Info("Updated kernel tuning arguments") } - return dn.finalizeAndReboot(newConfig) + if err := dn.finalizeBeforeReboot(newConfig); err != nil { + return err + } + + return dn.performPostConfigChangeAction(actions, newConfig.GetName()) } // removeRollback removes the rpm-ostree rollback deployment. It diff --git a/pkg/daemon/update_test.go b/pkg/daemon/update_test.go index 5d8aca03cd..0091499c06 100644 --- a/pkg/daemon/update_test.go +++ b/pkg/daemon/update_test.go @@ -12,6 +12,7 @@ import ( ctrlcommon "github.com/openshift/machine-config-operator/pkg/controller/common" "github.com/openshift/machine-config-operator/test/helpers" "github.com/stretchr/testify/assert" + "github.com/vincent-petithory/dataurl" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" k8sfake "k8s.io/client-go/kubernetes/fake" ) @@ -469,6 +470,144 @@ func TestDropinCheck(t *testing.T) { } } +// Test to see if the correct action is calculated given a machineconfig diff +// i.e. whether we need to reboot and what actions need to be taken if no reboot is needed +func TestCalculatePostConfigChangeAction(t *testing.T) { + files := map[string]ign3types.File{ + "pullsecret1": ign3types.File{ + Node: ign3types.Node{ + Path: "/var/lib/kubelet/config.json", + }, + FileEmbedded1: ign3types.FileEmbedded1{ + Contents: ign3types.Resource{ + Source: helpers.StrToPtr(dataurl.EncodeBytes([]byte("kubelet conf 1\n"))), + }, + }, + }, + "pullsecret2": ign3types.File{ + Node: ign3types.Node{ + Path: "/var/lib/kubelet/config.json", + }, + FileEmbedded1: ign3types.FileEmbedded1{ + Contents: ign3types.Resource{ + Source: helpers.StrToPtr(dataurl.EncodeBytes([]byte("kubelet conf 2\n"))), + }, + }, + }, + "registries1": ign3types.File{ + Node: ign3types.Node{ + Path: "/etc/containers/registries.conf", + }, + FileEmbedded1: ign3types.FileEmbedded1{ + Contents: ign3types.Resource{ + Source: helpers.StrToPtr(dataurl.EncodeBytes([]byte("registries content 1\n"))), + }, + }, + }, + "registries2": ign3types.File{ + Node: ign3types.Node{ + Path: "/etc/containers/registries.conf", + }, + FileEmbedded1: ign3types.FileEmbedded1{ + Contents: ign3types.Resource{ + Source: helpers.StrToPtr(dataurl.EncodeBytes([]byte("registries content 2\n"))), + }, + }, + }, + "randomfile1": ign3types.File{ + Node: ign3types.Node{ + Path: "/etc/random-reboot-file", + }, + FileEmbedded1: ign3types.FileEmbedded1{ + Contents: ign3types.Resource{ + Source: helpers.StrToPtr(dataurl.EncodeBytes([]byte("test\n"))), + }, + }, + }, + "randomfile2": ign3types.File{ + Node: ign3types.Node{ + Path: "/etc/random-reboot-file", + }, + FileEmbedded1: ign3types.FileEmbedded1{ + Contents: ign3types.Resource{ + Source: helpers.StrToPtr(dataurl.EncodeBytes([]byte("test 2\n"))), + }, + }, + }, + } + + tests := []struct { + oldConfig *mcfgv1.MachineConfig + newConfig *mcfgv1.MachineConfig + expectedAction []string + }{ + { + // test that a normal file change is reboot + oldConfig: helpers.NewMachineConfig("00-test", nil, "dummy://", []ign3types.File{files["randomfile1"]}), + newConfig: helpers.NewMachineConfig("01-test", nil, "dummy://", []ign3types.File{files["randomfile2"]}), + expectedAction: []string{postConfigChangeActionReboot}, + }, + { + // test that a pull secret change is none + oldConfig: helpers.NewMachineConfig("00-test", nil, "dummy://", []ign3types.File{files["pullsecret1"]}), + newConfig: helpers.NewMachineConfig("01-test", nil, "dummy://", []ign3types.File{files["pullsecret2"]}), + expectedAction: []string{postConfigChangeActionNone}, + }, + { + // test that a SSH key change is none + oldConfig: helpers.NewMachineConfigExtended("00-test", nil, []ign3types.File{}, []ign3types.Unit{}, []ign3types.SSHAuthorizedKey{"key1"}, []string{}, false, []string{}, "default", "dummy://"), + newConfig: helpers.NewMachineConfigExtended("01-test", nil, []ign3types.File{}, []ign3types.Unit{}, []ign3types.SSHAuthorizedKey{"key2"}, []string{}, false, []string{}, "default", "dummy://"), + expectedAction: []string{postConfigChangeActionNone}, + }, + { + // test that a registries change is reload + oldConfig: helpers.NewMachineConfig("00-test", nil, "dummy://", []ign3types.File{files["registries1"]}), + newConfig: helpers.NewMachineConfig("01-test", nil, "dummy://", []ign3types.File{files["registries2"]}), + expectedAction: []string{postConfigChangeActionReloadCrio}, + }, + { + // test that a registries change (reload) overwrites pull secret (none) + oldConfig: helpers.NewMachineConfig("00-test", nil, "dummy://", []ign3types.File{files["registries1"], files["pullsecret1"]}), + newConfig: helpers.NewMachineConfig("01-test", nil, "dummy://", []ign3types.File{files["registries2"], files["pullsecret2"]}), + expectedAction: []string{postConfigChangeActionReloadCrio}, + }, + { + // test that a osImage change (reboot) overwrites registries (reload) and SSH keys (none) + oldConfig: helpers.NewMachineConfigExtended("00-test", nil, []ign3types.File{files["registries1"]}, []ign3types.Unit{}, []ign3types.SSHAuthorizedKey{"key1"}, []string{}, false, []string{}, "default", "dummy://"), + newConfig: helpers.NewMachineConfigExtended("01-test", nil, []ign3types.File{files["registries2"]}, []ign3types.Unit{}, []ign3types.SSHAuthorizedKey{"key2"}, []string{}, false, []string{}, "default", "dummy1://"), + expectedAction: []string{postConfigChangeActionReboot}, + }, + { + // test that adding a pull secret is none + oldConfig: helpers.NewMachineConfigExtended("00-test", nil, []ign3types.File{files["registries1"]}, []ign3types.Unit{}, []ign3types.SSHAuthorizedKey{"key1"}, []string{}, false, []string{}, "default", "dummy://"), + newConfig: helpers.NewMachineConfigExtended("01-test", nil, []ign3types.File{files["registries1"], files["pullsecret2"]}, []ign3types.Unit{}, []ign3types.SSHAuthorizedKey{"key1"}, []string{}, false, []string{}, "default", "dummy://"), + expectedAction: []string{postConfigChangeActionNone}, + }, + { + // test that removing a registries is crio reload + oldConfig: helpers.NewMachineConfigExtended("00-test", nil, []ign3types.File{files["randomfile1"], files["registries1"]}, []ign3types.Unit{}, []ign3types.SSHAuthorizedKey{"key1"}, []string{}, false, []string{}, "default", "dummy://"), + newConfig: helpers.NewMachineConfigExtended("01-test", nil, []ign3types.File{files["randomfile1"]}, []ign3types.Unit{}, []ign3types.SSHAuthorizedKey{"key1"}, []string{}, false, []string{}, "default", "dummy://"), + expectedAction: []string{postConfigChangeActionReloadCrio}, + }, + { + // mixed test - final should be reboot due to kargs changes + oldConfig: helpers.NewMachineConfigExtended("00-test", nil, []ign3types.File{files["registries1"]}, []ign3types.Unit{}, []ign3types.SSHAuthorizedKey{"key1"}, []string{}, false, []string{}, "default", "dummy://"), + newConfig: helpers.NewMachineConfigExtended("01-test", nil, []ign3types.File{files["pullsecret2"]}, []ign3types.Unit{}, []ign3types.SSHAuthorizedKey{"key2"}, []string{}, false, []string{"karg1"}, "default", "dummy://"), + expectedAction: []string{postConfigChangeActionReboot}, + }, + } + + for idx, test := range tests { + t.Run(fmt.Sprintf("case#%d", idx), func(t *testing.T) { + calculatedAction, err := calculatePostConfigChangeAction(test.oldConfig, test.newConfig) + + if !reflect.DeepEqual(test.expectedAction, calculatedAction) { + t.Errorf("Failed calculating config change action: expected: %v but result is: %v. Error: %v", test.expectedAction, calculatedAction, err) + } + }) + } +} + // checkReconcilableResults is a shortcut for verifying results that should be reconcilable func checkReconcilableResults(t *testing.T, key string, reconcilableError error) { if reconcilableError != nil { diff --git a/test/e2e/mcd_test.go b/test/e2e/mcd_test.go index e2072e5eeb..0a0c69dd6c 100644 --- a/test/e2e/mcd_test.go +++ b/test/e2e/mcd_test.go @@ -3,6 +3,7 @@ package e2e_test import ( "context" "fmt" + "strconv" "strings" "testing" "time" @@ -325,6 +326,128 @@ func TestExtensions(t *testing.T) { require.Nil(t, err) } +func TestNoReboot(t *testing.T) { + cs := framework.NewClientSet("") + unlabelFunc := labelRandomNodeFromPool(t, cs, "worker", "node-role.kubernetes.io/infra") + oldInfraRenderedConfig := getMcName(t, cs, "infra") + + infraNode := getSingleNodeByRole(t, cs, "infra") + + output := execCmdOnNode(t, cs, infraNode, "cat", "/rootfs/proc/uptime") + oldTime := strings.Split(output, " ")[0] + t.Logf("Node %s initial uptime: %s", infraNode.Name, oldTime) + + // Adding authorized key for user core + testIgnConfig := ctrlcommon.NewIgnConfig() + testSSHKey := ign3types.PasswdUser{Name: "core", SSHAuthorizedKeys: []ign3types.SSHAuthorizedKey{"test adding authorized key without node reboot"}} + testIgnConfig.Passwd.Users = append(testIgnConfig.Passwd.Users, testSSHKey) + + addAuthorizedKey := &mcfgv1.MachineConfig{ + ObjectMeta: metav1.ObjectMeta{ + Name: fmt.Sprintf("authorzied-key-infra-%s", uuid.NewUUID()), + Labels: mcLabelForRole("infra"), + }, + Spec: mcfgv1.MachineConfigSpec{ + Config: runtime.RawExtension{ + Raw: helpers.MarshalOrDie(testIgnConfig), + }, + }, + } + + _, err := cs.MachineConfigs().Create(context.TODO(), addAuthorizedKey, metav1.CreateOptions{}) + require.Nil(t, err, "failed to create MC") + t.Logf("Created %s", addAuthorizedKey.Name) + + // grab the latest worker- MC + renderedConfig, err := waitForRenderedConfig(t, cs, "infra", addAuthorizedKey.Name) + require.Nil(t, err) + err = waitForPoolComplete(t, cs, "infra", renderedConfig) + require.Nil(t, err) + + // Re-fetch the infra node for updated annotations + infraNode = getSingleNodeByRole(t, cs, "infra") + + assert.Equal(t, infraNode.Annotations[constants.CurrentMachineConfigAnnotationKey], renderedConfig) + assert.Equal(t, infraNode.Annotations[constants.MachineConfigDaemonStateAnnotationKey], constants.MachineConfigDaemonStateDone) + + foundSSHKey := execCmdOnNode(t, cs, infraNode, "cat", "/rootfs/home/core/.ssh/authorized_keys") + if !strings.Contains(foundSSHKey, "test adding authorized key without node reboot") { + t.Fatalf("updated ssh keys not found in authorized_keys, got %s", foundSSHKey) + } + t.Logf("Node %s has SSH key", infraNode.Name) + + output = execCmdOnNode(t, cs, infraNode, "cat", "/rootfs/proc/uptime") + newTime := strings.Split(output, " ")[0] + + // To ensure we didn't reboot, new uptime should be greater than old uptime + uptimeOld, err := strconv.ParseFloat(oldTime, 64) + require.Nil(t, err) + + uptimeNew, err := strconv.ParseFloat(newTime, 64) + require.Nil(t, err) + + if uptimeOld > uptimeNew { + t.Fatalf("Node %s rebooted uptime decreased from %f to %f", infraNode.Name, uptimeOld, uptimeNew) + } + + t.Logf("Node %s didn't reboot as expected, uptime increased from %f to %f ", infraNode.Name, uptimeOld, uptimeNew) + + // Delete the applied authorized key MachineConfig to make sure rollback works fine without node reboot + if err := cs.MachineConfigs().Delete(context.TODO(), addAuthorizedKey.Name, metav1.DeleteOptions{}); err != nil { + t.Error(err) + } + + t.Logf("Deleted MachineConfig %s", addAuthorizedKey.Name) + + // Wait for the mcp to rollback to previous config + if err := waitForPoolComplete(t, cs, "infra", oldInfraRenderedConfig); err != nil { + t.Fatal(err) + } + + // Re-fetch the infra node for updated annotations + infraNode = getSingleNodeByRole(t, cs, "infra") + + assert.Equal(t, infraNode.Annotations[constants.CurrentMachineConfigAnnotationKey], oldInfraRenderedConfig) + assert.Equal(t, infraNode.Annotations[constants.MachineConfigDaemonStateAnnotationKey], constants.MachineConfigDaemonStateDone) + + foundSSHKey = execCmdOnNode(t, cs, infraNode, "cat", "/rootfs/home/core/.ssh/authorized_keys") + if strings.Contains(foundSSHKey, "test adding authorized key without node reboot") { + t.Fatalf("Node %s did not rollback successfully", infraNode.Name) + } + + t.Logf("Node %s has successfully rolled back", infraNode.Name) + + // Ensure that node didn't reboot during rollback + output = execCmdOnNode(t, cs, infraNode, "cat", "/rootfs/proc/uptime") + newTime = strings.Split(output, " ")[0] + + uptimeNew, err = strconv.ParseFloat(newTime, 64) + require.Nil(t, err) + + if uptimeOld > uptimeNew { + t.Fatalf("Node %s rebooted during rollback, uptime decreased from %f to %f", infraNode.Name, uptimeOld, uptimeNew) + } + + t.Logf("Node %s didn't reboot as expected during rollback, uptime increased from %f to %f ", infraNode.Name, uptimeOld, uptimeNew) + + unlabelFunc() + + workerMCP, err := cs.MachineConfigPools().Get(context.TODO(), "worker", metav1.GetOptions{}) + require.Nil(t, err) + if err := wait.Poll(2*time.Second, 5*time.Minute, func() (bool, error) { + node, err := cs.Nodes().Get(context.TODO(), infraNode.Name, metav1.GetOptions{}) + require.Nil(t, err) + if node.Annotations[constants.DesiredMachineConfigAnnotationKey] != workerMCP.Spec.Configuration.Name { + return false, nil + } + return true, nil + }); err != nil { + t.Errorf("infra node hasn't moved back to worker config: %v", err) + } + err = waitForPoolComplete(t, cs, "infra", oldInfraRenderedConfig) + require.Nil(t, err) +} + func TestPoolDegradedOnFailToRender(t *testing.T) { cs := framework.NewClientSet("") diff --git a/test/e2e/utils_test.go b/test/e2e/utils_test.go index 401404774b..ec9eaf6dbb 100644 --- a/test/e2e/utils_test.go +++ b/test/e2e/utils_test.go @@ -4,6 +4,7 @@ import ( "context" "fmt" "math/rand" + "os" "os/exec" "testing" "time" @@ -166,21 +167,24 @@ func createMC(name, role string) *mcfgv1.MachineConfig { // execCmdOnNode finds a node's mcd, and oc rsh's into it to execute a command on the node // all commands should use /rootfs as root -func execCmdOnNode(t *testing.T, cs *framework.ClientSet, node corev1.Node, args ...string) string { +func execCmdOnNode(t *testing.T, cs *framework.ClientSet, node corev1.Node, subArgs ...string) string { mcd, err := mcdForNode(cs, &node) require.Nil(t, err) mcdName := mcd.ObjectMeta.Name entryPoint := "oc" - cmd := []string{"rsh", + args := []string{"rsh", "-n", "openshift-machine-config-operator", "-c", "machine-config-daemon", mcdName} - cmd = append(cmd, args...) + args = append(args, subArgs...) - b, err := exec.Command(entryPoint, cmd...).CombinedOutput() - require.Nil(t, err, "failed to exec cmd %v on node %s: %s", args, node.Name, string(b)) - return string(b) + cmd := exec.Command(entryPoint, args...) + cmd.Stderr = os.Stderr + + out, err := cmd.Output() + require.Nil(t, err, "failed to exec cmd %v on node %s: %s", subArgs, node.Name, string(out)) + return string(out) } func mcLabelForRole(role string) map[string]string { diff --git a/test/helpers/helpers.go b/test/helpers/helpers.go index 4975a16a17..680b6bd8a6 100644 --- a/test/helpers/helpers.go +++ b/test/helpers/helpers.go @@ -35,6 +35,32 @@ func BoolToPtr(b bool) *bool { // NewMachineConfig returns a basic machine config with supplied labels, osurl & files added func NewMachineConfig(name string, labels map[string]string, osurl string, files []ign3types.File) *mcfgv1.MachineConfig { + return NewMachineConfigExtended( + name, + labels, + files, + []ign3types.Unit{}, + []ign3types.SSHAuthorizedKey{}, + []string{}, + false, + []string{}, + "", + osurl, + ) +} + +// NewMachineConfigExtended returns a more comprehensive machine config +func NewMachineConfigExtended( + name string, + labels map[string]string, + files []ign3types.File, + units []ign3types.Unit, + sshkeys []ign3types.SSHAuthorizedKey, + extensions []string, + fips bool, + kernelArguments []string, + kernelType, osurl string, +) *mcfgv1.MachineConfig { if labels == nil { labels = map[string]string{} } @@ -46,6 +72,17 @@ func NewMachineConfig(name string, labels map[string]string, osurl string, files Storage: ign3types.Storage{ Files: files, }, + Systemd: ign3types.Systemd{ + Units: units, + }, + Passwd: ign3types.Passwd{ + Users: []ign3types.PasswdUser{ + { + Name: "core", + SSHAuthorizedKeys: sshkeys, + }, + }, + }, }, ) @@ -59,10 +96,14 @@ func NewMachineConfig(name string, labels map[string]string, osurl string, files UID: types.UID(utilrand.String(5)), }, Spec: mcfgv1.MachineConfigSpec{ - OSImageURL: osurl, Config: runtime.RawExtension{ Raw: rawIgnition, }, + Extensions: extensions, + FIPS: fips, + KernelArguments: kernelArguments, + KernelType: kernelType, + OSImageURL: osurl, }, } }