diff --git a/pkg/daemon/update.go b/pkg/daemon/update.go index 152b85468a..d2cf1b9e7a 100644 --- a/pkg/daemon/update.go +++ b/pkg/daemon/update.go @@ -726,30 +726,27 @@ func calculatePostConfigChangeAction(diff *machineConfigDiff, diffFileSet []stri func (dn *Daemon) calculatePostConfigChangeNodeDisruptionAction(diff *machineConfigDiff, diffFileSet, diffUnitSet []string) ([]opv1.NodeDisruptionPolicyStatusAction, error) { var mcop *opv1.MachineConfiguration - var err error + var pollErr error // Wait for mcop.Status.NodeDisruptionPolicyStatus to populate, otherwise error out. This shouldn't take very long // as this is done by the operator sync loop, but may be extended if transitioning to TechPreview as the operator restarts, - if err = wait.PollUntilContextTimeout(context.TODO(), 5*time.Second, 2*time.Minute, true, func(ctx context.Context) (bool, error) { - mcop, err = dn.mcopClient.OperatorV1().MachineConfigurations().Get(context.TODO(), ctrlcommon.MCOOperatorKnobsObjectName, metav1.GetOptions{}) - if err != nil { + if err := wait.PollUntilContextTimeout(context.TODO(), 5*time.Second, 2*time.Minute, true, func(ctx context.Context) (bool, error) { + mcop, pollErr = dn.mcopClient.OperatorV1().MachineConfigurations().Get(context.TODO(), ctrlcommon.MCOOperatorKnobsObjectName, metav1.GetOptions{}) + if pollErr != nil { klog.Errorf("calculating NodeDisruptionPolicies: MachineConfiguration/cluster has not been created yet") - err = fmt.Errorf("MachineConfiguration/cluster has not been created yet") + pollErr = fmt.Errorf("MachineConfiguration/cluster has not been created yet") return false, nil } - // There will always be atleast five file policies, if they don't exist, then the Status hasn't been populated yet - // - // TODO: When Conditions on this object are implemented; this check could be updated to only proceed when - // status.ObservedGeneration matches the last generation of MachineConfiguration - if 
len(mcop.Status.NodeDisruptionPolicyStatus.ClusterPolicies.Files) == 0 { - klog.Errorf("calculating NodeDisruptionPolicies: NodeDisruptionPolicyStatus has not been populated yet") - err = fmt.Errorf("NodeDisruptionPolicyStatus has not been populated yet") + + // Ensure status.ObservedGeneration matches the last generation of MachineConfiguration + if mcop.Generation != mcop.Status.ObservedGeneration { + klog.Errorf("calculating NodeDisruptionPolicies: NodeDisruptionPolicyStatus is not up to date.") + pollErr = fmt.Errorf("NodeDisruptionPolicyStatus is not up to date") return false, nil } return true, nil }); err != nil { - klog.Errorf("NodeDisruptionPolicyStatus was not ready: %v", err) - err = fmt.Errorf("NodeDisruptionPolicyStatus was not ready: %v", err) - return nil, err + klog.Errorf("NodeDisruptionPolicyStatus was not ready: %v", pollErr) + return nil, fmt.Errorf("NodeDisruptionPolicyStatus was not ready: %v", pollErr) } // Continue policy calculation if no errors were encountered in fetching the policy. @@ -757,7 +754,7 @@ func (dn *Daemon) calculatePostConfigChangeNodeDisruptionAction(diff *machineCon // move to desired state without additional validation. We will reboot the node in // this case regardless of what MachineConfig diff is. 
klog.Infof("Calculating node disruption actions") - if _, err = os.Stat(constants.MachineConfigDaemonForceFile); err == nil { + if _, err := os.Stat(constants.MachineConfigDaemonForceFile); err == nil { if err = os.Remove(constants.MachineConfigDaemonForceFile); err != nil { return []opv1.NodeDisruptionPolicyStatusAction{}, fmt.Errorf("failed to remove force validation file: %w", err) } @@ -1052,16 +1049,10 @@ func (dn *Daemon) update(oldConfig, newConfig *mcfgv1.MachineConfig, skipCertifi } var nodeDisruptionActions []opv1.NodeDisruptionPolicyStatusAction - var nodeDisruptionError error var actions []string // If FeatureGateNodeDisruptionPolicy is set, calculate NodeDisruptionPolicy based actions for this MC diff if fg != nil && fg.Enabled(features.FeatureGateNodeDisruptionPolicy) { - nodeDisruptionActions, nodeDisruptionError = dn.calculatePostConfigChangeNodeDisruptionAction(diff, diffFileSet, diffUnitSet) - if nodeDisruptionError != nil { - // TODO: Fallback to legacy path and signal failure here - klog.Errorf("could not calculate node disruption actions: %v", nodeDisruptionError) - actions, err = calculatePostConfigChangeAction(diff, diffFileSet) - } + nodeDisruptionActions, err = dn.calculatePostConfigChangeNodeDisruptionAction(diff, diffFileSet, diffUnitSet) } else { actions, err = calculatePostConfigChangeAction(diff, diffFileSet) } @@ -1083,13 +1074,13 @@ func (dn *Daemon) update(oldConfig, newConfig *mcfgv1.MachineConfig, skipCertifi } var drain bool - if fg != nil && fg.Enabled(features.FeatureGateNodeDisruptionPolicy) && nodeDisruptionError == nil { + if fg != nil && fg.Enabled(features.FeatureGateNodeDisruptionPolicy) { // Check actions list and perform node drain if required drain, err = isDrainRequiredForNodeDisruptionActions(nodeDisruptionActions, oldIgnConfig, newIgnConfig) if err != nil { return err } - klog.Infof("Drain calculated for node disruption: %v", drain) + klog.Infof("Drain calculated for node disruption: %v for config %s", drain, 
newConfigName) } else { // Check and perform node drain if required crioOverrideConfigmapExists, err := dn.hasImageRegistryDrainOverrideConfigMap() @@ -1285,7 +1276,7 @@ func (dn *Daemon) update(oldConfig, newConfig *mcfgv1.MachineConfig, skipCertifi klog.Errorf("Error making MCN for Updated Files and OS: %v", err) } - if fg != nil && fg.Enabled(features.FeatureGateNodeDisruptionPolicy) && nodeDisruptionError == nil { + if fg != nil && fg.Enabled(features.FeatureGateNodeDisruptionPolicy) { return dn.performPostConfigChangeNodeDisruptionAction(nodeDisruptionActions, newConfig.GetName()) } // If we're here, FeatureGateNodeDisruptionPolicy is off/errored, so perform legacy action diff --git a/pkg/operator/sync.go b/pkg/operator/sync.go index ab9e621b73..8c2a538737 100644 --- a/pkg/operator/sync.go +++ b/pkg/operator/sync.go @@ -2038,17 +2038,20 @@ func (optr *Operator) syncMachineConfiguration(_ *renderConfig) error { } // Merges the cluster's default node disruption policies with the user defined policies, if any. - newNodeDisruptionPolicyStatus := opv1.NodeDisruptionPolicyStatus{ - ClusterPolicies: apihelpers.MergeClusterPolicies(mcop.Spec.NodeDisruptionPolicy), + newMachineConfigurationStatus := opv1.MachineConfigurationStatus{ + NodeDisruptionPolicyStatus: opv1.NodeDisruptionPolicyStatus{ + ClusterPolicies: apihelpers.MergeClusterPolicies(mcop.Spec.NodeDisruptionPolicy), + }, + ObservedGeneration: mcop.GetGeneration(), } // Check if any changes are required in the Status before making the API call. 
- if !reflect.DeepEqual(mcop.Status.NodeDisruptionPolicyStatus, newNodeDisruptionPolicyStatus) { - klog.Infof("Updating NodeDisruptionPolicy status") - mcop.Status.NodeDisruptionPolicyStatus = newNodeDisruptionPolicyStatus + if !reflect.DeepEqual(mcop.Status, newMachineConfigurationStatus) { + klog.Infof("Updating MachineConfiguration status") + mcop.Status = newMachineConfigurationStatus _, err = optr.mcopClient.OperatorV1().MachineConfigurations().UpdateStatus(context.TODO(), mcop, metav1.UpdateOptions{}) if err != nil { - klog.Errorf("NodeDisruptionPolicy status apply failed: %v", err) + klog.Errorf("MachineConfiguration status apply failed: %v", err) return nil } } diff --git a/test/e2e-shared-tests/helpers.go b/test/e2e-shared-tests/helpers.go index 755930bfc0..74a2360bbb 100644 --- a/test/e2e-shared-tests/helpers.go +++ b/test/e2e-shared-tests/helpers.go @@ -102,27 +102,6 @@ func waitForConfigDriftMonitorStart(t *testing.T, cs *framework.ClientSet, node require.Nil(t, err, "expected config drift monitor to start (waited %v)", end) } -func assertLogsContain(t *testing.T, cs *framework.ClientSet, mcdPod *corev1.Pod, node *corev1.Node, expectedContents string) { - t.Helper() - - logs, err := cs.Pods(mcdPod.Namespace).GetLogs(mcdPod.Name, &corev1.PodLogOptions{ - Container: "machine-config-daemon", - }).DoRaw(context.TODO()) - if err != nil { - // common err is that the mcd went down mid cmd. 
Re-try for good measure - mcdPod, err = helpers.MCDForNode(cs, node) - require.Nil(t, err) - logs, err = cs.Pods(mcdPod.Namespace).GetLogs(mcdPod.Name, &corev1.PodLogOptions{ - Container: "machine-config-daemon", - }).DoRaw(context.TODO()) - } - require.Nil(t, err) - - if !strings.Contains(string(logs), expectedContents) { - t.Fatalf("expected to find '%s' in logs for %s/%s", expectedContents, mcdPod.Namespace, mcdPod.Name) - } -} - func assertNodeIsInDoneState(t *testing.T, cs *framework.ClientSet, node corev1.Node) { t.Helper() diff --git a/test/e2e-shared-tests/mcd_config_drift.go b/test/e2e-shared-tests/mcd_config_drift.go index 9ca22daae0..2bb4a61a93 100644 --- a/test/e2e-shared-tests/mcd_config_drift.go +++ b/test/e2e-shared-tests/mcd_config_drift.go @@ -388,7 +388,7 @@ func assertNodeAndMCPIsDegraded(t *testing.T, cs *framework.ClientSet, node core mcdPod, err := helpers.MCDForNode(cs, &node) require.Nil(t, err) - assertLogsContain(t, cs, mcdPod, &node, logEntry) + helpers.AssertMCDLogsContain(t, cs, mcdPod, &node, logEntry) // Assert that the MachineConfigPool eventually reaches a degraded state and has the config mismatch as the reason. 
t.Log("Verifying MachineConfigPool becomes degraded due to config mismatch") diff --git a/test/e2e-techpreview/helpers_test.go b/test/e2e-techpreview/helpers_test.go index d56f273994..266e12cd3b 100644 --- a/test/e2e-techpreview/helpers_test.go +++ b/test/e2e-techpreview/helpers_test.go @@ -23,7 +23,6 @@ import ( "github.com/stretchr/testify/require" "golang.org/x/sync/errgroup" corev1 "k8s.io/api/core/v1" - apierrs "k8s.io/apimachinery/pkg/api/errors" k8serrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" aggerrs "k8s.io/apimachinery/pkg/util/errors" @@ -564,7 +563,7 @@ func copyEntitlementCerts(t *testing.T, cs *framework.ClientSet) func() { return cloneSecret(t, cs, name, namespace, name, ctrlcommon.MCONamespace) } - if apierrs.IsNotFound(err) { + if k8serrors.IsNotFound(err) { t.Logf("Secret %q not found in %q, skipping test", name, namespace) t.Skip() return func() {} diff --git a/test/e2e-techpreview/main_test.go b/test/e2e-techpreview/main_test.go index 216f748aaa..653cb13f96 100644 --- a/test/e2e-techpreview/main_test.go +++ b/test/e2e-techpreview/main_test.go @@ -11,7 +11,7 @@ func TestMain(m *testing.M) { // Ensure required feature gates are set. // Add any new feature gates to the test here, and remove them as features are GAed. 
- helpers.MustHaveFeatureGatesEnabled("ManagedBootImages", "OnClusterBuild", "MachineConfigNodes") + helpers.MustHaveFeatureGatesEnabled("ManagedBootImages", "OnClusterBuild", "MachineConfigNodes", "NodeDisruptionPolicy") os.Exit(m.Run()) } diff --git a/test/e2e-techpreview/nodedisrupt_test.go b/test/e2e-techpreview/nodedisrupt_test.go new file mode 100644 index 0000000000..0916cea8d6 --- /dev/null +++ b/test/e2e-techpreview/nodedisrupt_test.go @@ -0,0 +1,229 @@ +package e2e_techpreview_test + +import ( + "bytes" + "context" + "fmt" + "testing" + + "github.com/BurntSushi/toml" + "github.com/containers/image/v5/pkg/sysregistriesv2" + + v1 "github.com/openshift/api/machineconfiguration/v1" + opv1 "github.com/openshift/api/operator/v1" + mcoac "github.com/openshift/client-go/operator/applyconfigurations/operator/v1" + mcopclientset "github.com/openshift/client-go/operator/clientset/versioned" + ctrlcommon "github.com/openshift/machine-config-operator/pkg/controller/common" + constants "github.com/openshift/machine-config-operator/pkg/daemon/constants" + + "github.com/openshift/machine-config-operator/test/framework" + "github.com/openshift/machine-config-operator/test/helpers" + "github.com/stretchr/testify/require" + + ign3types "github.com/coreos/ignition/v2/config/v3_4/types" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +const ( + // The MachineConfigPools to create for the tests. 
+	testMCPFileName    string = "infra-file" +	testMCPSSHName     string = "infra-ssh" +	testMCPUnitName    string = "infra-unit" +	testMCPSpecialName string = "infra-special" +) + +func TestNodeDisruptionPolicies(t *testing.T) { + +	cs := framework.NewClientSet("") +	nodes, err := helpers.GetNodesByRole(cs, "worker") +	require.Nil(t, err, "error grabbing nodes from worker pool") +	require.Greater(t, len(nodes), 0, "this test requires atleast one node in the worker pool") + +	// By equally (based on run-time) separating out the actions to be tested across three parallel sub-tests, +	// the test suite can be run much faster. This still provides sufficient coverage as the calculation and +	// execution of policies are independent of each other. Each testFunc can handle any combination of testActions. + +	testActionsAlpha := []opv1.NodeDisruptionPolicySpecAction{ +		{Type: opv1.NoneSpecAction}, {Type: opv1.DrainSpecAction}} + +	testActionsBeta := []opv1.NodeDisruptionPolicySpecAction{{Type: opv1.RebootSpecAction}} + +	testActionsGamma := []opv1.NodeDisruptionPolicySpecAction{{Type: opv1.DaemonReloadSpecAction}, +		{Type: opv1.RestartSpecAction, Restart: &opv1.RestartService{ServiceName: "crio.service"}}, +		{Type: opv1.ReloadSpecAction, Reload: &opv1.ReloadService{ServiceName: "crio.service"}}} + +	// Shuffle the three action sets so each testFunc is randomly assigned one of the above action sets. 
+ testActionSets := [][]opv1.NodeDisruptionPolicySpecAction{testActionsAlpha, testActionsBeta, testActionsGamma} + helpers.ShuffleSlice(testActionSets) + + testFuncs := []func(*testing.T, corev1.Node, []opv1.NodeDisruptionPolicySpecAction){testFilePolicy, testUnitPolicy, testSSHKeyPolicy} + + for testIndex, testFunc := range testFuncs { + testIndex := testIndex + testFunc := testFunc + t.Run(helpers.GetFunctionName(testFunc), func(t *testing.T) { + // Only parallelize if there are enough nodes to run the tests individually + if len(nodes) >= len(testFuncs) { + t.Parallel() + testFunc(t, nodes[testIndex], testActionSets[testIndex]) + } else { + testFunc(t, nodes[0], testActionSets[testIndex]) + } + + }) + } + +} + +func testFilePolicy(t *testing.T, node corev1.Node, testActions []opv1.NodeDisruptionPolicySpecAction) { + + cs := framework.NewClientSet("") + t.Cleanup(helpers.CreatePoolAndApplyMCToNode(t, cs, testMCPFileName, node, nil)) + + // Step through each action and check if it was applied correctly. This loop generates a MachineConfiguration + // and a MachineConfig for each testAction and verifies that the correct NodeDisruptionPolicy was applied. 
+ for _, action := range testActions { + fileName := "/etc/test-" + string(action.Type) + fileApplyConfiguration := mcoac.NodeDisruptionPolicySpecFile().WithPath(fileName).WithActions(helpers.GetActionApplyConfiguration(action)) + applyConfiguration := mcoac.MachineConfiguration("cluster").WithSpec(mcoac.MachineConfigurationSpec().WithManagementState("Managed").WithNodeDisruptionPolicy(mcoac.NodeDisruptionPolicyConfig().WithFiles(fileApplyConfiguration))) + // Create the test MC object, derived from the action under test + testMC := helpers.NewMachineConfig("01-test-file", helpers.MCLabelForRole(testMCPFileName), "", []ign3types.File{ctrlcommon.NewIgnFile(fileName, "test\n")}) + checkNodeDisruptionAction(t, cs, testMC, testMCPFileName, *applyConfiguration, node, action.Type) + } + +} + +func testUnitPolicy(t *testing.T, nodeUnderTest corev1.Node, testActions []opv1.NodeDisruptionPolicySpecAction) { + + cs := framework.NewClientSet("") + t.Cleanup(helpers.CreatePoolAndApplyMCToNode(t, cs, testMCPUnitName, nodeUnderTest, nil)) + + // Step through each action and check if it was applied correctly. This loop generates a MachineConfiguration + // and a MachineConfig for each testAction and verifies that the correct NodeDisruptionPolicy was applied. 
+ for _, action := range testActions { + serviceName := string(action.Type) + "-test.service" + serviceApplyConfiguration := mcoac.NodeDisruptionPolicySpecUnit().WithName(opv1.NodeDisruptionPolicyServiceName(serviceName)).WithActions(helpers.GetActionApplyConfiguration(action)) + applyConfiguration := mcoac.MachineConfiguration("cluster").WithSpec(mcoac.MachineConfigurationSpec().WithManagementState("Managed").WithNodeDisruptionPolicy(mcoac.NodeDisruptionPolicyConfig().WithUnits(serviceApplyConfiguration))) + testMC := helpers.NewMachineConfigExtended("01-test-unit", helpers.MCLabelForRole(testMCPUnitName), nil, nil, []ign3types.Unit{{Name: serviceName, Contents: helpers.StrToPtr("test")}}, nil, nil, false, nil, "", "") + checkNodeDisruptionAction(t, cs, testMC, testMCPUnitName, *applyConfiguration, nodeUnderTest, action.Type) + } +} + +func testSSHKeyPolicy(t *testing.T, node corev1.Node, testActions []opv1.NodeDisruptionPolicySpecAction) { + + cs := framework.NewClientSet("") + t.Cleanup(helpers.CreatePoolAndApplyMCToNode(t, cs, testMCPSSHName, node, nil)) + + // Step through each action and check if it was applied correctly. This loop generates a MachineConfiguration + // and a MachineConfig for each testAction and verifies that the correct NodeDisruptionPolicy was applied. 
+	for _, action := range testActions { +		sshApplyConfiguration := mcoac.NodeDisruptionPolicySpecSSHKey().WithActions(helpers.GetActionApplyConfiguration(action)) +		applyConfiguration := mcoac.MachineConfiguration("cluster").WithSpec(mcoac.MachineConfigurationSpec().WithManagementState("Managed").WithNodeDisruptionPolicy(mcoac.NodeDisruptionPolicyConfig().WithSSHKey(sshApplyConfiguration))) +		testMC := helpers.NewMachineConfigExtended("01-test-ssh", helpers.MCLabelForRole(testMCPSSHName), nil, nil, nil, []ign3types.SSHAuthorizedKey{"test"}, nil, false, nil, "", "") +		checkNodeDisruptionAction(t, cs, testMC, testMCPSSHName, *applyConfiguration, node, action.Type) +	} + +} + +// This is a bespoke test function for the Special Action. This verifies that drain only takes +// place when registries are removed from /etc/containers/registries.conf and no drain takes +// place when registries are added. It also verifies that crio reloads take place in both cases. +func TestNodeDisruptionPolicySpecialAction(t *testing.T) { + +	t.Logf("Verifying Special action") + +	cs := framework.NewClientSet("") +	t.Cleanup(helpers.CreatePoolAndApplyMC(t, cs, testMCPSpecialName, nil)) +	nodeUnderTest := helpers.GetSingleNodeByRole(t, cs, testMCPSpecialName) + +	// Grab name of MCD pod under test +	mcdPod, err := helpers.MCDForNode(cs, &nodeUnderTest) +	require.Nil(t, err, "determing mcd pod of node under test failed") + +	// Grab old rendered MC to figure out transition back +	oldRenderedMC := helpers.GetMcName(t, cs, testMCPSpecialName) + +	// Modify registry file on disk by adding a new "test.io" domain +	tomlReg := sysregistriesv2.V2RegistriesConf{} +	_, err = toml.Decode(helpers.GetFileContentOnNode(t, cs, nodeUnderTest, constants.ContainerRegistryConfPath), &tomlReg) +	require.Nil(t, err, "failed decoding TOML content from file %s: %w", constants.ContainerRegistryConfPath, err) +	tomlReg.UnqualifiedSearchRegistries = append(tomlReg.UnqualifiedSearchRegistries, "test.io") +	var 
newFile bytes.Buffer +	encoder := toml.NewEncoder(&newFile) +	err = encoder.Encode(tomlReg) +	require.Nil(t, err, "failed encoding TOML content into file %s: %w", constants.ContainerRegistryConfPath, err) + +	// Create a test machineconfig from the file in cluster +	testMC := helpers.NewMachineConfig("01-test", helpers.MCLabelForRole(testMCPSpecialName), "", []ign3types.File{ctrlcommon.NewIgnFile(constants.ContainerRegistryConfPath, newFile.String())}) +	_, err = cs.MachineconfigurationV1Interface.MachineConfigs().Create(context.TODO(), testMC, metav1.CreateOptions{}) +	require.Nil(t, err, "creating test machine config failed") + +	// Wait for transition to new config +	renderedMC := helpers.WaitForConfigAndPoolComplete(t, cs, testMCPSpecialName, testMC.Name) + +	// Check daemon logs to ensure drain was not executed for this case and that a crio reload took place for this MC change. +	helpers.AssertMCDLogsContain(t, cs, mcdPod, &nodeUnderTest, fmt.Sprintf("Drain calculated for node disruption: false for config %s", renderedMC)) +	helpers.AssertMCDLogsContain(t, cs, mcdPod, &nodeUnderTest, fmt.Sprintf("Performing post config change action: Special for config %s", renderedMC)) + +	// Remove test config; this is the equivalent of deleting a registry +	cs.MachineconfigurationV1Interface.MachineConfigs().Delete(context.TODO(), testMC.Name, metav1.DeleteOptions{}) +	require.Nil(t, err, "deleting test machine config failed") + +	// Wait for pool to return to old MC before checking actions +	helpers.WaitForPoolComplete(t, cs, testMCPSpecialName, oldRenderedMC) + +	// Check daemon logs to ensure drain action was executed and that a crio reload took place for this MC change. 
+ helpers.AssertMCDLogsContain(t, cs, mcdPod, &nodeUnderTest, fmt.Sprintf("Drain calculated for node disruption: true for config %s", oldRenderedMC)) + helpers.AssertMCDLogsContain(t, cs, mcdPod, &nodeUnderTest, fmt.Sprintf("Performing post config change action: Special for config %s", oldRenderedMC)) + + t.Logf("Successfully verified Special action") + +} + +func checkNodeDisruptionAction(t *testing.T, cs *framework.ClientSet, testMC *v1.MachineConfig, testMCP string, applyConfiguration mcoac.MachineConfigurationApplyConfiguration, nodeUnderTest corev1.Node, expectedAction opv1.NodeDisruptionPolicySpecActionType) { + + t.Logf("Verifying %s", expectedAction) + + // Grab the client + machineConfigurationClient := mcopclientset.NewForConfigOrDie(cs.GetRestConfig()) + + // This may be needed to verify reboot action + initialUptime := helpers.GetNodeUptime(t, cs, nodeUnderTest) + + // Grab name of MCD pod under test + mcdPod, err := helpers.MCDForNode(cs, &nodeUnderTest) + require.Nil(t, err, "determing mcd pod of node under test failed") + + // Apply the node disruption policy + _, err = machineConfigurationClient.OperatorV1().MachineConfigurations().Apply(context.TODO(), &applyConfiguration, metav1.ApplyOptions{FieldManager: "machine-config-operator" + testMCP}) + require.Nil(t, err, "updating cluster node disruption policy failed") + + // Grab old rendered MC to figure out transition back + oldRenderedMC := helpers.GetMcName(t, cs, testMCP) + + // Create the test machineconfig in cluster + _, err = cs.MachineconfigurationV1Interface.MachineConfigs().Create(context.TODO(), testMC, metav1.CreateOptions{}) + require.Nil(t, err, "creating test machine config failed") + + // Wait for transition to new config + renderedMC := helpers.WaitForConfigAndPoolComplete(t, cs, testMCP, testMC.Name) + + // Check daemon logs to ensure correct action was executed + if expectedAction == opv1.DrainSpecAction { + helpers.AssertMCDLogsContain(t, cs, mcdPod, &nodeUnderTest, 
fmt.Sprintf("Drain calculated for node disruption: true for config %s", renderedMC)) + } else if expectedAction == opv1.RebootSpecAction { + helpers.AssertNodeReboot(t, cs, nodeUnderTest, initialUptime) + } else { + helpers.AssertMCDLogsContain(t, cs, mcdPod, &nodeUnderTest, fmt.Sprintf("Performing post config change action: %v for config %s", expectedAction, renderedMC)) + } + + // Remove test config + cs.MachineconfigurationV1Interface.MachineConfigs().Delete(context.TODO(), testMC.Name, metav1.DeleteOptions{}) + require.Nil(t, err, "deleting test machine config failed") + + // Wait for pool to return to old MC before running next test case + helpers.WaitForPoolComplete(t, cs, testMCP, oldRenderedMC) + + t.Logf("Successfully verified %s", expectedAction) +} diff --git a/test/helpers/utils.go b/test/helpers/utils.go index 1ccd2761f5..cdcab866a4 100644 --- a/test/helpers/utils.go +++ b/test/helpers/utils.go @@ -10,6 +10,8 @@ import ( "os" "os/exec" "path/filepath" + "reflect" + "runtime" "strconv" "strings" @@ -41,6 +43,9 @@ import ( "k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/util/sets" "k8s.io/apimachinery/pkg/util/wait" + + opv1 "github.com/openshift/api/operator/v1" + mcoac "github.com/openshift/client-go/operator/applyconfigurations/operator/v1" ) type CleanupFuncs struct { @@ -760,6 +765,17 @@ func AssertFileNotOnNode(t *testing.T, cs *framework.ClientSet, node corev1.Node assert.Contains(t, out, "No such file or directory", "expected command output to contain 'No such file or directory', got: %s", out) } +// Asserts that a given file exists on the underlying node and returns the content of the file. 
+func GetFileContentOnNode(t *testing.T, cs *framework.ClientSet, node corev1.Node, path string) string { + t.Helper() + + path = canonicalizeNodeFilePath(path) + + out, err := ExecCmdOnNodeWithError(cs, node, "cat", path) + assert.NoError(t, err, "expected to find file %s on %s, got:\n%s", path, node.Name, out) + return out +} + // Adds the /rootfs onto a given file path, if not already present. func canonicalizeNodeFilePath(path string) string { rootfs := "/rootfs" @@ -1199,3 +1215,50 @@ func WriteFileToNode(t *testing.T, cs *framework.ClientSet, node corev1.Node, fi ExecCmdOnNode(t, cs, node, "rm", filename) }) } + +// AssertMCDLogsContain asserts that the MCD pod's logs contains a target string value +func AssertMCDLogsContain(t *testing.T, cs *framework.ClientSet, mcdPod *corev1.Pod, node *corev1.Node, expectedContents string) { + t.Helper() + logs, err := cs.Pods(mcdPod.Namespace).GetLogs(mcdPod.Name, &corev1.PodLogOptions{ + Container: "machine-config-daemon", + }).DoRaw(context.TODO()) + if err != nil { + // common err is that the mcd went down mid cmd. 
Re-try for good measure + mcdPod, err = MCDForNode(cs, node) + require.Nil(t, err) + logs, err = cs.Pods(mcdPod.Namespace).GetLogs(mcdPod.Name, &corev1.PodLogOptions{ + Container: "machine-config-daemon", + }).DoRaw(context.TODO()) + } + require.Nil(t, err) + + if !strings.Contains(string(logs), expectedContents) { + t.Fatalf("expected to find '%s' in logs for %s/%s", expectedContents, mcdPod.Namespace, mcdPod.Name) + } +} + +func GetFunctionName(i interface{}) string { + strs := strings.Split((runtime.FuncForPC(reflect.ValueOf(i).Pointer()).Name()), ".") + return strs[len(strs)-1] +} + +func ShuffleSlice(slice interface{}) { + v := reflect.ValueOf(slice) + rand.Shuffle(v.Len(), func(i, j int) { + vi := v.Index(i).Interface() + v.Index(i).Set(v.Index(j)) + v.Index(j).Set(reflect.ValueOf(vi)) + }) +} + +func GetActionApplyConfiguration(action opv1.NodeDisruptionPolicySpecAction) *mcoac.NodeDisruptionPolicySpecActionApplyConfiguration { + if action.Type == opv1.ReloadSpecAction { + reloadApplyConfiguration := mcoac.ReloadService().WithServiceName(action.Reload.ServiceName) + return mcoac.NodeDisruptionPolicySpecAction().WithType(action.Type).WithReload(reloadApplyConfiguration) + } else if action.Type == opv1.RestartSpecAction { + restartApplyConfiguration := mcoac.RestartService().WithServiceName(action.Restart.ServiceName) + return mcoac.NodeDisruptionPolicySpecAction().WithType(action.Type).WithRestart(restartApplyConfiguration) + } else { + return mcoac.NodeDisruptionPolicySpecAction().WithType(action.Type) + } +}