diff --git a/docs/NodeDisruptionPolicy.md b/docs/NodeDisruptionPolicy.md index 1515ec8622..bdfa094902 100644 --- a/docs/NodeDisruptionPolicy.md +++ b/docs/NodeDisruptionPolicy.md @@ -27,11 +27,11 @@ $ oc get MachineConfiguration/cluster -o yaml apiVersion: operator.openshift.io/v1 kind: MachineConfiguration metadata: - creationTimestamp: "2024-04-16T15:02:37Z" - generation: 4 + creationTimestamp: "2024-07-11T13:44:07Z" + generation: 9 name: cluster - resourceVersion: "261205" - uid: 2c67b155-1898-452f-adbd-ed376afc0ea2 + resourceVersion: "239810" + uid: 2f31b426-8d50-414a-a46a-463269686b2f spec: logLevel: Normal managementState: Managed @@ -56,10 +56,27 @@ status: - actions: - type: Special path: /etc/containers/registries.conf + - actions: + - reload: + serviceName: crio.service + type: Reload + path: /etc/containers/registries.d + - actions: + - type: None + path: /etc/nmstate/openshift + - actions: + - restart: + serviceName: coreos-update-ca-trust.service + type: Restart + - restart: + serviceName: crio.service + type: Restart + path: /etc/pki/ca-trust/source/anchors/openshift-config-user-ca-bundle.crt sshkey: actions: - type: None - readyReplicas: 0 + observedGeneration: 9 + ``` Say, for instance the user applied the following MachineConfiguration: ``` @@ -81,27 +98,24 @@ $ oc get MachineConfiguration/cluster -o yaml apiVersion: operator.openshift.io/v1 kind: MachineConfiguration metadata: - creationTimestamp: "2024-04-16T15:02:37Z" - generation: 4 + creationTimestamp: "2024-07-11T13:44:07Z" + generation: 10 name: cluster - resourceVersion: "261205" - uid: 2c67b155-1898-452f-adbd-ed376afc0ea2 + resourceVersion: "240452" + uid: 2f31b426-8d50-414a-a46a-463269686b2f spec: - nodeDisruptionPolicy: - files: - - path: /etc/my-file - actions: - - type: None logLevel: Normal managementState: Managed + nodeDisruptionPolicy: + files: + - actions: + - type: None + path: /etc/my-file operatorLogLevel: Normal status: nodeDisruptionPolicyStatus: clusterPolicies: files: - - actions: - - type: None - path: /etc/my-file - actions: - type: None path: /var/lib/kubelet/config.json @@ -118,16 +132,35 @@ status: - actions: - type: Special path: /etc/containers/registries.conf + - actions: + - reload: + serviceName: crio.service + type: Reload + path: /etc/containers/registries.d + - actions: + - type: None + path: /etc/nmstate/openshift + - actions: + - restart: + serviceName: coreos-update-ca-trust.service + type: Restart + - restart: + serviceName: crio.service + type: Restart + path: /etc/pki/ca-trust/source/anchors/openshift-config-user-ca-bundle.crt + - actions: + - type: None + path: /etc/my-file sshkey: actions: - type: None - readyReplicas: 0 + observedGeneration: 10 ``` For this initial implementation the policy supports MachineConfig changes to the following: -- Files +- Files & Directories - Units - sshKeys diff --git a/pkg/apihelpers/apihelpers.go b/pkg/apihelpers/apihelpers.go index db5da091cd..71e8a21b32 100644 --- a/pkg/apihelpers/apihelpers.go +++ b/pkg/apihelpers/apihelpers.go @@ -9,6 +9,7 @@ import ( mcfgv1 "github.com/openshift/api/machineconfiguration/v1" opv1 "github.com/openshift/api/operator/v1" + "github.com/openshift/machine-config-operator/pkg/daemon/constants" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -22,7 +23,7 @@ var ( defaultClusterPolicies = opv1.NodeDisruptionPolicyClusterStatus{ Files: []opv1.NodeDisruptionPolicyStatusFile{ { - Path: "/var/lib/kubelet/config.json", + Path: constants.KubeletAuthFile, Actions: []opv1.NodeDisruptionPolicyStatusAction{ { Type: opv1.NoneStatusAction, @@ -30,7 +31,7 @@ var ( }, }, { - Path: "/etc/machine-config-daemon/no-reboot/containers-gpg.pub", + Path: constants.GPGNoRebootPath, Actions: []opv1.NodeDisruptionPolicyStatusAction{ { Type: opv1.ReloadStatusAction, @@ -41,7 +42,7 @@ var ( }, }, { - Path: "/etc/containers/policy.json", + Path: constants.ContainerRegistryPolicyPath, Actions: []opv1.NodeDisruptionPolicyStatusAction{ { Type: opv1.ReloadStatusAction, @@ -52,13 +53,49 @@ var ( }, }, { - Path: "/etc/containers/registries.conf", + Path: constants.ContainerRegistryConfPath, Actions: []opv1.NodeDisruptionPolicyStatusAction{ { Type: opv1.SpecialStatusAction, }, }, }, + { + Path: constants.SigstoreRegistriesConfigDir, + Actions: []opv1.NodeDisruptionPolicyStatusAction{ + { + Type: opv1.ReloadStatusAction, + Reload: &opv1.ReloadService{ + ServiceName: "crio.service", + }, + }, + }, + }, + { + Path: constants.OpenShiftNMStateConfigDir, + Actions: []opv1.NodeDisruptionPolicyStatusAction{ + { + Type: opv1.NoneStatusAction, + }, + }, + }, + { + Path: constants.UserCABundlePath, + Actions: []opv1.NodeDisruptionPolicyStatusAction{ + { + Type: opv1.RestartStatusAction, + Restart: &opv1.RestartService{ + ServiceName: "coreos-update-ca-trust.service", + }, + }, + { + Type: opv1.RestartStatusAction, + Restart: &opv1.RestartService{ + ServiceName: "crio.service", + }, + }, + }, + }, }, SSHKey: opv1.NodeDisruptionPolicyStatusSSHKey{ Actions: []opv1.NodeDisruptionPolicyStatusAction{ diff --git a/pkg/controller/common/helpers.go b/pkg/controller/common/helpers.go index a0f1174767..b00f802a36 100644 --- a/pkg/controller/common/helpers.go +++ b/pkg/controller/common/helpers.go @@ -12,6 +12,7 @@ import ( "io/fs" "net/url" "os" + "path/filepath" "reflect" "sort" @@ -53,6 +54,7 @@ import ( "k8s.io/klog/v2" mcfgv1 "github.com/openshift/api/machineconfiguration/v1" + opv1 "github.com/openshift/api/operator/v1" mcfgclientset "github.com/openshift/client-go/machineconfiguration/clientset/versioned" "github.com/openshift/client-go/machineconfiguration/clientset/versioned/scheme" ) @@ -1206,3 +1208,35 @@ func DoARebuild(pool *mcfgv1.MachineConfigPool) bool { return ok } + +// isSubdirectory checks if targetPath is a subdirectory of dirPath. +func IsSubdirectory(dirPath, targetPath string) bool { + // Clean and add trailing separator to dirPath to ensure proper matching + dirPath = filepath.Clean(dirPath) + string(filepath.Separator) + targetPath = filepath.Clean(targetPath) + + // Check if targetPath has dirPath as its prefix + return strings.HasPrefix(targetPath, dirPath) +} + +func FindClosestFilePolicyPathMatch(diffPath string, filePolicies []opv1.NodeDisruptionPolicyStatusFile) (bool, []opv1.NodeDisruptionPolicyStatusAction) { + matchLength := 0 + matchFound := false + matchActions := []opv1.NodeDisruptionPolicyStatusAction{} + + for _, filePolicy := range filePolicies { + klog.V(4).Infof("comparing policy path %s to diff path %s", filePolicy.Path, diffPath) + // Check if either of the following are true: + // (i) if diffPath and filePolicy.Path are an exact match + // (ii) if diffPath is a subdir of filePolicy.Path + if (diffPath == filePolicy.Path) || IsSubdirectory(filePolicy.Path, diffPath) { + // If a match was found, compare the length so the longest match is preserved + if len(filePolicy.Path) > matchLength { + matchFound = true + matchLength = len(filePolicy.Path) + matchActions = filePolicy.Actions + } + } + } + return matchFound, matchActions +} diff --git a/pkg/daemon/constants/constants.go b/pkg/daemon/constants/constants.go index f7ee614a28..2423ed8bf1 100644 --- a/pkg/daemon/constants/constants.go +++ b/pkg/daemon/constants/constants.go @@ -94,9 +94,15 @@ const ( // changes to registries.conf will cause a crio reload and require extra logic about whether to drain ContainerRegistryConfPath = "/etc/containers/registries.conf" + // changes to registries.conf will cause a crio reload + ContainerRegistryPolicyPath = "/etc/containers/policy.json" + // changes to registries.d will cause a crio reload SigstoreRegistriesConfigDir = "/etc/containers/registries.d" + // changes to openshift-config-user-ca-bundle.crt will cause an update-ca-trust and crio restart + UserCABundlePath = "/etc/pki/ca-trust/source/anchors/openshift-config-user-ca-bundle.crt" + // Changes to this directory should not trigger reboots because they are firstboot-only OpenShiftNMStateConfigDir = "/etc/nmstate/openshift" @@ -124,4 +130,8 @@ const ( // MinFreeStorageAfterPrefetch is the minimum amount of storage // available on the root filesystem after prefetching images. MinFreeStorageAfterPrefetch = "16Gi" + + // GPGNoRebootPath is the path MCO expects will contain GPG key updates. MCO will attempt to only reload crio for + // changes to this path. Note that other files added to the parent directory will not be handled specially + GPGNoRebootPath = "/etc/machine-config-daemon/no-reboot/containers-gpg.pub" ) diff --git a/pkg/daemon/update.go b/pkg/daemon/update.go index e6b575491b..e8ebee212d 100644 --- a/pkg/daemon/update.go +++ b/pkg/daemon/update.go @@ -64,10 +64,6 @@ const ( // Rebooting is still the default scenario for any other change postConfigChangeActionReboot = "reboot" - // GPGNoRebootPath is the path MCO expects will contain GPG key updates. MCO will attempt to only reload crio for - // changes to this path. Note that other files added to the parent directory will not be handled specially - GPGNoRebootPath = "/etc/machine-config-daemon/no-reboot/containers-gpg.pub" - // ImageRegistryDrainOverrideConfigmap is the name of the Configmap a user can apply to force all // image registry changes to not drain ImageRegistryDrainOverrideConfigmap = "image-registry-override-drain" @@ -572,18 +568,18 @@ func (dn *CoreOSDaemon) applyOSChanges(mcDiff machineConfigDiff, oldConfig, newC func calculatePostConfigChangeActionFromMCDiffs(diffFileSet []string) (actions []string) { filesPostConfigChangeActionNone := []string{ caBundleFilePath, - "/var/lib/kubelet/config.json", + constants.KubeletAuthFile, } directoriesPostConfigChangeActionNone := []string{ constants.OpenShiftNMStateConfigDir, } filesPostConfigChangeActionReloadCrio := []string{ constants.ContainerRegistryConfPath, - GPGNoRebootPath, - "/etc/containers/policy.json", + constants.GPGNoRebootPath, + constants.ContainerRegistryPolicyPath, } filesPostConfigChangeActionRestartCrio := []string{ - "/etc/pki/ca-trust/source/anchors/openshift-config-user-ca-bundle.crt", + constants.UserCABundlePath, } dirsPostConfigChangeActionReloadCrio := []string{ constants.SigstoreRegistriesConfigDir, @@ -622,31 +618,11 @@ func calculatePostConfigChangeNodeDisruptionActionFromMCDiffs(diffSSH bool, diff // Step through all file based policies, and build out the actions object for _, diffPath := range diffFileSet { - pathFound := false - for _, policyFile := range clusterPolicies.Files { - klog.V(4).Infof("comparing policy path %s to diff path %s", policyFile.Path, diffPath) - if policyFile.Path == diffPath { - klog.Infof("NodeDisruptionPolicy found for diff file %s", diffPath) - actions = append(actions, policyFile.Actions...) - pathFound = true - break - } - } - if !pathFound { - // Hack for https://github.com/openshift/machine-config-operator/pull/4160#discussion_r1548669673 here - // Changes to files in /etc/containers/registries.d should cause a CRIO reload - // This should be removed once NodeDisruptionPolicy can support directories and wildcards - if filepath.Dir(diffPath) == constants.SigstoreRegistriesConfigDir { - klog.Infof("Exception Action: diffPath %s is a subdir of %s, adding a CRIO reload", diffPath, constants.SigstoreRegistriesConfigDir) - actions = append(actions, opv1.NodeDisruptionPolicyStatusAction{Type: opv1.ReloadStatusAction, Reload: &opv1.ReloadService{ - ServiceName: constants.CRIOServiceName, - }}) - continue - } - if filepath.Dir(diffPath) == constants.OpenShiftNMStateConfigDir { - klog.Infof("Exception Action: diffPath %s is a subdir of %s, skipping reboot", diffPath, constants.OpenShiftNMStateConfigDir) - continue - } + pathFound, actionsFound := ctrlcommon.FindClosestFilePolicyPathMatch(diffPath, clusterPolicies.Files) + if pathFound { + klog.Infof("NodeDisruptionPolicy %v found for diff file %s", actionsFound, diffPath) + actions = append(actions, actionsFound...) + } else { // If this file path has no policy defined, default to reboot klog.V(4).Infof("no policy found for diff path %s", diffPath) return []opv1.NodeDisruptionPolicyStatusAction{{ @@ -661,7 +637,7 @@ func calculatePostConfigChangeNodeDisruptionActionFromMCDiffs(diffSSH bool, diff for _, policyUnit := range clusterPolicies.Units { klog.V(4).Infof("comparing policy unit name %s to diff unit name %s", string(policyUnit.Name), diffUnit) if string(policyUnit.Name) == diffUnit { - klog.Infof("NodeDisruptionPolicy found for diff unit %s!", diffUnit) + klog.Infof("NodeDisruptionPolicy %v found for diff unit %s!", policyUnit.Actions, diffUnit) actions = append(actions, policyUnit.Actions...) unitFound = true break @@ -678,7 +654,7 @@ func calculatePostConfigChangeNodeDisruptionActionFromMCDiffs(diffSSH bool, diff // SSH only has one possible policy(and there is a default), so blindly add that if there is an SSH diff if diffSSH { - klog.Infof("SSH diff detected, applying SSH policy") + klog.Infof("SSH diff detected, applying SSH policy %v", clusterPolicies.SSHKey.Actions) actions = append(actions, clusterPolicies.SSHKey.Actions...) } diff --git a/pkg/daemon/update_test.go b/pkg/daemon/update_test.go index 1aeebed01d..e794cf480c 100644 --- a/pkg/daemon/update_test.go +++ b/pkg/daemon/update_test.go @@ -16,6 +16,7 @@ import ( ign3types "github.com/coreos/ignition/v2/config/v3_4/types" mcfgv1 "github.com/openshift/api/machineconfiguration/v1" + opv1 "github.com/openshift/api/operator/v1" ctrlcommon "github.com/openshift/machine-config-operator/pkg/controller/common" "github.com/openshift/machine-config-operator/pkg/daemon/osrelease" "github.com/openshift/machine-config-operator/test/helpers" @@ -707,3 +708,123 @@ func TestIsImagePresent(t *testing.T) { assert.Nil(t, err) assert.False(t, imagePresent) } + +func TestFindClosestFilePolicyPathMatch(t *testing.T) { + + policyActions := map[string][]opv1.NodeDisruptionPolicyStatusAction{ + "Empty": {}, + "None": {{Type: opv1.NoneStatusAction}}, + "Reboot": {{Type: opv1.RebootStatusAction}}, + "RestartCrio": {{Type: opv1.RestartStatusAction, Restart: &opv1.RestartService{ServiceName: "crio.service"}}}, + "ReloadCrio": {{Type: opv1.ReloadStatusAction, Reload: &opv1.ReloadService{ServiceName: "crio.service"}}}} + + tests := []struct { + diffPath string + filePolicies []opv1.NodeDisruptionPolicyStatusFile + expectedPathFound bool + expectedActionsFound []opv1.NodeDisruptionPolicyStatusAction + }{ + { + // test that an file with no policies returns no actions + diffPath: "/etc/mco/test", + filePolicies: []opv1.NodeDisruptionPolicyStatusFile{ + { + Path: "/etc/example", + Actions: policyActions["None"], + }, + }, + expectedPathFound: false, + expectedActionsFound: policyActions["Empty"], + }, + { + // test that a file with a valid policy returns the correct actions + diffPath: "/etc/mco/test1", + filePolicies: []opv1.NodeDisruptionPolicyStatusFile{ + { + Path: "/etc/mco/test1", + Actions: policyActions["None"], + }, + { + Path: "/etc/mco/test2", + Actions: policyActions["Reboot"], + }, + }, + expectedPathFound: true, + expectedActionsFound: policyActions["None"], + }, + { + // test that a file with multiple path policies returns the correct actions(closest match) + diffPath: "/etc/mco/f1/f2/example", + filePolicies: []opv1.NodeDisruptionPolicyStatusFile{ + { + Path: "/etc/mco/f1", + Actions: policyActions["None"], + }, + { + Path: "/etc/mco/f1/f2", + Actions: policyActions["Reboot"], + }, + { + Path: "/etc/mco/f1/f2/f3", + Actions: policyActions["ReloadCrio"], + }, + }, + expectedPathFound: true, + expectedActionsFound: policyActions["Reboot"], + }, + { + // test that a file with multiple path policies returns the correct actions when there is an exact match + diffPath: "/etc/mco/f1/f2/f3", + filePolicies: []opv1.NodeDisruptionPolicyStatusFile{ + { + Path: "/etc/mco/f1", + Actions: policyActions["None"], + }, + { + Path: "/etc/mco/f1/f2", + Actions: policyActions["Reboot"], + }, + { + Path: "/etc/mco/f1/f2/f3", + Actions: policyActions["ReloadCrio"], + }, + }, + expectedPathFound: true, + expectedActionsFound: policyActions["ReloadCrio"], + }, + { + // test that a file with a partial parent dir match, but no actual dir match does not return any action + diffPath: "/etc/mco/f1/f2/f3", + filePolicies: []opv1.NodeDisruptionPolicyStatusFile{ + { + Path: "/etc/mco/f1/test", + Actions: policyActions["None"], + }, + { + Path: "/etc/mco/f1/f2/test", + Actions: policyActions["Reboot"], + }, + { + Path: "/etc/mco/f1/f2/f3/f4", + Actions: policyActions["ReloadCrio"], + }, + }, + expectedPathFound: false, + expectedActionsFound: policyActions["Empty"], + }, + } + + for idx, test := range tests { + t.Run(fmt.Sprintf("case#%d", idx), func(t *testing.T) { + + pathFound, actionsFound := ctrlcommon.FindClosestFilePolicyPathMatch(test.diffPath, test.filePolicies) + + if !reflect.DeepEqual(test.expectedPathFound, pathFound) { + t.Errorf("Failed finding node disruption file policy action: expected: %v but result is: %v.", test.expectedPathFound, pathFound) + } + if !reflect.DeepEqual(test.expectedActionsFound, actionsFound) { + t.Errorf("Failed calculating node disruption file policy action: expected: %v but result is: %v.", test.expectedActionsFound, actionsFound) + } + }) + } +}