diff --git a/pkg/daemon/constants/constants.go b/pkg/daemon/constants/constants.go index 264da4a545..f7ee614a28 100644 --- a/pkg/daemon/constants/constants.go +++ b/pkg/daemon/constants/constants.go @@ -97,6 +97,9 @@ const ( // changes to registries.d will cause a crio reload SigstoreRegistriesConfigDir = "/etc/containers/registries.d" + // Changes to this directory should not trigger reboots because they are firstboot-only + OpenShiftNMStateConfigDir = "/etc/nmstate/openshift" + // SSH Keys for user "core" will only be written at /home/core/.ssh CoreUserSSHPath = "/home/" + CoreUserName + "/.ssh" diff --git a/pkg/daemon/update.go b/pkg/daemon/update.go index 5ec76a396a..152b85468a 100644 --- a/pkg/daemon/update.go +++ b/pkg/daemon/update.go @@ -572,6 +572,9 @@ func calculatePostConfigChangeActionFromMCDiffs(diffFileSet []string) (actions [ imageRegistryAuthFile, "/var/lib/kubelet/config.json", } + directoriesPostConfigChangeActionNone := []string{ + constants.OpenShiftNMStateConfigDir, + } filesPostConfigChangeActionReloadCrio := []string{ constants.ContainerRegistryConfPath, GPGNoRebootPath, @@ -585,7 +588,6 @@ func calculatePostConfigChangeActionFromMCDiffs(diffFileSet []string) (actions [ } actions = []string{postConfigChangeActionNone} - for _, path := range diffFileSet { if ctrlcommon.InSlice(path, filesPostConfigChangeActionNone) { continue @@ -595,6 +597,8 @@ func calculatePostConfigChangeActionFromMCDiffs(diffFileSet []string) (actions [ actions = []string{postConfigChangeActionRestartCrio} } else if ctrlcommon.InSlice(filepath.Dir(path), dirsPostConfigChangeActionReloadCrio) { actions = []string{postConfigChangeActionReloadCrio} + } else if ctrlcommon.InSlice(filepath.Dir(path), directoriesPostConfigChangeActionNone) { + continue } else { actions = []string{postConfigChangeActionReboot} return @@ -630,6 +634,10 @@ func calculatePostConfigChangeNodeDisruptionActionFromMCDiffs(diffSSH bool, diff }}) continue } + if filepath.Dir(diffPath) == constants.OpenShiftNMStateConfigDir { + klog.Infof("Exception Action: diffPath %s is a subdir of %s, skipping reboot", diffPath, constants.OpenShiftNMStateConfigDir) + continue + } // If this file path has no policy defined, default to reboot klog.V(4).Infof("no policy found for diff path %s", diffPath) return []opv1.NodeDisruptionPolicyStatusAction{{ diff --git a/templates/common/_base/files/configure-ovs-network.yaml b/templates/common/_base/files/configure-ovs-network.yaml index bc2f78c630..1e5dd58943 100644 --- a/templates/common/_base/files/configure-ovs-network.yaml +++ b/templates/common/_base/files/configure-ovs-network.yaml @@ -5,6 +5,11 @@ contents: #!/bin/bash set -x + if [ -e /etc/nmstate/openshift/applied ]; then + echo "Skipping configure-ovs due to manual network configuration" + exit 0 + fi + # This file is not needed anymore in 4.7+, but when rolling back to 4.6 # the ovs pod needs it to know ovs is running on the host. touch /var/run/ovs-config-executed diff --git a/templates/common/_base/files/wait-for-primary-ip.yaml b/templates/common/_base/files/wait-for-primary-ip.yaml new file mode 100644 index 0000000000..b80b1776cb --- /dev/null +++ b/templates/common/_base/files/wait-for-primary-ip.yaml @@ -0,0 +1,31 @@ +mode: 0755 +path: "/usr/local/bin/wait-for-primary-ip.sh" +contents: + inline: | + #!/bin/bash + set -eux + + if [ ! -e /etc/nmstate/openshift/applied ]; then + # No need to do this if no NMState configuration was applied + exit 0 + fi + + # This logic is borrowed from configure-ovs.sh + # TODO: Find a platform-agnostic way to do this. It won't work on platforms where + # nodeip-configuration is not used. + ip=$(cat /run/nodeip-configuration/primary-ip) + if [[ "${ip}" == "" ]]; then + echo "No ip to bind was found" + exit 1 + fi + while : + do + random_port=$(shuf -i 50000-60000 -n 1) + echo "Trying to bind ${ip} on port ${random_port}" + exit_code=$(timeout 2s nc -l "${ip}" ${random_port}; echo $?) + if [[ exit_code -eq 124 ]]; then + echo "Address bound successfully" + exit 0 + fi + sleep 10 + done diff --git a/templates/common/_base/units/wait-for-primary-ip.yaml b/templates/common/_base/units/wait-for-primary-ip.yaml new file mode 100644 index 0000000000..685a0cb1df --- /dev/null +++ b/templates/common/_base/units/wait-for-primary-ip.yaml @@ -0,0 +1,23 @@ +name: wait-for-primary-ip.service +enabled: true +contents: | + [Unit] + Description=Ensure primary IP is assigned and usable + Requires=nmstate.service + After=nmstate.service + Before=kubelet-dependencies.target + + [Service] + Type=oneshot + Restart=on-failure + RestartSec=10 + ExecStart=/usr/local/bin/wait-for-primary-ip.sh + StandardOutput=journal+console + StandardError=journal+console + + {{if .Proxy -}} + EnvironmentFile=/etc/mco/proxy.env + {{end -}} + + [Install] + WantedBy=network-online.target diff --git a/templates/common/baremetal/files/nmstate-configuration.yaml b/templates/common/baremetal/files/nmstate-configuration.yaml new file mode 100644 index 0000000000..f142b4041b --- /dev/null +++ b/templates/common/baremetal/files/nmstate-configuration.yaml @@ -0,0 +1,40 @@ +mode: 0755 +path: "/usr/local/bin/nmstate-configuration.sh" +contents: + inline: | + #!/bin/bash + set -eux + + # Clean up old config on behalf of mtu-migration + if ! systemctl -q is-enabled mtu-migration; then + echo "Cleaning up left over mtu migration configuration" + rm -rf /etc/cno/mtu-migration + fi + + if [ -e /etc/nmstate/openshift/applied ]; then + echo "Configuration already applied, exiting" + exit 0 + fi + + src_path="/etc/nmstate/openshift" + dst_path="/etc/nmstate" + hostname=$(hostname -s) + host_file="${hostname}.yml" + cluster_file="cluster.yml" + config_file="" + if [ -s "$src_path/$host_file" ]; then + config_file=$hostname_file + elif [ -s "$src_path/$cluster_file" ]; then + config_file=$cluster_file + else + echo "No configuration found at $src_path/$host_file or $src_path/$cluster_file" + exit 0 + fi + + if [ -e "$dst_path/$config_file" ]; then + echo "ERROR: File $dst_path/$config_file exists. Refusing to overwrite." + exit 1 + fi + + cp "$src_path/$config_file" /etc/nmstate + touch /etc/nmstate/openshift/applied diff --git a/templates/common/baremetal/units/nmstate-configuration.service.yaml b/templates/common/baremetal/units/nmstate-configuration.service.yaml new file mode 100644 index 0000000000..c616c75f60 --- /dev/null +++ b/templates/common/baremetal/units/nmstate-configuration.service.yaml @@ -0,0 +1,25 @@ +name: nmstate-configuration.service +enabled: true +contents: | + [Unit] + Description=Applies per-node NMState network configuration + Requires=openvswitch.service + Wants=NetworkManager-wait-online.service + After=NetworkManager-wait-online.service openvswitch.service network.service nodeip-configuration.service + Before=nmstate.service kubelet-dependencies.target ovs-configuration.service node-valid-hostname.service + + [Service] + Type=oneshot + # Would prefer to do Restart=on-failure instead of this bash retry loop, but + # the version of systemd we have right now doesn't support it. It should be + # available in systemd v244 and higher. + ExecStart=/usr/local/bin/nmstate-configuration.sh + StandardOutput=journal+console + StandardError=journal+console + + {{if .Proxy -}} + EnvironmentFile=/etc/mco/proxy.env + {{end -}} + + [Install] + WantedBy=network-online.target