From 07989071308ef1fdb9ecf3de2097cdada84b7201 Mon Sep 17 00:00:00 2001 From: Ben Nemec Date: Mon, 7 Aug 2023 14:35:42 -0500 Subject: [PATCH 1/9] Add nmstate-configuration service Adds a service to apply NMState configs early in the boot process. If the new service applies changes we will skip running configure-ovs since br-ex should have been included in the manual configuration. Also makes the /etc/nmstate directory a reboot-less path so when we make changes to it while scaling out new nodes it won't cause all of the existing nodes to reboot. The files there are only applied once on initial boot anyway so there's no need to reboot for changes. --- pkg/daemon/update.go | 13 ++++++++- .../_base/files/configure-ovs-network.yaml | 5 ++++ .../_base/files/nmstate-configuration.yaml | 29 +++++++++++++++++++ .../units/nmstate-configuration.service.yaml | 25 ++++++++++++++++ 4 files changed, 71 insertions(+), 1 deletion(-) create mode 100644 templates/common/_base/files/nmstate-configuration.yaml create mode 100644 templates/common/_base/units/nmstate-configuration.service.yaml diff --git a/pkg/daemon/update.go b/pkg/daemon/update.go index 5ec76a396a..19ba2ab51b 100644 --- a/pkg/daemon/update.go +++ b/pkg/daemon/update.go @@ -572,6 +572,12 @@ func calculatePostConfigChangeActionFromMCDiffs(diffFileSet []string) (actions [ imageRegistryAuthFile, "/var/lib/kubelet/config.json", } + directoriesPostConfigChangeActionNone := []string{ + // We probably don't want to use this exact path because NMState is + // planning to add a service that applies configs from it too, and we + // want to make sure our service is the only one processing the configs. + "/etc/nmstate", + } filesPostConfigChangeActionReloadCrio := []string{ constants.ContainerRegistryConfPath, GPGNoRebootPath, @@ -585,8 +591,13 @@ func calculatePostConfigChangeActionFromMCDiffs(diffFileSet []string) (actions [ } actions = []string{postConfigChangeActionNone} - +path: for _, path := range diffFileSet { + for _, dir := range directoriesPostConfigChangeActionNone { + if strings.HasPrefix(path, dir) { + continue path + } + } if ctrlcommon.InSlice(path, filesPostConfigChangeActionNone) { continue } else if ctrlcommon.InSlice(path, filesPostConfigChangeActionReloadCrio) { diff --git a/templates/common/_base/files/configure-ovs-network.yaml b/templates/common/_base/files/configure-ovs-network.yaml index bc2f78c630..1e5dd58943 100644 --- a/templates/common/_base/files/configure-ovs-network.yaml +++ b/templates/common/_base/files/configure-ovs-network.yaml @@ -5,6 +5,11 @@ contents: #!/bin/bash set -x + if [ -e /etc/nmstate/openshift/applied ]; then + echo "Skipping configure-ovs due to manual network configuration" + exit 0 + fi + # This file is not needed anymore in 4.7+, but when rolling back to 4.6 # the ovs pod needs it to know ovs is running on the host. touch /var/run/ovs-config-executed diff --git a/templates/common/_base/files/nmstate-configuration.yaml b/templates/common/_base/files/nmstate-configuration.yaml new file mode 100644 index 0000000000..82c29d4b02 --- /dev/null +++ b/templates/common/_base/files/nmstate-configuration.yaml @@ -0,0 +1,29 @@ +mode: 0755 +path: "/usr/local/bin/nmstate-configuration.sh" +contents: + inline: | + #!/bin/bash + set -eux + + if [ -e /etc/nmstate/openshift/applied ]; then + # Already done, don't apply again + exit 0 + fi + + hostname=$(hostname -s) + hostname_file="/etc/nmstate/openshift/${hostname}.yml" + cluster_file="/etc/nmstate/openshift/cluster.yml" + config_file="" + if [ -s $hostname_file ]; then + config_file=$hostname_file + fi + if [ -s $cluster_file ]; then + config_file=$cluster_file + fi + if [ -z $config_file ]; then + # No config to apply + exit 0 + fi + + cp $config_file /etc/nmstate + touch /etc/nmstate/openshift/applied diff --git a/templates/common/_base/units/nmstate-configuration.service.yaml b/templates/common/_base/units/nmstate-configuration.service.yaml new file mode 100644 index 0000000000..4afd754a26 --- /dev/null +++ b/templates/common/_base/units/nmstate-configuration.service.yaml @@ -0,0 +1,25 @@ +name: nmstate-configuration.service +enabled: true +contents: | + [Unit] + Description=Applies per-node NMState network configuration + Requires=openvswitch.service + Wants=NetworkManager-wait-online.service + After=NetworkManager-wait-online.service openvswitch.service network.service nodeip-configuration.service + Before=nmstate.service kubelet-dependencies.target kubelet.service crio.service ovs-configuration.service node-valid-hostname.service + + [Service] + Type=oneshot + # Would prefer to do Restart=on-failure instead of this bash retry loop, but + # the version of systemd we have right now doesn't support it. It should be + # available in systemd v244 and higher. + ExecStart=/usr/local/bin/nmstate-configuration.sh + StandardOutput=journal+console + StandardError=journal+console + + {{if .Proxy -}} + EnvironmentFile=/etc/mco/proxy.env + {{end -}} + + [Install] + WantedBy=network-online.target From cad8c273eca50ba21977981cb411b84f08a94cf0 Mon Sep 17 00:00:00 2001 From: Ben Nemec Date: Mon, 12 Feb 2024 16:20:48 -0600 Subject: [PATCH 2/9] Tweak dependencies to reflect kubelet-dependencies --- templates/common/_base/units/nmstate-configuration.service.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/templates/common/_base/units/nmstate-configuration.service.yaml b/templates/common/_base/units/nmstate-configuration.service.yaml index 4afd754a26..c616c75f60 100644 --- a/templates/common/_base/units/nmstate-configuration.service.yaml +++ b/templates/common/_base/units/nmstate-configuration.service.yaml @@ -6,7 +6,7 @@ contents: | Requires=openvswitch.service Wants=NetworkManager-wait-online.service After=NetworkManager-wait-online.service openvswitch.service network.service nodeip-configuration.service - Before=nmstate.service kubelet-dependencies.target kubelet.service crio.service ovs-configuration.service node-valid-hostname.service + Before=nmstate.service kubelet-dependencies.target ovs-configuration.service node-valid-hostname.service [Service] Type=oneshot From 171223f0602f5f4a1325915c8aff14bec86c1e4e Mon Sep 17 00:00:00 2001 From: Ben Nemec Date: Thu, 22 Feb 2024 15:20:38 -0600 Subject: [PATCH 3/9] Add wait-for-primary-ip service --- .../_base/files/wait-for-primary-ip.yaml | 31 +++++++++++++++++++ .../_base/units/wait-for-primary-ip.yaml | 23 ++++++++++++++ 2 files changed, 54 insertions(+) create mode 100644 templates/common/_base/files/wait-for-primary-ip.yaml create mode 100644 templates/common/_base/units/wait-for-primary-ip.yaml diff --git a/templates/common/_base/files/wait-for-primary-ip.yaml b/templates/common/_base/files/wait-for-primary-ip.yaml new file mode 100644 index 0000000000..b80b1776cb --- /dev/null +++ b/templates/common/_base/files/wait-for-primary-ip.yaml @@ -0,0 +1,31 @@ +mode: 0755 +path: "/usr/local/bin/wait-for-primary-ip.sh" +contents: + inline: | + #!/bin/bash + set -eux + + if [ ! -e /etc/nmstate/openshift/applied ]; then + # No need to do this if no NMState configuration was applied + exit 0 + fi + + # This logic is borrowed from configure-ovs.sh + # TODO: Find a platform-agnostic way to do this. It won't work on platforms where + # nodeip-configuration is not used. + ip=$(cat /run/nodeip-configuration/primary-ip) + if [[ "${ip}" == "" ]]; then + echo "No ip to bind was found" + exit 1 + fi + while : + do + random_port=$(shuf -i 50000-60000 -n 1) + echo "Trying to bind ${ip} on port ${random_port}" + exit_code=$(timeout 2s nc -l "${ip}" ${random_port}; echo $?) + if [[ exit_code -eq 124 ]]; then + echo "Address bound successfully" + exit 0 + fi + sleep 10 + done diff --git a/templates/common/_base/units/wait-for-primary-ip.yaml b/templates/common/_base/units/wait-for-primary-ip.yaml new file mode 100644 index 0000000000..685a0cb1df --- /dev/null +++ b/templates/common/_base/units/wait-for-primary-ip.yaml @@ -0,0 +1,23 @@ +name: wait-for-primary-ip.service +enabled: true +contents: | + [Unit] + Description=Ensure primary IP is assigned and usable + Requires=nmstate.service + After=nmstate.service + Before=kubelet-dependencies.target + + [Service] + Type=oneshot + Restart=on-failure + RestartSec=10 + ExecStart=/usr/local/bin/wait-for-primary-ip.sh + StandardOutput=journal+console + StandardError=journal+console + + {{if .Proxy -}} + EnvironmentFile=/etc/mco/proxy.env + {{end -}} + + [Install] + WantedBy=network-online.target From 7a94ef88803e589e9a2330b12b8fcc13c30c3af0 Mon Sep 17 00:00:00 2001 From: Ben Nemec Date: Wed, 3 Apr 2024 11:06:14 -0500 Subject: [PATCH 4/9] Change reboot-less path to /etc/nmstate/openshift --- pkg/daemon/update.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/daemon/update.go b/pkg/daemon/update.go index 19ba2ab51b..e4e5d35d12 100644 --- a/pkg/daemon/update.go +++ b/pkg/daemon/update.go @@ -576,7 +576,7 @@ func calculatePostConfigChangeActionFromMCDiffs(diffFileSet []string) (actions [ // We probably don't want to use this exact path because NMState is // planning to add a service that applies configs from it too, and we // want to make sure our service is the only one processing the configs. - "/etc/nmstate", + "/etc/nmstate/openshift", } filesPostConfigChangeActionReloadCrio := []string{ constants.ContainerRegistryConfPath, From 16b8327a9233b2b6c548a7b8a1b33dcaaff286fa Mon Sep 17 00:00:00 2001 From: Ben Nemec Date: Wed, 17 Apr 2024 14:02:43 -0500 Subject: [PATCH 5/9] Avoid overwriting existing files and cleanup mtu migration --- .../_base/files/nmstate-configuration.yaml | 23 +++++++++++++++---- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/templates/common/_base/files/nmstate-configuration.yaml b/templates/common/_base/files/nmstate-configuration.yaml index 82c29d4b02..a37ee6af7a 100644 --- a/templates/common/_base/files/nmstate-configuration.yaml +++ b/templates/common/_base/files/nmstate-configuration.yaml @@ -5,19 +5,27 @@ contents: #!/bin/bash set -eux + # Clean up old config on behalf of mtu-migration + if ! systemctl -q is-enabled mtu-migration; then + echo "Cleaning up left over mtu migration configuration" + rm -rf /etc/cno/mtu-migration + fi + if [ -e /etc/nmstate/openshift/applied ]; then # Already done, don't apply again exit 0 fi + src_path="/etc/nmstate/openshift" + dst_path="/etc/nmstate" hostname=$(hostname -s) - hostname_file="/etc/nmstate/openshift/${hostname}.yml" - cluster_file="/etc/nmstate/openshift/cluster.yml" + hostname_file="${hostname}.yml" + cluster_file="cluster.yml" config_file="" - if [ -s $hostname_file ]; then + if [ -s "$src_path/$hostname_file" ]; then config_file=$hostname_file fi - if [ -s $cluster_file ]; then + if [ -s "$src_path/$cluster_file" ]; then config_file=$cluster_file fi if [ -z $config_file ]; then @@ -25,5 +33,10 @@ contents: exit 0 fi - cp $config_file /etc/nmstate + if [ -e "$dst_path/$config_file" ]; then + echo "ERROR: File $dst_path/$config_file exists. Refusing to overwrite." + exit 1 + fi + + cp "$src_path/$config_file" /etc/nmstate touch /etc/nmstate/openshift/applied From 16d796aa43ca7e17eb19fa2de5199b787753cc4b Mon Sep 17 00:00:00 2001 From: Ben Nemec Date: Fri, 26 Apr 2024 12:08:00 -0500 Subject: [PATCH 6/9] Use InSlice to check if file is in directory This significantly cleans up the logic. --- pkg/daemon/constants/constants.go | 3 +++ pkg/daemon/update.go | 13 +++---------- 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/pkg/daemon/constants/constants.go b/pkg/daemon/constants/constants.go index 264da4a545..f7ee614a28 100644 --- a/pkg/daemon/constants/constants.go +++ b/pkg/daemon/constants/constants.go @@ -97,6 +97,9 @@ const ( // changes to registries.d will cause a crio reload SigstoreRegistriesConfigDir = "/etc/containers/registries.d" + // Changes to this directory should not trigger reboots because they are firstboot-only + OpenShiftNMStateConfigDir = "/etc/nmstate/openshift" + // SSH Keys for user "core" will only be written at /home/core/.ssh CoreUserSSHPath = "/home/" + CoreUserName + "/.ssh" diff --git a/pkg/daemon/update.go b/pkg/daemon/update.go index e4e5d35d12..5cb49e4ecd 100644 --- a/pkg/daemon/update.go +++ b/pkg/daemon/update.go @@ -573,10 +573,7 @@ func calculatePostConfigChangeActionFromMCDiffs(diffFileSet []string) (actions [ "/var/lib/kubelet/config.json", } directoriesPostConfigChangeActionNone := []string{ - // We probably don't want to use this exact path because NMState is - // planning to add a service that applies configs from it too, and we - // want to make sure our service is the only one processing the configs. - "/etc/nmstate/openshift", + constants.OpenShiftNMStateConfigDir, } filesPostConfigChangeActionReloadCrio := []string{ constants.ContainerRegistryConfPath, @@ -591,13 +588,7 @@ func calculatePostConfigChangeActionFromMCDiffs(diffFileSet []string) (actions [ } actions = []string{postConfigChangeActionNone} -path: for _, path := range diffFileSet { - for _, dir := range directoriesPostConfigChangeActionNone { - if strings.HasPrefix(path, dir) { - continue path - } - } if ctrlcommon.InSlice(path, filesPostConfigChangeActionNone) { continue } else if ctrlcommon.InSlice(path, filesPostConfigChangeActionReloadCrio) { @@ -606,6 +597,8 @@ path: actions = []string{postConfigChangeActionRestartCrio} } else if ctrlcommon.InSlice(filepath.Dir(path), dirsPostConfigChangeActionReloadCrio) { actions = []string{postConfigChangeActionReloadCrio} + } else if ctrlcommon.InSlice(filepath.Dir(path), directoriesPostConfigChangeActionNone) { + continue } else { actions = []string{postConfigChangeActionReboot} return From 397b18edc8c4697aa0107c6a91af9a42623c3a4f Mon Sep 17 00:00:00 2001 From: Ben Nemec Date: Fri, 26 Apr 2024 12:13:11 -0500 Subject: [PATCH 7/9] Add exception to NodeDisruptionPolicy For now this can't handle directories, so we need to make an exception for it. --- pkg/daemon/update.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pkg/daemon/update.go b/pkg/daemon/update.go index 5cb49e4ecd..152b85468a 100644 --- a/pkg/daemon/update.go +++ b/pkg/daemon/update.go @@ -634,6 +634,10 @@ func calculatePostConfigChangeNodeDisruptionActionFromMCDiffs(diffSSH bool, diff }}) continue } + if filepath.Dir(diffPath) == constants.OpenShiftNMStateConfigDir { + klog.Infof("Exception Action: diffPath %s is a subdir of %s, skipping reboot", diffPath, constants.OpenShiftNMStateConfigDir) + continue + } // If this file path has no policy defined, default to reboot klog.V(4).Infof("no policy found for diff path %s", diffPath) return []opv1.NodeDisruptionPolicyStatusAction{{ From addcb11ba66eba6bc958ef42fd0dbd066bef803f Mon Sep 17 00:00:00 2001 From: Ben Nemec Date: Mon, 6 May 2024 11:15:59 -0500 Subject: [PATCH 8/9] Add logging and fix logic in nmstate-configuration --- .../common/_base/files/nmstate-configuration.yaml | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/templates/common/_base/files/nmstate-configuration.yaml b/templates/common/_base/files/nmstate-configuration.yaml index a37ee6af7a..f142b4041b 100644 --- a/templates/common/_base/files/nmstate-configuration.yaml +++ b/templates/common/_base/files/nmstate-configuration.yaml @@ -12,24 +12,22 @@ contents: fi if [ -e /etc/nmstate/openshift/applied ]; then - # Already done, don't apply again + echo "Configuration already applied, exiting" exit 0 fi src_path="/etc/nmstate/openshift" dst_path="/etc/nmstate" hostname=$(hostname -s) - hostname_file="${hostname}.yml" + host_file="${hostname}.yml" cluster_file="cluster.yml" config_file="" - if [ -s "$src_path/$hostname_file" ]; then + if [ -s "$src_path/$host_file" ]; then config_file=$hostname_file - fi - if [ -s "$src_path/$cluster_file" ]; then + elif [ -s "$src_path/$cluster_file" ]; then config_file=$cluster_file - fi - if [ -z $config_file ]; then - # No config to apply + else + echo "No configuration found at $src_path/$host_file or $src_path/$cluster_file" exit 0 fi From c02974ba2738982749100d8e9ad006d089ed3adc Mon Sep 17 00:00:00 2001 From: Ben Nemec Date: Mon, 6 May 2024 11:35:47 -0500 Subject: [PATCH 9/9] Enable nmstate-configuration only on baremetal For the initial implementation we've decided to target only baremetal since that's the main place where it is needed. --- .../common/{_base => baremetal}/files/nmstate-configuration.yaml | 0 .../{_base => baremetal}/units/nmstate-configuration.service.yaml | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename templates/common/{_base => baremetal}/files/nmstate-configuration.yaml (100%) rename templates/common/{_base => baremetal}/units/nmstate-configuration.service.yaml (100%) diff --git a/templates/common/_base/files/nmstate-configuration.yaml b/templates/common/baremetal/files/nmstate-configuration.yaml similarity index 100% rename from templates/common/_base/files/nmstate-configuration.yaml rename to templates/common/baremetal/files/nmstate-configuration.yaml diff --git a/templates/common/_base/units/nmstate-configuration.service.yaml b/templates/common/baremetal/units/nmstate-configuration.service.yaml similarity index 100% rename from templates/common/_base/units/nmstate-configuration.service.yaml rename to templates/common/baremetal/units/nmstate-configuration.service.yaml