Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 23 additions & 8 deletions pkg/daemon/drain.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ import (

"github.com/BurntSushi/toml"
"github.com/containers/image/v5/pkg/sysregistriesv2"
ign3types "github.com/coreos/ignition/v2/config/v3_2/types"
"github.com/golang/glog"
ctrlcommon "github.com/openshift/machine-config-operator/pkg/controller/common"
"github.com/openshift/machine-config-operator/pkg/daemon/constants"
Expand Down Expand Up @@ -153,15 +152,17 @@ func (dn *Daemon) performDrain() error {
return nil
}

type ReadFileFunc func(string) ([]byte, error)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This looks totally fine to me as is, but I'm thinking we may want to make this an interface in the future?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.


// isDrainRequired determines whether node drain is required or not to apply config changes.
func isDrainRequired(actions, diffFileSet []string, oldIgnConfig, newIgnConfig ign3types.Config) (bool, error) {
func isDrainRequired(actions, diffFileSet []string, readOldFile, readNewFile ReadFileFunc) (bool, error) {
if ctrlcommon.InSlice(postConfigChangeActionReboot, actions) {
// Node is going to reboot, we definitely want to perform drain
return true, nil
} else if ctrlcommon.InSlice(postConfigChangeActionReloadCrio, actions) {
// Drain may or may not be necessary in case of container registry config changes.
if ctrlcommon.InSlice(constants.ContainerRegistryConfPath, diffFileSet) {
isSafe, err := isSafeContainerRegistryConfChanges(oldIgnConfig, newIgnConfig)
isSafe, err := isSafeContainerRegistryConfChanges(readOldFile, readNewFile)
if err != nil {
return false, err
}
Expand All @@ -175,6 +176,20 @@ func isDrainRequired(actions, diffFileSet []string, oldIgnConfig, newIgnConfig i
return true, nil
}

func (dn *Daemon) drainIfRequired(actions, diffFileSet []string, readOldFile, readNewFile ReadFileFunc) error {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you explain what this is doing? Because I don't quite understand or know if this refactor should be pulled into master without accompanying code to make sense of it?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's just factoring code out of update() so it doesn't have to be duplicated when we want to perform drains for a layered update

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I can't help but feel like that's should probably live with the other code that needs it? Feels weird to merge it if it's needed for something that isn't in master or a clear optimization for master

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The counterbalance here is avoiding a later merge of layering into master being too large and painful, as well as the ongoing cost of rebases.

It's highly likely that there will be an intersection between layering and hypershift work for example. And having prep patches merged into master helps avoid conflicts there too.

In this particular case, I think

  • The code isn't significantly better or anything, but IMO it is a bit better
  • The code isn't difficult to review
  • CI covers all the functionality here

IMO, that on balance argues for a master merge.

drain, err := isDrainRequired(actions, diffFileSet, readOldFile, readNewFile)
if err != nil {
return err
}

if drain {
return dn.performDrain()
}

glog.Info("Changes do not require drain, skipping.")
return nil
}

// isSafeContainerRegistryConfChanges looks inside old and new versions of registries.conf file.
// It compares the content and determines whether changes made are safe or not. This will
// help MCD to decide whether we can skip node drain for applied changes into container
Expand All @@ -184,16 +199,16 @@ func isDrainRequired(actions, diffFileSet []string, oldIgnConfig, newIgnConfig i
// 2. A new registry has been added that has `mirror-by-digest-only=true`
// See https://bugzilla.redhat.com/show_bug.cgi?id=1943315
//nolint:gocyclo
func isSafeContainerRegistryConfChanges(oldIgnConfig, newIgnConfig ign3types.Config) (bool, error) {
func isSafeContainerRegistryConfChanges(readOldFile, readNewFile ReadFileFunc) (bool, error) {
// /etc/containers/registries.conf contains config in toml format. Parse the file
oldData, err := ctrlcommon.GetIgnitionFileDataByPath(&oldIgnConfig, constants.ContainerRegistryConfPath)
oldData, err := readOldFile(constants.ContainerRegistryConfPath)
if err != nil {
return false, fmt.Errorf("Failed decoding Data URL scheme string: %v", err)
return false, fmt.Errorf("failed to get old registries.conf content: %w", err)
}

newData, err := ctrlcommon.GetIgnitionFileDataByPath(&newIgnConfig, constants.ContainerRegistryConfPath)
newData, err := readNewFile(constants.ContainerRegistryConfPath)
if err != nil {
return false, fmt.Errorf("Failed decoding Data URL scheme string %v", err)
return false, fmt.Errorf("failed to get new registries.conf content: %w", err)
}

tomlConfOldReg := sysregistriesv2.V2RegistriesConf{}
Expand Down
2 changes: 1 addition & 1 deletion pkg/daemon/drain_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -359,7 +359,7 @@ location = "example.com/repo/test-img"
t.Errorf("parsing new Ignition config failed: %v", err)
}
diffFileSet := ctrlcommon.CalculateConfigFileDiffs(&oldIgnConfig, &newIgnConfig)
drain, err := isDrainRequired(test.actions, diffFileSet, oldIgnConfig, newIgnConfig)
drain, err := isDrainRequired(test.actions, diffFileSet, getIgnitionFileDataReadFunc(&oldIgnConfig), getIgnitionFileDataReadFunc(&newIgnConfig))
if !reflect.DeepEqual(test.expectedAction, drain) {
t.Errorf("Failed determining drain behavior: expected: %v but result is: %v. Error: %v", test.expectedAction, drain, err)
}
Expand Down
52 changes: 31 additions & 21 deletions pkg/daemon/update.go
Original file line number Diff line number Diff line change
Expand Up @@ -116,9 +116,6 @@ func reloadService(name string) error {
}

// performPostConfigChangeAction takes action based on what postConfigChangeAction has been asked.
// For non-reboot action, it applies configuration, updates node's config and state.
// In the end uncordon node to schedule workload.
// If at any point an error occurs, we reboot the node so that node has correct configuration.
Comment on lines -119 to -121
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are these comments out of date? Or am I misunderstanding how error handling works here? It looks like it just returns errors

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think you're right that those bits got moved out? Though honestly I am having trouble following all of the flow.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah yeah I think I did that here: d200340

Without properly updating the comment

func (dn *Daemon) performPostConfigChangeAction(postConfigChangeActions []string, configName string) error {
if ctrlcommon.InSlice(postConfigChangeActionReboot, postConfigChangeActions) {
dn.logSystem("Rebooting node")
Expand Down Expand Up @@ -147,10 +144,10 @@ func (dn *Daemon) performPostConfigChangeAction(postConfigChangeActions []string
}
dn.logSystem("%s config reloaded successfully! Desired config %s has been applied, skipping reboot", serviceName, configName)
}
return nil
}

// We are here, which means reboot was not needed to apply the configuration.

// Get current state of node, in case of an error reboot
func (dn *Daemon) getAndUpdateConfigAndState(configName string) error {
state, err := dn.getStateAndConfigs(configName)
if err != nil {
return fmt.Errorf("Could not apply update: error processing state and configs. Error: %v", err)
Expand Down Expand Up @@ -484,11 +481,7 @@ func (dn *Daemon) calculatePostConfigChangeActionWithMCDiff(diff *machineConfigD
return dn.calculatePostConfigChangeActionFromFiles(diffFileSet)
}

// update the node to the provided node configuration.
func (dn *Daemon) update(oldConfig, newConfig *mcfgv1.MachineConfig) (retErr error) {

oldConfig = canonicalizeEmptyMC(oldConfig)

func (dn *Daemon) setWorking() error {
if dn.nodeWriter != nil {
state, err := getNodeAnnotationExt(dn.node, constants.MachineConfigDaemonStateAnnotationKey, true)
if err != nil {
Expand All @@ -500,6 +493,21 @@ func (dn *Daemon) update(oldConfig, newConfig *mcfgv1.MachineConfig) (retErr err
}
}
}
return nil
}
Copy link
Contributor

@kikisdeliveryservice kikisdeliveryservice Mar 15, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

similar comment here - i dont really understand why this is necessary or how it fits into things?


// this should probably become part of the implementation of an Updater interface
func getIgnitionFileDataReadFunc(ignConfig *ign3types.Config) ReadFileFunc {
return func(path string) ([]byte, error) {
return ctrlcommon.GetIgnitionFileDataByPath(ignConfig, path)
}
}

// update the node to the provided node configuration.
func (dn *Daemon) update(oldConfig, newConfig *mcfgv1.MachineConfig) (retErr error) {
if err := dn.setWorking(); err != nil {
return fmt.Errorf("failed to set working: %w", err)
}

dn.catchIgnoreSIGTERM()
defer func() {
Expand All @@ -508,6 +516,7 @@ func (dn *Daemon) update(oldConfig, newConfig *mcfgv1.MachineConfig) (retErr err
dn.cancelSIGTERM()
}()

oldConfig = canonicalizeEmptyMC(oldConfig)
oldConfigName := oldConfig.GetName()
newConfigName := newConfig.GetName()

Expand Down Expand Up @@ -547,17 +556,9 @@ func (dn *Daemon) update(oldConfig, newConfig *mcfgv1.MachineConfig) (retErr err
}

// Check and perform node drain if required
drain, err := isDrainRequired(actions, diffFileSet, oldIgnConfig, newIgnConfig)
if err != nil {
if err := dn.drainIfRequired(actions, diffFileSet, getIgnitionFileDataReadFunc(&oldIgnConfig), getIgnitionFileDataReadFunc(&newIgnConfig)); err != nil {
return err
}
if drain {
if err := dn.performDrain(); err != nil {
return err
}
} else {
glog.Info("Changes do not require drain, skipping.")
}

// update files on disk that need updating
if err := dn.updateFiles(oldIgnConfig, newIgnConfig); err != nil {
Expand Down Expand Up @@ -626,7 +627,16 @@ func (dn *Daemon) update(oldConfig, newConfig *mcfgv1.MachineConfig) (retErr err
return err
}

return dn.performPostConfigChangeAction(actions, newConfig.GetName())
if err := dn.performPostConfigChangeAction(actions, newConfig.GetName()); err != nil {
return err
}

// if dn.skipReboot, a reboot might be performed manually after update(), in which case we don't want to update state
// else reboot was not needed to apply the configuration, and we need to update state
if !dn.skipReboot {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm a bit confused at the use of skipReboot here. This is only for onceFrom right? Why is this specifically handled here?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I missed adding this condition originally and it was causing bootstrap to fail. Looking at the old version of performPostConfigChangeAction it calls dn.reboot which I initially assumed would never return. But if dn.skipReboot == true, dn.reboot does return, and performPostConfigChangeAction returns early before updating state. So I had to add this to preserve that behavior. I can't say I fully understand why that's necessary because how state is managed is pretty unclear to me

return dn.getAndUpdateConfigAndState(newConfig.GetName())
}
return nil
}

// machineConfigDiff represents an ad-hoc difference between two MachineConfig objects.
Expand Down