Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions pkg/daemon/constants/constants.go
Original file line number Diff line number Diff line change
Expand Up @@ -84,4 +84,13 @@ const (

// changes to registries.conf will cause a crio reload and require extra logic about whether to drain
ContainerRegistryConfPath = "/etc/containers/registries.conf"

// SSH Keys for user "core" will only be written at /home/core/.ssh
CoreUserSSHPath = "/home/" + CoreUserName + "/.ssh"

// SSH keys in RHCOS 8 will be written to /home/core/.ssh/authorized_keys
RHCOS8SSHKeyPath = CoreUserSSHPath + "/authorized_keys"

// SSH keys in RHCOS 9 / FCOS / SCOS will be written to /home/core/.ssh/authorized_keys.d/ignition
RHCOS9SSHKeyPath = CoreUserSSHPath + "/authorized_keys.d/ignition"
)
72 changes: 72 additions & 0 deletions pkg/daemon/daemon.go
Original file line number Diff line number Diff line change
Expand Up @@ -1490,6 +1490,71 @@ func removeIgnitionArtifacts() error {
return nil
}

// When we move from RHCOS 8 -> RHCOS 9, the SSH keys do not get written to the
// new location before the node reboots into RHCOS 9 because:
//
// 1. When the upgrade configs are written to the node, it is still running
// RHCOS 8, so the keys are not being written to the new location since the
// location is inferred from the currently booted OS.
// 2. The node reboots into RHCOS 9 to complete the upgrade.
// 3. The "are we on the latest config" functions detect that we are indeed on
// the latest config and so it does not attempt to perform an update.
//
// To work around that check on bootup if the we should use the new SSH key
// path and if the old SSH key path exists, we know that we need to migrate tot
// he new key path by calling dn.updateSSHKeyLocation().
func (dn *Daemon) isSSHKeyLocationUpdateRequired() (bool, error) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not needed for the PR. We may also need similar logic to write things back in authorized_keys when downgrade/rollback is possible.

if !dn.useNewSSHKeyPath() {
// Return early because we're not using the new SSH key path.
return false, nil
}

oldKeyExists, err := fileExists(constants.RHCOS8SSHKeyPath)
if err != nil {
return false, err
}

newKeyExists, err := fileExists(constants.RHCOS9SSHKeyPath)
if err != nil {
return false, err
}

// If the old key exists and the new key does not, we need to update.
return oldKeyExists && !newKeyExists, nil
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So, just checking, in the off chance the oldKeyExists and newKeyExists, we wouldn't perform an upgrade, but the validation will later fail and degrade the node since the "old" location is still being used, right?

Copy link
Member Author

@cheesesashimi cheesesashimi Feb 22, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That is correct. If that happens, it could be because of either:

  1. The old key couldn't be deleted for some reason (though the sync loop would've failed at that point anyway).
  2. The cluster admin (or something else) unexpectedly wrote to the old key location instead of creating a MachineConfig.

That won't happen right now though because the Config Drift Monitor is not yet aware of SSH keys.

}

// Decode the Ignition config and perform the SSH key update.
func (dn *Daemon) updateSSHKeyLocation(cfg *mcfgv1.MachineConfig) error {
glog.Infof("SSH key location update required. Moving SSH keys from %q to %q.", constants.RHCOS8SSHKeyPath, constants.RHCOS9SSHKeyPath)

ignConfig, err := ctrlcommon.ParseAndConvertConfig(cfg.Spec.Config.Raw)
if err != nil {
return fmt.Errorf("ignition failure when updating SSH key location: %w", err)
}

if err := dn.updateSSHKeys(ignConfig.Passwd.Users); err != nil {
return fmt.Errorf("could not write SSH keys to new location: %w", err)
}

return nil
}

// Determines if we need to update the SSH key location and performs the
// necessary update if so.
func (dn *Daemon) updateSSHKeyLocationIfNeeded(cfg *mcfgv1.MachineConfig) error {
sshKeyLocationUpdateRequired, err := dn.isSSHKeyLocationUpdateRequired()
if err != nil {
return fmt.Errorf("unable to determine if SSH key location update is required: %w", err)
}

if !sshKeyLocationUpdateRequired {
glog.Infof("SSH key location (%q) up-to-date!", constants.RHCOS9SSHKeyPath)
return nil
}

return dn.updateSSHKeyLocation(cfg)
}

// checkStateOnFirstRun is a core entrypoint for our state machine.
// It determines whether we're in our desired state, or if we're
// transitioning between states, and whether or not we need to update
Expand Down Expand Up @@ -1642,6 +1707,13 @@ func (dn *Daemon) checkStateOnFirstRun() error {
return dn.triggerUpdateWithMachineConfig(state.currentConfig, state.desiredConfig)
}

// When upgrading the OS, it is possible that the SSH key location will
// change. We should detect whether that is the case and update before we
// check for any config drift.
if err := dn.updateSSHKeyLocationIfNeeded(expectedConfig); err != nil {
return err
}

if err := dn.validateOnDiskState(expectedConfig); err != nil {
wErr := fmt.Errorf("unexpected on-disk state validating against %s: %w", expectedConfig.GetName(), err)
dn.nodeWriter.Eventf(corev1.EventTypeWarning, "OnDiskStateValidationFailed", wErr.Error())
Expand Down
169 changes: 116 additions & 53 deletions pkg/daemon/update.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"context"
"errors"
"fmt"
"io/fs"
"os"
"os/exec"
"os/user"
Expand Down Expand Up @@ -36,8 +37,6 @@ const (
defaultDirectoryPermissions os.FileMode = 0o755
// defaultFilePermissions houses the default mode to use when no file permissions are provided
defaultFilePermissions os.FileMode = 0o644
// SSH Keys for user "core" will only be written at /home/core/.ssh
coreUserSSHPath = "/home/core/.ssh/"
// fipsFile is the file to check if FIPS is enabled
fipsFile = "/proc/sys/crypto/fips_enabled"
extensionsRepo = "/etc/yum.repos.d/coreos-extensions.repo"
Expand Down Expand Up @@ -1550,7 +1549,30 @@ func (dn *Daemon) writeFiles(files []ign3types.File) error {
return writeFiles(files)
}

func (dn *Daemon) atomicallyWriteSSHKey(keys string) error {
// Ensures that both the SSH root directory (/home/core/.ssh) as well as any
// subdirectories are created with the correct (0700) permissions.
func createSSHKeyDir(authKeyDir string) error {
glog.Infof("Creating missing SSH key dir at %s", authKeyDir)

mkdir := func(dir string) error {
return exec.Command("runuser", "-u", constants.CoreUserName, "--", "mkdir", "-m", "0700", "-p", dir).Run()
}

// Create the root SSH key directory (/home/core/.ssh) first.
if err := mkdir(filepath.Dir(constants.RHCOS8SSHKeyPath)); err != nil {
return err
}

// For RHCOS 8, creating /home/core/.ssh is all that is needed.
if authKeyDir == constants.RHCOS8SSHKeyPath {
return nil
}

// Create the next level of the SSH key directory (/home/core/.ssh/authorized_keys.d) for RHCOS 9 cases.
return mkdir(filepath.Dir(constants.RHCOS9SSHKeyPath))
}

func (dn *Daemon) atomicallyWriteSSHKey(authKeyPath, keys string) error {
uid, err := lookupUID(constants.CoreUserName)
if err != nil {
return err
Expand All @@ -1561,24 +1583,15 @@ func (dn *Daemon) atomicallyWriteSSHKey(keys string) error {
return err
}

var authKeyPath string
if dn.os.IsEL9() || dn.os.IsFCOS() {
// In FCOS and EL9, ignition writes the SSH key to ~/.ssh/authorized_keys.d/ignition,
// and the ssh-key-dir sshd AuthorizedKeysCommand binary reads it from there.
authKeyPath = filepath.Join(coreUserSSHPath, "authorized_keys.d", "ignition")
} else {
authKeyPath = filepath.Join(coreUserSSHPath, "authorized_keys")
}

// Keys should only be written to "/home/core/.ssh"
// Once Users are supported fully this should be writing to PasswdUser.HomeDir
glog.Infof("Writing SSHKeys at %q", authKeyPath)
glog.Infof("Writing SSH keys to %q", authKeyPath)

// Creating coreUserSSHPath in advance if it doesn't exist in order to ensure it is owned by core user
// Creating CoreUserSSHPath in advance if it doesn't exist in order to ensure it is owned by core user
// See https://bugzilla.redhat.com/show_bug.cgi?id=2107113
if _, err := os.Stat(coreUserSSHPath); os.IsNotExist(err) {
mkdirCoreCommand := exec.Command("runuser", "-u", constants.CoreUserName, "--", "mkdir", "-m", "0700", "-p", coreUserSSHPath)
if err := mkdirCoreCommand.Run(); err != nil {
authKeyDir := filepath.Dir(authKeyPath)
if _, err := os.Stat(authKeyDir); os.IsNotExist(err) {
if err := createSSHKeyDir(authKeyDir); err != nil {
return err
}
}
Expand All @@ -1587,7 +1600,7 @@ func (dn *Daemon) atomicallyWriteSSHKey(keys string) error {
return err
}

glog.V(2).Infof("Wrote SSHKeys at %s", authKeyPath)
glog.V(2).Infof("Wrote SSH keys to %q", authKeyPath)

return nil
}
Expand Down Expand Up @@ -1625,6 +1638,13 @@ func (dn *Daemon) SetPasswordHash(newUsers []ign3types.PasswdUser) error {
return nil
}

// Determines if we should use the new SSH key path
// (/home/core/.ssh/authorized_keys.d/ignition) or the old SSH key path
// (/home/core/.ssh/authorized_keys)
func (dn *Daemon) useNewSSHKeyPath() bool {
return dn.os.IsEL9() || dn.os.IsFCOS() || dn.os.IsSCOS()
}

// Update a given PasswdUser's SSHKey
func (dn *Daemon) updateSSHKeys(newUsers []ign3types.PasswdUser) error {
if len(newUsers) == 0 {
Expand All @@ -1650,50 +1670,93 @@ func (dn *Daemon) updateSSHKeys(newUsers []ign3types.PasswdUser) error {
concatSSHKeys = concatSSHKeys + string(k) + "\n"
}
}

authKeyPath := constants.RHCOS8SSHKeyPath

if !dn.mock {
authKeyPath := filepath.Join(coreUserSSHPath, "authorized_keys")
authKeyFragmentDirPath := filepath.Join(coreUserSSHPath, "authorized_keys.d")

if dn.os.IsFCOS() {
// In older versions of OKD, the keys were written to `/home/core/.ssh/authorized_keys`.
// Newer versions of OKD will however expect the keys at `/home/core/.ssh/authorized_keys.d/ignition`.
// Check if the authorized_keys file at the legacy path exists. If it does, remove it.
// It will be recreated at the new fragment path by the atomicallyWriteSSHKey function
// that is called right after.
_, err := os.Stat(authKeyPath)
if err == nil {
err := os.RemoveAll(authKeyPath)
if err != nil {
return fmt.Errorf("failed to remove path '%s': %w", authKeyPath, err)
}
} else if !os.IsNotExist(err) {
// This shouldn't ever happen
return fmt.Errorf("unexpectedly failed to get info for path '%s': %w", authKeyPath, err)
// In RHCOS 8.6 or lower, the keys were written to `/home/core/.ssh/authorized_keys`.
// RHCOS 9.0+, FCOS, and SCOS will however expect the keys at `/home/core/.ssh/authorized_keys.d/ignition`.
// Check if the authorized_keys file at the legacy path exists. If it does, remove it.
// It will be recreated at the new fragment path by the atomicallyWriteSSHKey function
// that is called right after.
if dn.useNewSSHKeyPath() {
authKeyPath = constants.RHCOS9SSHKeyPath

if err := cleanSSHKeyPaths(); err != nil {
return err
}

// Ensure authorized_keys.d/ignition is the only fragment that exists
keyFragmentsDir, err := ctrlcommon.ReadDir(authKeyFragmentDirPath)
if err == nil {
for _, fragment := range keyFragmentsDir {
if fragment.Name() != "ignition" {
keyPath := filepath.Join(authKeyFragmentDirPath, fragment.Name())
err := os.RemoveAll(keyPath)
if err != nil {
return fmt.Errorf("failed to remove path '%s': %w", keyPath, err)
}
}
}
} else if !os.IsNotExist(err) {
// This shouldn't ever happen
return fmt.Errorf("unexpectedly failed to get info for path '%s': %w", authKeyFragmentDirPath, err)
if err := removeNonIgnitionKeyPathFragments(); err != nil {
return err
}
}

// Note we write keys only for the core user and so this ignores the user list
if err := dn.atomicallyWriteSSHKey(concatSSHKeys); err != nil {
return err
return dn.atomicallyWriteSSHKey(authKeyPath, concatSSHKeys)
}

return nil
}

// Determines if a file exists by checking for the presence or lack thereof of
// an error when stat'ing the file. Returns any other error.
func fileExists(path string) (bool, error) {
_, err := os.Stat(path)
// If there is no error, the file definitely exists.
if err == nil {
return true, nil
}

// If the error matches fs.ErrNotExist, the file definitely does not exist.
if errors.Is(err, fs.ErrNotExist) {
return false, nil
}

// An unexpected error occurred.
return false, fmt.Errorf("cannot stat file: %w", err)
}

// Removes the old SSH key path (/home/core/.ssh/authorized_keys), if found.
func cleanSSHKeyPaths() error {
oldKeyExists, err := fileExists(constants.RHCOS8SSHKeyPath)
if err != nil {
return err
}

if !oldKeyExists {
return nil
}

if err := os.RemoveAll(constants.RHCOS8SSHKeyPath); err != nil {
return fmt.Errorf("failed to remove path '%s': %w", constants.RHCOS8SSHKeyPath, err)
}

return nil
}

// Ensures authorized_keys.d/ignition is the only fragment that exists within the /home/core/.ssh dir.
func removeNonIgnitionKeyPathFragments() error {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why do we need to remove remained files in authorized_keys.d/ ? Someone can ideally add it using regular ignition. Is this an undesired behavior on RHCOS?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure. This behavior was introduced in 9bae8d6. I just refactored it into a separate function for readability.

// /home/core/.ssh/authorized_keys.d
authKeyFragmentDirPath := filepath.Dir(constants.RHCOS9SSHKeyPath)
// ignition
authKeyFragmentBasename := filepath.Base(constants.RHCOS9SSHKeyPath)

keyFragmentsDir, err := ctrlcommon.ReadDir(authKeyFragmentDirPath)
if err == nil {
for _, fragment := range keyFragmentsDir {
if fragment.Name() != authKeyFragmentBasename {
keyPath := filepath.Join(authKeyFragmentDirPath, fragment.Name())
err := os.RemoveAll(keyPath)
if err != nil {
return fmt.Errorf("failed to remove path '%s': %w", keyPath, err)
}
}
}
} else if !errors.Is(err, fs.ErrNotExist) {
// This shouldn't ever happen
return fmt.Errorf("unexpectedly failed to get info for path '%s': %w", constants.RHCOS9SSHKeyPath, err)
}

return nil
}

Expand Down
3 changes: 3 additions & 0 deletions pkg/daemon/update_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -493,6 +493,9 @@ func TestWriteFiles(t *testing.T) {
}
}

// This test provides a false sense of security. Given the combination of the
// mock mode in the MCD coupled with the inputs into this test, it effectively
// no-ops and does not test what we think it tests.
func TestUpdateSSHKeys(t *testing.T) {
d := newMockDaemon()

Expand Down
Loading