diff --git a/pkg/daemon/daemon.go b/pkg/daemon/daemon.go index 0c5ccdafa1..c162ebc63b 100644 --- a/pkg/daemon/daemon.go +++ b/pkg/daemon/daemon.go @@ -117,6 +117,8 @@ type Daemon struct { booting bool currentConfigPath string + + loggerSupportsJournal bool } const ( @@ -217,6 +219,7 @@ func New( } dn.currentConfigPath = currentConfigPath dn.atomicSSHKeysWriter = dn.atomicallyWriteSSHKey + dn.loggerSupportsJournal = dn.isLoggingToJournalSupported() return dn, nil } @@ -745,6 +748,28 @@ func (dn *Daemon) LogSystemData() { glog.Infof("journalctl --list-boots:\n" + string(boots)) } +const ( + pendingConfigPath = "/etc/machine-config-daemon/state.json" +) + +type pendingConfigState struct { + PendingConfig string `json:"pendingConfig,omitempty"` + BootID string `json:"bootID,omitempty"` +} + +// writePendingConfig saves the desired config name and the current boot ID as JSON at pendingConfigPath. XXX: drop this +func (dn *Daemon) writePendingConfig(desiredConfig *mcfgv1.MachineConfig) error { + t := &pendingConfigState{ + PendingConfig: desiredConfig.GetName(), + BootID: dn.bootID, + } + b, err := json.Marshal(t) + if err != nil { + return err + } + return writeFileAtomicallyWithDefaults(pendingConfigPath, b) +} + // XXX: drop this // we need this compatibility layer for now func (dn *Daemon) getPendingConfig() (string, error) { @@ -756,10 +781,6 @@ func (dn *Daemon) getPendingConfig() (string, error) { dn.logSystem("error loading pending config %v", err) return "", nil } - type pendingConfigState struct { - PendingConfig string `json:"pendingConfig,omitempty"` - BootID string `json:"bootID,omitempty"` - } var p pendingConfigState if err := json.Unmarshal([]byte(s), &p); err != nil { return "", errors.Wrapf(err, "parsing transient state") @@ -910,7 +931,7 @@ func (dn *Daemon) CheckStateOnBoot() error { } } - inDesiredConfig := state.currentConfig == state.desiredConfig + inDesiredConfig := state.currentConfig.GetName() == state.desiredConfig.GetName() if inDesiredConfig { if state.pendingConfig != nil { // Great, we've successfully rebooted for the desired config, diff 
--git a/pkg/daemon/update.go b/pkg/daemon/update.go index da1d13a29d..7fa92f9ca1 100644 --- a/pkg/daemon/update.go +++ b/pkg/daemon/update.go @@ -13,6 +13,7 @@ import ( "reflect" "strconv" "strings" + "syscall" "time" ignv2_2types "github.com/coreos/ignition/config/v2_2/types" @@ -94,6 +95,9 @@ func (dn *Daemon) updateOSAndReboot(newConfig *mcfgv1.MachineConfig) (retErr err } defer func() { if retErr != nil { + if dn.recorder != nil { + dn.recorder.Eventf(getNodeRef(dn.node), corev1.EventTypeNormal, "PendingConfigRollBack", "Rolling back pending config %s: %v", newConfig.GetName(), retErr) + } if out, err := dn.storePendingState(newConfig, 0); err != nil { retErr = errors.Wrapf(retErr, "error rolling back pending config %v: %s", err, string(out)) return @@ -702,26 +706,64 @@ func (dn *Daemon) updateOS(config *mcfgv1.MachineConfig) error { return nil } -// getPendingState loads the JSON state we cache across attempting to apply -// a config+reboot. If no pending state is available, ("", nil) will be returned. -// The bootID is stored in the pending state; if it is unchanged, we assume -// that we failed to reboot; that for now should be a fatal error, in order to avoid -// reboot loops. 
-func (dn *Daemon) getPendingState() (string, error) { - journalOutput, err := exec.Command("journalctl", "-o", "json", fmt.Sprintf("MESSAGE_ID=%s", pendingStateMessageID)).CombinedOutput() +// RHEL 7.6 logger (util-linux) doesn't have the --journald flag +func (dn *Daemon) isLoggingToJournalSupported() bool { + loggerOutput, err := exec.Command("logger", "--help").CombinedOutput() if err != nil { - return "", err + dn.logSystem("error running logger --help: %v", err) + if dn.OperatingSystem == machineConfigDaemonOSRHCOS { + return true + } + return false } + return strings.Contains(string(loggerOutput), "--journald") +} + +func (dn *Daemon) getPendingStateLegacyLogger() (string, error) { + glog.Info("logger doesn't support --journald, grepping the journal") + + cmdLiteral := "journalctl -o cat | grep OPENSHIFT_MACHINE_CONFIG_DAEMON_LEGACY_LOG_HACK" + cmd := exec.Command("bash", "-c", cmdLiteral) + var combinedOutput bytes.Buffer + cmd.Stdout = &combinedOutput + cmd.Stderr = &combinedOutput + if err := cmd.Start(); err != nil { + return "", errors.Wrap(err, "failed shelling out to journalctl -o cat") + } + if err := cmd.Wait(); err != nil { + if exiterr, ok := err.(*exec.ExitError); ok { + // The program has exited with an exit code != 0 + status, ok := exiterr.Sys().(syscall.WaitStatus) + if ok { + // grep exit with 1 if it doesn't find anything + // from man: Normally, the exit status is 0 if selected lines are found and 1 otherwise. But the exit status is 2 if an error occurred + if status.ExitStatus() == 1 { + return "", nil + } + if status.ExitStatus() != 0 { // 2+ is a grep error; -1 means the shell was killed by a signal + return "", errors.Wrapf(fmt.Errorf("grep exited with %s", combinedOutput.Bytes()), "failed to grep on journal output: %v", exiterr) + } + } + } else { + return "", errors.Wrap(err, "command wait error") + } + } + journalOutput := combinedOutput.Bytes() + // purely defensive: grep exits 0 only when it printed at least one matching line 
if len(journalOutput) == 0 { return "", nil } + return dn.processJournalOutput(journalOutput) +} + +func (dn *Daemon) processJournalOutput(journalOutput []byte) (string, error) { lines := strings.Split(strings.TrimSpace(string(journalOutput)), "\n") last := lines[len(lines)-1] type journalMsg struct { - Message string `json:"MESSAGE,omitempty"` - // TODO(runcom): journal messages have a _BOOT_ID field, might use that - BootID string `json:"BOOT_ID,omitempty"` - Pending string `json:"PENDING,omitempty"` + Message string `json:"MESSAGE,omitempty"` + BootID string `json:"BOOT_ID,omitempty"` + Pending string `json:"PENDING,omitempty"` + OldLogger string `json:"OPENSHIFT_MACHINE_CONFIG_DAEMON_LEGACY_LOG_HACK,omitempty"` // unused today } entry := &journalMsg{} if err := json.Unmarshal([]byte(last), entry); err != nil { @@ -736,7 +778,46 @@ func (dn *Daemon) getPendingState() (string, error) { return entry.Message, nil } +// getPendingState loads the JSON state we cache across attempting to apply +// a config+reboot. If no pending state is available, ("", nil) will be returned. +// The bootID is stored in the pending state; if it is unchanged, we assume +// that we failed to reboot; that for now should be a fatal error, in order to avoid +// reboot loops. 
+func (dn *Daemon) getPendingState() (string, error) { + if !dn.loggerSupportsJournal { + return dn.getPendingStateLegacyLogger() + } + journalOutput, err := exec.Command("journalctl", "-o", "json", fmt.Sprintf("MESSAGE_ID=%s", pendingStateMessageID)).CombinedOutput() + if err != nil { + return "", errors.Wrap(err, "error running journalctl -o json") + } + if len(journalOutput) == 0 { + return "", nil + } + return dn.processJournalOutput(journalOutput) +} + +func (dn *Daemon) storePendingStateLegacyLogger(pending *mcfgv1.MachineConfig, isPending int) ([]byte, error) { + glog.Info("logger doesn't support --journald, logging json directly") + + if isPending == 1 { + if err := dn.writePendingConfig(pending); err != nil { + return nil, err + } + } else { + if err := os.Remove(pendingConfigPath); err != nil && !os.IsNotExist(err) { + return nil, err + } + } + + oldLogger := exec.Command("logger", fmt.Sprintf(`{"MESSAGE": "%s", "BOOT_ID": "%s", "PENDING": "%d", "OPENSHIFT_MACHINE_CONFIG_DAEMON_LEGACY_LOG_HACK": "1"}`, pending.GetName(), dn.bootID, isPending)) + return oldLogger.CombinedOutput() +} + func (dn *Daemon) storePendingState(pending *mcfgv1.MachineConfig, isPending int) ([]byte, error) { + if !dn.loggerSupportsJournal { + return dn.storePendingStateLegacyLogger(pending, isPending) + } logger := exec.Command("logger", "--journald") var pendingState bytes.Buffer