diff --git a/cmd/machine-config-daemon/pivot.sh b/cmd/machine-config-daemon/pivot.sh index 52fb1a5eef..4db2178c94 100755 --- a/cmd/machine-config-daemon/pivot.sh +++ b/cmd/machine-config-daemon/pivot.sh @@ -1,2 +1,5 @@ #!/usr/bin/sh -exec /usr/libexec/machine-config-daemon pivot "$@" +set -xeuo pipefail +# This script just exists for "CLI compatibility" for admins +# to use interactively via ssh/oc debug node. +exec /run/bin/machine-config-daemon pivot "$@" diff --git a/cmd/machine-config-daemon/start.go b/cmd/machine-config-daemon/start.go index b47c02cf72..77a31dc556 100644 --- a/cmd/machine-config-daemon/start.go +++ b/cmd/machine-config-daemon/start.go @@ -1,16 +1,21 @@ package main import ( + "bufio" "flag" + "io" "os" "os/exec" "path/filepath" "syscall" + "github.com/google/renameio" + "github.com/golang/glog" "github.com/openshift/machine-config-operator/internal/clients" ctrlcommon "github.com/openshift/machine-config-operator/pkg/controller/common" "github.com/openshift/machine-config-operator/pkg/daemon" + daemonconsts "github.com/openshift/machine-config-operator/pkg/daemon/constants" "github.com/openshift/machine-config-operator/pkg/version" errors "github.com/pkg/errors" "github.com/spf13/cobra" @@ -64,6 +69,35 @@ func bindPodMounts(rootMount string) error { return nil } +func selfCopyToHost() error { + selfExecutableFd, err := os.Open("/proc/self/exe") + if err != nil { + return errors.Wrapf(err, "Opening our binary") + } + defer selfExecutableFd.Close() + if err := os.MkdirAll(filepath.Dir(daemonconsts.HostSelfBinary), 0755); err != nil { + return err + } + t, err := renameio.TempFile(filepath.Dir(daemonconsts.HostSelfBinary), daemonconsts.HostSelfBinary) + if err != nil { + return err + } + defer t.Cleanup() + var mode os.FileMode = 0755 + if err := t.Chmod(mode); err != nil { + return err + } + _, err = io.Copy(bufio.NewWriter(t), selfExecutableFd) + if err != nil { + return err + } + if err := t.CloseAtomicallyReplace(); err != nil { + return err + } + glog.Infof("Copied self to /run/bin/machine-config-daemon on host") + return nil +} + func runStartCmd(cmd *cobra.Command, args []string) { flag.Set("logtostderr", "true") flag.Parse() @@ -123,6 +157,13 @@ func runStartCmd(cmd *cobra.Command, args []string) { return } + // In the cluster case, for now we copy our binary out to the host + // for SELinux reasons, see https://bugzilla.redhat.com/show_bug.cgi?id=1839065 + if err := selfCopyToHost(); err != nil { + glog.Fatalf("%v", errors.Wrapf(err, "copying self to host")) + return + } + cb, err := clients.NewBuilder(startOpts.kubeconfig) if err != nil { glog.Fatalf("Failed to initialize ClientBuilder: %v", err) diff --git a/pkg/daemon/constants/constants.go b/pkg/daemon/constants/constants.go index ac44c819c9..746051be92 100644 --- a/pkg/daemon/constants/constants.go +++ b/pkg/daemon/constants/constants.go @@ -37,6 +37,9 @@ const ( // For more information, see https://github.com/openshift/pivot/pull/25/commits/c77788a35d7ee4058d1410e89e6c7937bca89f6c#diff-04c6e90faac2675aa89e2176d2eec7d8R44 EtcPivotFile = "/etc/pivot/image-pullspec" + // HostSelfBinary is the path where we copy our own binary to the host + HostSelfBinary = "/run/bin/machine-config-daemon" + // MachineConfigEncapsulatedPath contains all of the data from a MachineConfig object // except the Spec/Config object; this supports inverting+encapsulating a MachineConfig // object so that Ignition can process it on first boot, and then the MCD can act on diff --git a/pkg/daemon/rpm-ostree.go b/pkg/daemon/rpm-ostree.go index 508fcd927d..f297fdaaf9 100644 --- a/pkg/daemon/rpm-ostree.go +++ b/pkg/daemon/rpm-ostree.go @@ -3,10 +3,8 @@ package daemon import ( "encoding/json" "fmt" - "io/ioutil" "os" "os/exec" - "path/filepath" "strings" "time" @@ -305,41 +303,29 @@ func (r *RpmOstreeClient) PullAndRebase(container string, keep bool) (imgid stri // https://github.com/openshift/machine-config-operator/issues/314 // Basically rpm_ostree_t has mac_admin, container_t doesn't. func (r *RpmOstreeClient) RunPivot(osImageURL string) error { - if err := os.MkdirAll(filepath.Dir(constants.EtcPivotFile), os.FileMode(0755)); err != nil { - return fmt.Errorf("error creating leading dirs for %s: %v", constants.EtcPivotFile, err) - } - - if err := ioutil.WriteFile(constants.EtcPivotFile, []byte(osImageURL), 0644); err != nil { - return fmt.Errorf("error writing to %s: %v", constants.EtcPivotFile, err) - } - journalStopCh := make(chan time.Time) defer close(journalStopCh) go followPivotJournalLogs(journalStopCh) - // This is written by code injected by the MCS for compatibility with 4.1 bootimages, - // remove it to clean things up. - if err := os.Remove("/run/pivot/reboot-needed"); err != nil && !os.IsNotExist(err) { - return errors.Wrap(err, "deleting pivot reboot-needed file") - } - - service := "machine-config-daemon-host.service" - // We need to use pivot if it's there, because machine-config-daemon-host.service - // currently has a ConditionPathExists=!/usr/lib/systemd/system/pivot.service to - // avoid having *both* pivot and MCD try to update. This code can be dropped - // once we don't need to care about compat with older RHCOS. - var err error - _, err = os.Stat("/usr/lib/systemd/system/pivot.service") - if err != nil { - if !os.IsNotExist(err) { - return errors.Wrapf(err, "checking pivot service") + // These were previously injected by the MCS, let's clean them up if they exist + for _, p := range []string{constants.EtcPivotFile, "/run/pivot/reboot-needed"} { + if err := os.Remove(p); err != nil && !os.IsNotExist(err) { + return errors.Wrapf(err, "deleting %s", p) } - } else { - service = "pivot.service" } - err = exec.Command("systemctl", "start", service).Run() + + // We used to start machine-config-daemon-host here, but now we make a dynamic + // unit because that service was started in too many ways, and the systemd-run + // model of creating a unit dynamically is much clearer for what we want here; + // conceptually the service is just a dynamic child of this pod (if we could we'd + // tie the lifecycle together). Further, let's shorten our systemd unit names + // by using the mco- prefix, and we also inject the RPMOSTREE_CLIENT_ID now. + unitName := "mco-pivot" + glog.Infof("Executing OS update (pivot) on host via systemd-run unit=%s", unitName) + err := exec.Command("systemd-run", "--wait", "--collect", "--unit="+unitName, + "-E", "RPMOSTREE_CLIENT_ID=mco", constants.HostSelfBinary, "pivot", osImageURL).Run() if err != nil { - return errors.Wrapf(err, "failed to start %s", service) + return errors.Wrapf(err, "failed to run pivot") } return nil } @@ -349,8 +335,7 @@ func (r *RpmOstreeClient) RunPivot(osImageURL string) error { func followPivotJournalLogs(stopCh <-chan time.Time) { cmd := exec.Command("journalctl", "-f", "-b", "-o", "cat", "-u", "rpm-ostreed", - "-u", "pivot", - "-u", "machine-config-daemon-host") + "-u", "mco-pivot") cmd.Stdout = os.Stdout cmd.Stderr = os.Stderr if err := cmd.Start(); err != nil { diff --git a/pkg/daemon/update.go b/pkg/daemon/update.go index ed4be2f3c0..f16b838038 100644 --- a/pkg/daemon/update.go +++ b/pkg/daemon/update.go @@ -1280,9 +1280,25 @@ func (dn *Daemon) updateOS(config *mcfgv1.MachineConfig) error { } glog.Infof("Updating OS to %s", newURL) - if err := dn.NodeUpdaterClient.RunPivot(newURL); err != nil { - MCDPivotErr.WithLabelValues(newURL, err.Error()).SetToCurrentTime() - return fmt.Errorf("failed to run pivot: %v", err) + // In the cluster case, for now we run indirectly via machine-config-daemon-host.service + // for SELinux reasons, see https://bugzilla.redhat.com/show_bug.cgi?id=1839065 + if dn.kubeClient != nil { + if err := dn.NodeUpdaterClient.RunPivot(newURL); err != nil { + MCDPivotErr.WithLabelValues(newURL, err.Error()).SetToCurrentTime() + return fmt.Errorf("failed to run pivot: %v", err) + } + } else { + // If we're here we're invoked via `machine-config-daemon-firstboot.service`, so let's + // just run the update directly rather than invoking another service. + client := NewNodeUpdaterClient() + _, changed, err := client.PullAndRebase(newURL, false) + if err != nil { + return err + } + if !changed { + // This really shouldn't happen + glog.Warningf("Didn't change when updating to %s ?", newURL) + } } return nil diff --git a/pkg/server/bootstrap_server.go b/pkg/server/bootstrap_server.go index cd7710b892..1cb966c4a3 100644 --- a/pkg/server/bootstrap_server.go +++ b/pkg/server/bootstrap_server.go @@ -97,7 +97,7 @@ func (bsc *bootstrapServer) GetConfig(cr poolRequest) (*runtime.RawExtension, er return nil, fmt.Errorf("server: could not unmarshal file %s, err: %v", fileName, err) } - appenders := getAppenders(currConf, bsc.kubeconfigFunc, mc.Spec.OSImageURL) + appenders := getAppenders(currConf, bsc.kubeconfigFunc) for _, a := range appenders { if err := a(mc); err != nil { return nil, err diff --git a/pkg/server/cluster_server.go b/pkg/server/cluster_server.go index d72075668d..3be2d4237a 100644 --- a/pkg/server/cluster_server.go +++ b/pkg/server/cluster_server.go @@ -72,7 +72,7 @@ func (cs *clusterServer) GetConfig(cr poolRequest) (*runtime.RawExtension, error return nil, fmt.Errorf("could not fetch config %s, err: %v", currConf, err) } - appenders := getAppenders(currConf, cs.kubeconfigFunc, mc.Spec.OSImageURL) + appenders := getAppenders(currConf, cs.kubeconfigFunc) for _, a := range appenders { if err := a(mc); err != nil { return nil, err diff --git a/pkg/server/server.go b/pkg/server/server.go index f595c795be..d1ef4b2f53 100644 --- a/pkg/server/server.go +++ b/pkg/server/server.go @@ -47,12 +47,10 @@ type Server interface { GetConfig(poolRequest) (*runtime.RawExtension, error) } -func getAppenders(currMachineConfig string, f kubeconfigFunc, osimageurl string) []appenderFunc { +func getAppenders(currMachineConfig string, f kubeconfigFunc) []appenderFunc { appenders := []appenderFunc{ // append machine annotations file. func(mc *mcfgv1.MachineConfig) error { return appendNodeAnnotations(&mc.Spec.Config, currMachineConfig) }, - // append pivot - func(mc *mcfgv1.MachineConfig) error { return appendInitialPivot(&mc.Spec.Config, osimageurl) }, // append kubeconfig. func(mc *mcfgv1.MachineConfig) error { return appendKubeConfig(&mc.Spec.Config, f) }, // append the machineconfig content @@ -82,50 +80,6 @@ func machineConfigToRawIgnition(mccfg *mcfgv1.MachineConfig) (*runtime.RawExtens return &mccfg.Spec.Config, nil } -// Golang :cry: -func boolToPtr(b bool) *bool { - return &b -} - -func appendInitialPivot(rawExt *runtime.RawExtension, osimageurl string) error { - if osimageurl == "" { - return nil - } - - // Tell pivot.service to pivot early - err := appendFileToRawIgnition(rawExt, daemonconsts.EtcPivotFile, osimageurl+"\n") - if err != nil { - return err - } - conf, report, err := ign.Parse(rawExt.Raw) - if err != nil { - return fmt.Errorf("failed to append initial pivot. Parsing Ignition config failed with error: %v\nReport: %v", err, report) - } - // Awful hack to create a file in /run - // https://github.com/openshift/machine-config-operator/pull/363#issuecomment-463397373 - // "So one gotcha here is that Ignition will actually write `/run/pivot/image-pullspec` to the filesystem rather than the `/run` tmpfs" - if len(conf.Systemd.Units) == 0 { - conf.Systemd.Units = make([]igntypes.Unit, 0) - } - unit := igntypes.Unit{ - Name: "mcd-write-pivot-reboot.service", - Enabled: boolToPtr(true), - Contents: `[Unit] -Before=pivot.service -ConditionFirstBoot=true -[Service] -ExecStart=/bin/sh -c 'mkdir /run/pivot && touch /run/pivot/reboot-needed' -[Install] -WantedBy=multi-user.target -`} - conf.Systemd.Units = append(conf.Systemd.Units, unit) - rawExt.Raw, err = json.Marshal(conf) - if err != nil { - return err - } - return nil -} - // appendInitialMachineConfig saves the full serialized MachineConfig that was served // by the MCS when the node first booted. This currently is only used as a debugging aid // in cases where there is unexpected "drift" between the initial bootstrap MC/Ignition and the one diff --git a/templates/common/_base/units/machine-config-daemon-firstboot-v42.service.yaml b/templates/common/_base/units/machine-config-daemon-firstboot-v42.service.yaml deleted file mode 100644 index 9fc59d3ee8..0000000000 --- a/templates/common/_base/units/machine-config-daemon-firstboot-v42.service.yaml +++ /dev/null @@ -1,21 +0,0 @@ -name: machine-config-daemon-firstboot-v42.service -enabled: true -contents: | - [Unit] - Description=Machine Config Daemon Firstboot (4.2 bootimage) - # Make sure it runs only on OSTree booted system - ConditionPathExists=/run/ostree-booted - BindsTo=ignition-firstboot-complete.service - ConditionPathExists=/etc/ignition-machine-config-encapsulated.json - # Note the opposite of this in machine-config-daemon-firstboot - ConditionPathExists=!/sysroot/.coreos-aleph-version.json - After=ignition-firstboot-complete.service - Before=kubelet.service - - [Service] - # Need oneshot to delay kubelet - Type=oneshot - ExecStart=/usr/libexec/machine-config-daemon pivot - - [Install] - WantedBy=multi-user.target diff --git a/templates/common/_base/units/machine-config-daemon-firstboot.service.yaml b/templates/common/_base/units/machine-config-daemon-firstboot.service.yaml index 32bc2fa377..18e05df7d6 100644 --- a/templates/common/_base/units/machine-config-daemon-firstboot.service.yaml +++ b/templates/common/_base/units/machine-config-daemon-firstboot.service.yaml @@ -7,17 +7,14 @@ contents: | ConditionPathExists=/run/ostree-booted # Removal of this file signals firstboot completion ConditionPathExists=/etc/ignition-machine-config-encapsulated.json - # We only want to run on 4.3 clusters and above; this came from - # https://github.com/coreos/coreos-assembler/pull/768 - ConditionPathExists=/sysroot/.coreos-aleph-version.json - After=ignition-firstboot-complete.service + After=machine-config-daemon-pull.service Before=crio.service crio-wipe.service Before=kubelet.service [Service] - # Need oneshot to delay kubelet Type=oneshot - ExecStart=/usr/libexec/machine-config-daemon firstboot-complete-machineconfig + RemainAfterExit=yes + ExecStart=/run/bin/machine-config-daemon firstboot-complete-machineconfig [Install] WantedBy=multi-user.target diff --git a/templates/common/_base/units/machine-config-daemon-host.service.yaml b/templates/common/_base/units/machine-config-daemon-host.service.yaml deleted file mode 100644 index 70592fc048..0000000000 --- a/templates/common/_base/units/machine-config-daemon-host.service.yaml +++ /dev/null @@ -1,38 +0,0 @@ -name: machine-config-daemon-host.service -enabled: false -contents: | - [Unit] - Description=Machine Config Daemon Initial - # This only applies to ostree (MCD) systems; - # see also https://github.com/openshift/machine-config-operator/issues/1046 - ConditionPathExists=/run/ostree-booted - ConditionPathExists=/etc/pivot/image-pullspec - # If pivot exists, defer to it. Note similar code in update.go - ConditionPathExists=!/usr/lib/systemd/system/pivot.service - After=ignition-firstboot-complete.service - Before=kubelet.service - - [Service] - # Need oneshot to delay kubelet - Type=oneshot - # TODO add --from-etc-pullspec after ratcheting - ExecStart=/usr/libexec/machine-config-daemon pivot - - [Install] - WantedBy=multi-user.target -dropins: - - name: 10-mco-default-env.conf - contents: | - [Unit] - {{if .Proxy -}} - [Service] - {{if .Proxy.HTTPProxy -}} - Environment=HTTP_PROXY={{.Proxy.HTTPProxy}} - {{end -}} - {{if .Proxy.HTTPSProxy -}} - Environment=HTTPS_PROXY={{.Proxy.HTTPSProxy}} - {{end -}} - {{if .Proxy.NoProxy -}} - Environment=NO_PROXY={{.Proxy.NoProxy}} - {{end -}} - {{end -}} diff --git a/templates/common/_base/units/machine-config-daemon-pull.service.yaml b/templates/common/_base/units/machine-config-daemon-pull.service.yaml new file mode 100644 index 0000000000..85a9faed0f --- /dev/null +++ b/templates/common/_base/units/machine-config-daemon-pull.service.yaml @@ -0,0 +1,36 @@ +name: "machine-config-daemon-pull.service" +enabled: true +contents: | + [Unit] + Description=Machine Config Daemon Pull + # Make sure it runs only on OSTree booted system + ConditionPathExists=/run/ostree-booted + # This "stamp file" is unlinked when we complete + # machine-config-daemon-firstboot.service + ConditionPathExists=/etc/ignition-machine-config-encapsulated.json + Wants=network-online.target + After=network-online.target + + [Service] + Type=oneshot + RemainAfterExit=yes + # See https://github.com/coreos/fedora-coreos-tracker/issues/354 + ExecStart=/bin/sh -c '/bin/mkdir -p /run/bin && chcon --reference=/usr/bin /run/bin' + ExecStart=/bin/sh -c "/usr/bin/podman pull --authfile=/var/lib/kubelet/config.json --quiet '{{ .Images.setupEtcdEnvKey }}'" + ExecStart=/bin/sh -c "/usr/bin/podman run --rm --quiet --net=host --entrypoint=cat '{{ .Images.setupEtcdEnvKey }}' /usr/bin/machine-config-daemon > /run/bin/machine-config-daemon.tmp" + ExecStart=/bin/sh -c '/usr/bin/chmod a+x /run/bin/machine-config-daemon.tmp && mv /run/bin/machine-config-daemon.tmp /run/bin/machine-config-daemon' + + {{if .Proxy -}} + {{if .Proxy.HTTPProxy -}} + Environment=HTTP_PROXY={{.Proxy.HTTPProxy}} + {{end -}} + {{if .Proxy.HTTPSProxy -}} + Environment=HTTPS_PROXY={{.Proxy.HTTPSProxy}} + {{end -}} + {{if .Proxy.NoProxy -}} + Environment=NO_PROXY={{.Proxy.NoProxy}} + {{end -}} + {{end -}} + + [Install] + RequiredBy=machine-config-daemon-firstboot.service