From 5fabc5ade2dde1c1b362f449322ca41e9de9e1c8 Mon Sep 17 00:00:00 2001 From: Jonathan Lebon Date: Wed, 4 Sep 2024 16:14:42 -0400 Subject: [PATCH 1/2] ops: add new FileExists method Prep for next patch. Also use that in one spot where we were manually calling `stat`. --- src/installer/installer.go | 2 +- src/installer/installer_test.go | 4 ++-- src/ops/mock_ops.go | 14 ++++++++++++++ src/ops/ops.go | 8 +++++++- 4 files changed, 24 insertions(+), 4 deletions(-) diff --git a/src/installer/installer.go b/src/installer/installer.go index 9143881776..c44b0a064a 100644 --- a/src/installer/installer.go +++ b/src/installer/installer.go @@ -658,7 +658,7 @@ func (i *installer) waitForBootkube(ctx context.Context) { return case <-time.After(generalWaitInterval): // check if bootkube is done every 5 seconds - if _, err := i.ops.ExecPrivilegeCommand(nil, "stat", "/opt/openshift/.bootkube.done"); err == nil { + if i.ops.FileExists("/opt/openshift/.bootkube.done") { // in case bootkube is done log the status and return i.log.Info("bootkube service completed") out, _ := i.ops.ExecPrivilegeCommand(nil, "systemctl", "status", "bootkube.service") diff --git a/src/installer/installer_test.go b/src/installer/installer_test.go index 00274d04a5..cc964b960a 100644 --- a/src/installer/installer_test.go +++ b/src/installer/installer_test.go @@ -297,7 +297,7 @@ var _ = Describe("installer HostRoleMaster role", func() { } waitForBootkubeSuccess := func() { mockbmclient.EXPECT().UpdateHostInstallProgress(gomock.Any(), infraEnvId, hostId, models.HostStageWaitingForBootkube, "").Return(nil).Times(1) - mockops.EXPECT().ExecPrivilegeCommand(gomock.Any(), "stat", "/opt/openshift/.bootkube.done").Return("OK", nil).Times(1) + mockops.EXPECT().FileExists("/opt/openshift/.bootkube.done").Return(true).Times(1) } bootkubeStatusSuccess := func() { mockops.EXPECT().ExecPrivilegeCommand(gomock.Any(), "systemctl", "status", "bootkube.service").Return("1", nil).Times(1) @@ -1048,7 +1048,7 @@ var _ = Describe("installer HostRoleMaster role", func() { } waitForBootkubeSuccess := func() { mockbmclient.EXPECT().UpdateHostInstallProgress(gomock.Any(), infraEnvId, hostId, models.HostStageWaitingForBootkube, "").Return(nil).Times(1) - mockops.EXPECT().ExecPrivilegeCommand(gomock.Any(), "stat", "/opt/openshift/.bootkube.done").Return("OK", nil).Times(1) + mockops.EXPECT().FileExists("/opt/openshift/.bootkube.done").Return(true).Times(1) } bootkubeStatusSuccess := func() { mockops.EXPECT().ExecPrivilegeCommand(gomock.Any(), "systemctl", "status", "bootkube.service").Return("1", nil).Times(1) diff --git a/src/ops/mock_ops.go b/src/ops/mock_ops.go index e4627769a9..9b497b813a 100644 --- a/src/ops/mock_ops.go +++ b/src/ops/mock_ops.go @@ -97,6 +97,20 @@ func (mr *MockOpsMockRecorder) DryRebootHappened(markerPath any) *gomock.Call { return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "DryRebootHappened", reflect.TypeOf((*MockOps)(nil).DryRebootHappened), markerPath) } +// FileExists mocks base method. +func (m *MockOps) FileExists(path string) bool { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "FileExists", path) + ret0, _ := ret[0].(bool) + return ret0 +} + +// FileExists indicates an expected call of FileExists. +func (mr *MockOpsMockRecorder) FileExists(path any) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "FileExists", reflect.TypeOf((*MockOps)(nil).FileExists), path) +} + // EvaluateDiskSymlink mocks base method. func (m *MockOps) EvaluateDiskSymlink(arg0 string) string { m.ctrl.T.Helper() diff --git a/src/ops/ops.go b/src/ops/ops.go index 0640184c1f..058fffe733 100644 --- a/src/ops/ops.go +++ b/src/ops/ops.go @@ -66,6 +66,7 @@ type Ops interface { FormatDisk(string) error CreateManifests(string, []byte) error DryRebootHappened(markerPath string) bool + FileExists(path string) bool ExecPrivilegeCommand(liveLogger io.Writer, command string, args ...string) (string, error) ReadFile(filePath string) ([]byte, error) GetEncapsulatedMC(ignitionPath string) (*mcfgv1.MachineConfig, error) @@ -652,7 +653,12 @@ func (o *ops) CreateManifests(kubeconfig string, content []byte) error { // The dry run installer creates this file on "Reboot" (instead of actually rebooting) // We use this function to check whether the given node in the cluster have already rebooted func (o *ops) DryRebootHappened(markerPath string) bool { - _, err := o.ExecPrivilegeCommand(nil, "stat", markerPath) + return o.FileExists(markerPath) +} + +// FileExists checks if a file exists +func (o *ops) FileExists(path string) bool { + _, err := o.ExecPrivilegeCommand(nil, "stat", path) return err == nil } From f6996ddf3ce3e01d16e5f31b43fc506b837282df Mon Sep 17 00:00:00 2001 From: Jonathan Lebon Date: Wed, 4 Sep 2024 20:03:58 -0400 Subject: [PATCH 2/2] overlay node image before bootstrapping if necessary As per https://github.com/openshift/enhancements/pull/1637, we're trying to get rid of all OpenShift-versioned components from the bootimages. This means that there will no longer be oc, kubelet, or crio binaries for example, which bootstrapping obviously relies on. To adapt to this, the OpenShift installer now ships a new `node-image-overlay.service` in its bootstrap Ignition config. This service takes care of pulling down the node image and overlaying it, effectively updating the system to the node image version. Here, we accordingly also adapt assisted-installer so that we run `node-image-overlay.service` before starting e.g. `kubelet.service` and `bootkube.service`. See also: https://github.com/openshift/installer/pull/8742 --- src/installer/installer.go | 56 ++++++++++++++++++++ src/installer/installer_test.go | 90 +++++++++++++++++++++++++++++++++ 2 files changed, 146 insertions(+) diff --git a/src/installer/installer.go b/src/installer/installer.go index c44b0a064a..c582085220 100644 --- a/src/installer/installer.go +++ b/src/installer/installer.go @@ -51,6 +51,9 @@ const ( singleNodeMasterIgnitionPath = "/opt/openshift/master.ign" waitingForMastersStatusInfo = "Waiting for masters to join bootstrap control plane" waitingForBootstrapToPrepare = "Waiting for bootstrap node preparation" + nodeImagePullService = "node-image-pull.service" + nodeImageOverlayService = "node-image-overlay.service" + openshiftClientBin = "/usr/bin/oc" ) var generalWaitTimeout = 30 * time.Second @@ -453,6 +456,30 @@ func (i *installer) startBootstrap() error { return err } + // If we're in a pure RHEL/CentOS environment, we need to overlay the node image + // first to have access to e.g. oc, kubelet, cri-o, etc... + // https://github.com/openshift/enhancements/pull/1637 + if !i.ops.FileExists(openshiftClientBin) { + err = i.ops.SystemctlAction("start", "--no-block", nodeImagePullService, nodeImageOverlayService) + if err != nil { + return err + } + + if err = i.waitForActiveService(nodeImagePullService, context.Background()); err != nil { + return err + } + + if err = i.waitForActiveService(nodeImageOverlayService, context.Background()); err != nil { + return err + } + + // This is a sanity-check; the overlay was successful so we never expect this to + // fail unless there's a bug somewhere. + if !i.ops.FileExists(openshiftClientBin) { + return errors.Errorf("%s successful but missing %s", nodeImageOverlayService, openshiftClientBin) + } + } + servicesToStart := []string{"bootkube.service", "approve-csr.service", "progress.service"} for _, service := range servicesToStart { err = i.ops.SystemctlAction("start", service) @@ -669,6 +696,35 @@ func (i *installer) waitForBootkube(ctx context.Context) { } } +func (i *installer) waitForActiveService(service string, ctx context.Context) error { + i.log.Infof("Waiting for %s to complete", service) + + var rErr error + waitErr := utils.WaitForPredicate(waitForeverTimeout, generalWaitInterval, func() bool { + // Check if service has completed every 5 seconds. Use `show -P ActiveState` + // instead of `is-active` because the latter has exit code semantics we don't want. + if result, err := i.ops.ExecPrivilegeCommand(nil, "systemctl", "show", "-P", "ActiveState", service); err != nil { + i.log.WithError(err).Warnf("error occurred checking state of %s", service) + } else { + i.log.Infof("%s status: %s", service, result) + switch result { + case "active": + return true + case "failed": + out, _ := i.ops.ExecPrivilegeCommand(nil, "systemctl", "status", service) + i.log.Info(out) + rErr = errors.Errorf("service %s failed", service) + return true + default: + break + } + } + return false + }) + + return stderrors.Join(rErr, waitErr) +} + func (i *installer) waitForController(kc k8s_client.K8SClient) error { i.log.Infof("Waiting for controller to be ready") i.UpdateHostInstallProgress(models.HostStageWaitingForController, "waiting for controller pod ready event") diff --git a/src/installer/installer_test.go b/src/installer/installer_test.go index cc964b960a..2fac4a11f1 100644 --- a/src/installer/installer_test.go +++ b/src/installer/installer_test.go @@ -271,6 +271,28 @@ var _ = Describe("installer HostRoleMaster role", func() { mockops.EXPECT().CreateRandomHostname(gomock.Any()).Return(nil).Times(1) } } + checkOcBinary := func(exists bool) { + mockops.EXPECT().FileExists(openshiftClientBin).Return(exists).Times(1) + } + checkOverlayService := func(name string, injectError bool) { + // verify that we retry if `systemctl show` fails for some reason + mockops.EXPECT().ExecPrivilegeCommand(gomock.Any(), "systemctl", "show", "-P", "ActiveState", name).Return("", errors.New("bad")).Times(1) + // verify that we retry if service is still inactive (hasn't started yet) + mockops.EXPECT().ExecPrivilegeCommand(gomock.Any(), "systemctl", "show", "-P", "ActiveState", name).Return("inactive", nil).Times(1) + if !injectError { + // ok, succeed this time + mockops.EXPECT().ExecPrivilegeCommand(gomock.Any(), "systemctl", "show", "-P", "ActiveState", name).Return("active", nil).Times(1) + } else { + // oh no! the service failed! + mockops.EXPECT().ExecPrivilegeCommand(gomock.Any(), "systemctl", "show", "-P", "ActiveState", name).Return("failed", nil).Times(1) + mockops.EXPECT().ExecPrivilegeCommand(gomock.Any(), "systemctl", "status", name).Return("status", nil).Times(1) + } + } + overlayNodeImage := func(injectError bool) { + mockops.EXPECT().SystemctlAction("start", "--no-block", nodeImagePullService, nodeImageOverlayService).Return(nil).Times(1) + checkOverlayService(nodeImagePullService, false) + checkOverlayService(nodeImageOverlayService, injectError) + } startServicesSuccess := func() { services := []string{"bootkube.service", "progress.service", "approve-csr.service"} for i := range services { @@ -354,6 +376,7 @@ var _ = Describe("installer HostRoleMaster role", func() { checkLocalHostname("notlocalhost", nil) restartNetworkManager(nil) prepareControllerSuccess() + checkOcBinary(true) startServicesSuccess() WaitMasterNodesSucccess() waitForBootkubeSuccess() @@ -375,6 +398,63 @@ var _ = Describe("installer HostRoleMaster role", func() { ret := installerObj.InstallNode() Expect(ret).Should(BeNil()) }) + It("bootstrap role happy flow on RHEL-only bootimage", func() { + updateProgressSuccess([][]string{{string(models.HostStageStartingInstallation), conf.Role}, + {string(models.HostStageWaitingForControlPlane), waitingForBootstrapToPrepare}, + {string(models.HostStageWaitingForControlPlane), waitingForMastersStatusInfo}, + {string(models.HostStageInstalling), string(models.HostRoleMaster)}, + {string(models.HostStageWritingImageToDisk)}, + {string(models.HostStageRebooting)}, + }) + bootstrapSetup() + checkLocalHostname("notlocalhost", nil) + restartNetworkManager(nil) + prepareControllerSuccess() + checkOcBinary(false) + overlayNodeImage(false) + checkOcBinary(true) + startServicesSuccess() + WaitMasterNodesSucccess() + waitForBootkubeSuccess() + bootkubeStatusSuccess() + waitForETCDBootstrapSuccess() + bootstrapETCDStatusSuccess() + resolvConfSuccess() + waitForControllerSuccessfully(conf.ClusterID) + //HostRoleMaster flow: + downloadHostIgnitionSuccess(infraEnvId, hostId, "master-host-id.ign") + writeToDiskSuccess(gomock.Any()) + reportLogProgressSuccess() + setBootOrderSuccess(gomock.Any()) + uploadLogsSuccess(true) + ironicAgentDoesntExist() + rebootSuccess() + getEncapsulatedMcSuccess(nil) + overwriteImageSuccess() + ret := installerObj.InstallNode() + Expect(ret).Should(BeNil()) + }) + It("bootstrap role fails on RHEL-only bootimage if can't overlay node image", func() { + updateProgressSuccess([][]string{{string(models.HostStageStartingInstallation), conf.Role}, + {string(models.HostStageWaitingForControlPlane), waitingForBootstrapToPrepare}, + {string(models.HostStageInstalling), string(models.HostRoleMaster)}, + {string(models.HostStageWritingImageToDisk)}, + }) + bootstrapSetup() + checkLocalHostname("notlocalhost", nil) + restartNetworkManager(nil) + prepareControllerSuccess() + checkOcBinary(false) + overlayNodeImage(true) + //HostRoleMaster flow: + downloadHostIgnitionSuccess(infraEnvId, hostId, "master-host-id.ign") + writeToDiskSuccess(gomock.Any()) + setBootOrderSuccess(gomock.Any()) + getEncapsulatedMcSuccess(nil) + overwriteImageSuccess() + ret := installerObj.InstallNode() + Expect(ret).To(HaveOccurred()) + }) It("bootstrap role happy flow with invalid hostname", func() { updateProgressSuccess([][]string{{string(models.HostStageStartingInstallation), conf.Role}, {string(models.HostStageWaitingForControlPlane), waitingForBootstrapToPrepare}, @@ -387,6 +467,7 @@ var _ = Describe("installer HostRoleMaster role", func() { checkLocalHostname("InvalidHostname", nil) restartNetworkManager(nil) prepareControllerSuccess() + checkOcBinary(true) startServicesSuccess() WaitMasterNodesSucccess() waitForBootkubeSuccess() @@ -420,6 +501,7 @@ var _ = Describe("installer HostRoleMaster role", func() { checkLocalHostname("localhost", nil) restartNetworkManager(nil) prepareControllerSuccess() + checkOcBinary(true) startServicesSuccess() WaitMasterNodesSucccess() waitForBootkubeSuccess() @@ -454,6 +536,7 @@ var _ = Describe("installer HostRoleMaster role", func() { checkLocalHostname("notlocalhost", nil) restartNetworkManager(nil) prepareControllerSuccess() + checkOcBinary(true) startServicesSuccess() WaitMasterNodesSucccessWithCluster(&models.Cluster{ Platform: &models.Platform{ @@ -520,6 +603,7 @@ var _ = Describe("installer HostRoleMaster role", func() { checkLocalHostname("notlocalhost", nil) restartNetworkManager(nil) prepareControllerSuccess() + checkOcBinary(true) startServicesSuccess() WaitMasterNodesSucccess() waitForBootkubeSuccess() @@ -633,6 +717,7 @@ var _ = Describe("installer HostRoleMaster role", func() { checkLocalHostname("notlocalhost", nil) restartNetworkManager(nil) prepareControllerSuccess() + checkOcBinary(true) startServicesSuccess() mockbmclient.EXPECT().GetEnabledHostsNamesHosts(gomock.Any(), gomock.Any()).Return(inventoryNamesHost, nil).AnyTimes() @@ -1037,6 +1122,9 @@ var _ = Describe("installer HostRoleMaster role", func() { mockops.EXPECT().CreateRandomHostname(gomock.Any()).Return(nil).Times(1) } } + checkOcBinary := func(exists bool) { + mockops.EXPECT().FileExists(openshiftClientBin).Return(exists).Times(1) + } startServicesSuccess := func() { services := []string{"bootkube.service", "progress.service", "approve-csr.service"} for i := range services { @@ -1079,6 +1167,7 @@ var _ = Describe("installer HostRoleMaster role", func() { singleNodeBootstrapSetup() checkLocalHostname("localhost", nil) prepareControllerSuccess() + checkOcBinary(true) startServicesSuccess() waitForBootkubeSuccess() bootkubeStatusSuccess() @@ -1116,6 +1205,7 @@ var _ = Describe("installer HostRoleMaster role", func() { singleNodeBootstrapSetup() checkLocalHostname("localhost", nil) prepareControllerSuccess() + checkOcBinary(true) startServicesSuccess() waitForBootkubeSuccess() bootkubeStatusSuccess()