Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 57 additions & 1 deletion src/installer/installer.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,9 @@ const (
singleNodeMasterIgnitionPath = "/opt/openshift/master.ign"
waitingForMastersStatusInfo = "Waiting for masters to join bootstrap control plane"
waitingForBootstrapToPrepare = "Waiting for bootstrap node preparation"
nodeImagePullService = "node-image-pull.service"
nodeImageOverlayService = "node-image-overlay.service"
openshiftClientBin = "/usr/bin/oc"
)

var generalWaitTimeout = 30 * time.Second
Expand Down Expand Up @@ -453,6 +456,30 @@ func (i *installer) startBootstrap() error {
return err
}

// If we're in a pure RHEL/CentOS environment, we need to overlay the node image
// first to have access to e.g. oc, kubelet, cri-o, etc...
// https://github.com/openshift/enhancements/pull/1637
if !i.ops.FileExists(openshiftClientBin) {
err = i.ops.SystemctlAction("start", "--no-block", nodeImagePullService, nodeImageOverlayService)
if err != nil {
return err
}

if err = i.waitForActiveService(nodeImagePullService, context.Background()); err != nil {
return err
}

if err = i.waitForActiveService(nodeImageOverlayService, context.Background()); err != nil {
return err
}

// This is a sanity-check; the overlay was successful so we never expect this to
// fail unless there's a bug somewhere.
if !i.ops.FileExists(openshiftClientBin) {
return errors.Errorf("%s successful but missing %s", nodeImageOverlayService, openshiftClientBin)
}
}

servicesToStart := []string{"bootkube.service", "approve-csr.service", "progress.service"}
for _, service := range servicesToStart {
err = i.ops.SystemctlAction("start", service)
Expand Down Expand Up @@ -658,7 +685,7 @@ func (i *installer) waitForBootkube(ctx context.Context) {
return
case <-time.After(generalWaitInterval):
// check if bootkube is done every 5 seconds
if _, err := i.ops.ExecPrivilegeCommand(nil, "stat", "/opt/openshift/.bootkube.done"); err == nil {
if i.ops.FileExists("/opt/openshift/.bootkube.done") {
// in case bootkube is done log the status and return
i.log.Info("bootkube service completed")
out, _ := i.ops.ExecPrivilegeCommand(nil, "systemctl", "status", "bootkube.service")
Expand All @@ -669,6 +696,35 @@ func (i *installer) waitForBootkube(ctx context.Context) {
}
}

func (i *installer) waitForActiveService(service string, ctx context.Context) error {
i.log.Infof("Waiting for %s to complete", service)

var rErr error
waitErr := utils.WaitForPredicate(waitForeverTimeout, generalWaitInterval, func() bool {
// Check if service has completed every 5 seconds. Use `show -P ActiveState`
// instead of `is-active` because the latter has exit code semantics we don't want.
if result, err := i.ops.ExecPrivilegeCommand(nil, "systemctl", "show", "-P", "ActiveState", service); err != nil {
i.log.WithError(err).Warnf("error occurred checking state of %s", service)
} else {
i.log.Infof("%s status: %s", service, result)
switch result {
case "active":
return true
case "failed":
out, _ := i.ops.ExecPrivilegeCommand(nil, "systemctl", "status", service)
i.log.Info(out)
rErr = errors.Errorf("service %s failed", service)
return true
default:
break
}
}
return false
})

return stderrors.Join(rErr, waitErr)
}

func (i *installer) waitForController(kc k8s_client.K8SClient) error {
i.log.Infof("Waiting for controller to be ready")
i.UpdateHostInstallProgress(models.HostStageWaitingForController, "waiting for controller pod ready event")
Expand Down
94 changes: 92 additions & 2 deletions src/installer/installer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -271,6 +271,28 @@ var _ = Describe("installer HostRoleMaster role", func() {
mockops.EXPECT().CreateRandomHostname(gomock.Any()).Return(nil).Times(1)
}
}
checkOcBinary := func(exists bool) {
mockops.EXPECT().FileExists(openshiftClientBin).Return(exists).Times(1)
}
checkOverlayService := func(name string, injectError bool) {
// verify that we retry if `systemctl show` fails for some reason
mockops.EXPECT().ExecPrivilegeCommand(gomock.Any(), "systemctl", "show", "-P", "ActiveState", name).Return("", errors.New("bad")).Times(1)
// verify that we retry if service is still inactive (hasn't started yet)
mockops.EXPECT().ExecPrivilegeCommand(gomock.Any(), "systemctl", "show", "-P", "ActiveState", name).Return("inactive", nil).Times(1)
if !injectError {
// ok, succeed this time
mockops.EXPECT().ExecPrivilegeCommand(gomock.Any(), "systemctl", "show", "-P", "ActiveState", name).Return("active", nil).Times(1)
} else {
// oh no! the service failed!
mockops.EXPECT().ExecPrivilegeCommand(gomock.Any(), "systemctl", "show", "-P", "ActiveState", name).Return("failed", nil).Times(1)
mockops.EXPECT().ExecPrivilegeCommand(gomock.Any(), "systemctl", "status", name).Return("status", nil).Times(1)
}
}
overlayNodeImage := func(injectError bool) {
mockops.EXPECT().SystemctlAction("start", "--no-block", nodeImagePullService, nodeImageOverlayService).Return(nil).Times(1)
checkOverlayService(nodeImagePullService, false)
checkOverlayService(nodeImageOverlayService, injectError)
}
startServicesSuccess := func() {
services := []string{"bootkube.service", "progress.service", "approve-csr.service"}
for i := range services {
Expand All @@ -297,7 +319,7 @@ var _ = Describe("installer HostRoleMaster role", func() {
}
waitForBootkubeSuccess := func() {
mockbmclient.EXPECT().UpdateHostInstallProgress(gomock.Any(), infraEnvId, hostId, models.HostStageWaitingForBootkube, "").Return(nil).Times(1)
mockops.EXPECT().ExecPrivilegeCommand(gomock.Any(), "stat", "/opt/openshift/.bootkube.done").Return("OK", nil).Times(1)
mockops.EXPECT().FileExists("/opt/openshift/.bootkube.done").Return(true).Times(1)
}
bootkubeStatusSuccess := func() {
mockops.EXPECT().ExecPrivilegeCommand(gomock.Any(), "systemctl", "status", "bootkube.service").Return("1", nil).Times(1)
Expand Down Expand Up @@ -354,6 +376,7 @@ var _ = Describe("installer HostRoleMaster role", func() {
checkLocalHostname("notlocalhost", nil)
restartNetworkManager(nil)
prepareControllerSuccess()
checkOcBinary(true)
startServicesSuccess()
WaitMasterNodesSucccess()
waitForBootkubeSuccess()
Expand All @@ -375,6 +398,63 @@ var _ = Describe("installer HostRoleMaster role", func() {
ret := installerObj.InstallNode()
Expect(ret).Should(BeNil())
})
It("bootstrap role happy flow on RHEL-only bootimage", func() {
updateProgressSuccess([][]string{{string(models.HostStageStartingInstallation), conf.Role},
{string(models.HostStageWaitingForControlPlane), waitingForBootstrapToPrepare},
{string(models.HostStageWaitingForControlPlane), waitingForMastersStatusInfo},
{string(models.HostStageInstalling), string(models.HostRoleMaster)},
{string(models.HostStageWritingImageToDisk)},
{string(models.HostStageRebooting)},
})
bootstrapSetup()
checkLocalHostname("notlocalhost", nil)
restartNetworkManager(nil)
prepareControllerSuccess()
checkOcBinary(false)
overlayNodeImage(false)
checkOcBinary(true)
startServicesSuccess()
WaitMasterNodesSucccess()
waitForBootkubeSuccess()
bootkubeStatusSuccess()
waitForETCDBootstrapSuccess()
bootstrapETCDStatusSuccess()
resolvConfSuccess()
waitForControllerSuccessfully(conf.ClusterID)
//HostRoleMaster flow:
downloadHostIgnitionSuccess(infraEnvId, hostId, "master-host-id.ign")
writeToDiskSuccess(gomock.Any())
reportLogProgressSuccess()
setBootOrderSuccess(gomock.Any())
uploadLogsSuccess(true)
ironicAgentDoesntExist()
rebootSuccess()
getEncapsulatedMcSuccess(nil)
overwriteImageSuccess()
ret := installerObj.InstallNode()
Expect(ret).Should(BeNil())
})
It("bootstrap role fails on RHEL-only bootimage if can't overlay node image", func() {
updateProgressSuccess([][]string{{string(models.HostStageStartingInstallation), conf.Role},
{string(models.HostStageWaitingForControlPlane), waitingForBootstrapToPrepare},
{string(models.HostStageInstalling), string(models.HostRoleMaster)},
{string(models.HostStageWritingImageToDisk)},
})
bootstrapSetup()
checkLocalHostname("notlocalhost", nil)
restartNetworkManager(nil)
prepareControllerSuccess()
checkOcBinary(false)
overlayNodeImage(true)
//HostRoleMaster flow:
downloadHostIgnitionSuccess(infraEnvId, hostId, "master-host-id.ign")
writeToDiskSuccess(gomock.Any())
setBootOrderSuccess(gomock.Any())
getEncapsulatedMcSuccess(nil)
overwriteImageSuccess()
ret := installerObj.InstallNode()
Expect(ret).To(HaveOccurred())
})
It("bootstrap role happy flow with invalid hostname", func() {
updateProgressSuccess([][]string{{string(models.HostStageStartingInstallation), conf.Role},
{string(models.HostStageWaitingForControlPlane), waitingForBootstrapToPrepare},
Expand All @@ -387,6 +467,7 @@ var _ = Describe("installer HostRoleMaster role", func() {
checkLocalHostname("InvalidHostname", nil)
restartNetworkManager(nil)
prepareControllerSuccess()
checkOcBinary(true)
startServicesSuccess()
WaitMasterNodesSucccess()
waitForBootkubeSuccess()
Expand Down Expand Up @@ -420,6 +501,7 @@ var _ = Describe("installer HostRoleMaster role", func() {
checkLocalHostname("localhost", nil)
restartNetworkManager(nil)
prepareControllerSuccess()
checkOcBinary(true)
startServicesSuccess()
WaitMasterNodesSucccess()
waitForBootkubeSuccess()
Expand Down Expand Up @@ -454,6 +536,7 @@ var _ = Describe("installer HostRoleMaster role", func() {
checkLocalHostname("notlocalhost", nil)
restartNetworkManager(nil)
prepareControllerSuccess()
checkOcBinary(true)
startServicesSuccess()
WaitMasterNodesSucccessWithCluster(&models.Cluster{
Platform: &models.Platform{
Expand Down Expand Up @@ -520,6 +603,7 @@ var _ = Describe("installer HostRoleMaster role", func() {
checkLocalHostname("notlocalhost", nil)
restartNetworkManager(nil)
prepareControllerSuccess()
checkOcBinary(true)
startServicesSuccess()
WaitMasterNodesSucccess()
waitForBootkubeSuccess()
Expand Down Expand Up @@ -633,6 +717,7 @@ var _ = Describe("installer HostRoleMaster role", func() {
checkLocalHostname("notlocalhost", nil)
restartNetworkManager(nil)
prepareControllerSuccess()
checkOcBinary(true)
startServicesSuccess()

mockbmclient.EXPECT().GetEnabledHostsNamesHosts(gomock.Any(), gomock.Any()).Return(inventoryNamesHost, nil).AnyTimes()
Expand Down Expand Up @@ -1037,6 +1122,9 @@ var _ = Describe("installer HostRoleMaster role", func() {
mockops.EXPECT().CreateRandomHostname(gomock.Any()).Return(nil).Times(1)
}
}
checkOcBinary := func(exists bool) {
mockops.EXPECT().FileExists(openshiftClientBin).Return(exists).Times(1)
}
startServicesSuccess := func() {
services := []string{"bootkube.service", "progress.service", "approve-csr.service"}
for i := range services {
Expand All @@ -1048,7 +1136,7 @@ var _ = Describe("installer HostRoleMaster role", func() {
}
waitForBootkubeSuccess := func() {
mockbmclient.EXPECT().UpdateHostInstallProgress(gomock.Any(), infraEnvId, hostId, models.HostStageWaitingForBootkube, "").Return(nil).Times(1)
mockops.EXPECT().ExecPrivilegeCommand(gomock.Any(), "stat", "/opt/openshift/.bootkube.done").Return("OK", nil).Times(1)
mockops.EXPECT().FileExists("/opt/openshift/.bootkube.done").Return(true).Times(1)
}
bootkubeStatusSuccess := func() {
mockops.EXPECT().ExecPrivilegeCommand(gomock.Any(), "systemctl", "status", "bootkube.service").Return("1", nil).Times(1)
Expand Down Expand Up @@ -1079,6 +1167,7 @@ var _ = Describe("installer HostRoleMaster role", func() {
singleNodeBootstrapSetup()
checkLocalHostname("localhost", nil)
prepareControllerSuccess()
checkOcBinary(true)
startServicesSuccess()
waitForBootkubeSuccess()
bootkubeStatusSuccess()
Expand Down Expand Up @@ -1116,6 +1205,7 @@ var _ = Describe("installer HostRoleMaster role", func() {
singleNodeBootstrapSetup()
checkLocalHostname("localhost", nil)
prepareControllerSuccess()
checkOcBinary(true)
startServicesSuccess()
waitForBootkubeSuccess()
bootkubeStatusSuccess()
Expand Down
14 changes: 14 additions & 0 deletions src/ops/mock_ops.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 7 additions & 1 deletion src/ops/ops.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ type Ops interface {
FormatDisk(string) error
CreateManifests(string, []byte) error
DryRebootHappened(markerPath string) bool
FileExists(path string) bool
ExecPrivilegeCommand(liveLogger io.Writer, command string, args ...string) (string, error)
ReadFile(filePath string) ([]byte, error)
GetEncapsulatedMC(ignitionPath string) (*mcfgv1.MachineConfig, error)
Expand Down Expand Up @@ -652,7 +653,12 @@ func (o *ops) CreateManifests(kubeconfig string, content []byte) error {
// The dry run installer creates this file on "Reboot" (instead of actually rebooting)
// We use this function to check whether the given node in the cluster have already rebooted
func (o *ops) DryRebootHappened(markerPath string) bool {
_, err := o.ExecPrivilegeCommand(nil, "stat", markerPath)
return o.FileExists(markerPath)
}

// FileExists checks if a file exists
func (o *ops) FileExists(path string) bool {
_, err := o.ExecPrivilegeCommand(nil, "stat", path)

return err == nil
}
Expand Down