Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 34 additions & 13 deletions cmd/openshift-install/agent/waitfor.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
package agent

import (
"context"

"github.com/pkg/errors"
"github.com/sirupsen/logrus"
"github.com/spf13/cobra"
Expand Down Expand Up @@ -31,6 +33,19 @@ func NewWaitForCmd() *cobra.Command {
return cmd
}

// handleBootstrapError reports a bootstrap failure and terminates the process.
//
// It prints the event list gathered from the Agent REST API, attempts to log
// the ClusterOperator conditions (a failure to gather them is itself logged
// and otherwise ignored), tells the user how to gather bootstrap logs, logs
// err, and finally exits with exitCodeBootstrapFailed via logrus.Exit.
//
// cluster is dereferenced unconditionally, so callers must pass a non-nil
// cluster.
func handleBootstrapError(cluster *agentpkg.Cluster, err error) {
	logrus.Debug("Printing the event list gathered from the Agent Rest API")
	cluster.PrintInfraEnvRestAPIEventList()

	// Best effort: the operator status is extra diagnostic output, so a
	// failure to collect it must not mask the original bootstrap error.
	if condErr := cluster.API.OpenShift.LogClusterOperatorConditions(); condErr != nil {
		logrus.Error("Attempted to gather ClusterOperator status after wait failure: ", condErr)
	}

	logrus.Info("Use the following commands to gather logs from the cluster")
	logrus.Info("openshift-install gather bootstrap --help")

	// errors.Wrap already joins the message and the cause with ": ", so the
	// message itself must not end with a separator (that would be logged as
	// "Bootstrap failed to complete: : <cause>").
	logrus.Error(errors.Wrap(err, "Bootstrap failed to complete"))
	logrus.Exit(exitCodeBootstrapFailed)
}

func newWaitForBootstrapCompleteCmd() *cobra.Command {
return &cobra.Command{
Use: "bootstrap-complete",
Expand All @@ -42,19 +57,16 @@ func newWaitForBootstrapCompleteCmd() *cobra.Command {
if len(assetDir) == 0 {
logrus.Fatal("No cluster installation directory found")
}
cluster, err := agentpkg.WaitForBootstrapComplete(assetDir)

ctx := context.Background()
cluster, err := agentpkg.NewCluster(ctx, assetDir)
if err != nil {
logrus.Debug("Printing the event list gathered from the Agent Rest API")
cluster.PrintInfraEnvRestAPIEventList()
err2 := cluster.API.OpenShift.LogClusterOperatorConditions()
if err2 != nil {
logrus.Error("Attempted to gather ClusterOperator status after wait failure: ", err2)
}
logrus.Info("Use the following commands to gather logs from the cluster")
logrus.Info("openshift-install gather bootstrap --help")
logrus.Error(errors.Wrap(err, "Bootstrap failed to complete: "))
logrus.Exit(exitCodeBootstrapFailed)
}

if err := agentpkg.WaitForBootstrapComplete(cluster); err != nil {
handleBootstrapError(cluster, err)
}
},
}
}
Expand All @@ -70,10 +82,19 @@ func newWaitForInstallCompleteCmd() *cobra.Command {
if len(assetDir) == 0 {
logrus.Fatal("No cluster installation directory found")
}
cluster, err := agentpkg.WaitForInstallComplete(assetDir)

ctx := context.Background()
cluster, err := agentpkg.NewCluster(ctx, assetDir)
if err != nil {
logrus.Debug("Printing the event list gathered from the Agent Rest API")
cluster.PrintInfraEnvRestAPIEventList()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm curious why we would stop printing out the event list if the install failed to complete. Wouldn't it be useful information?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The theory is that at this point we've succeeded in bootstrapping, and it's just that the CVO isn't complete, so the events that happened before/during bootstrapping seem less relevant at this point.
I think this was previously here because it was trying to cover all cases, since there was previously no way of telling if it failed during bootstrapping or after.

logrus.Exit(exitCodeBootstrapFailed)
}

if err := agentpkg.WaitForBootstrapComplete(cluster); err != nil {
handleBootstrapError(cluster, err)
}

if err = agentpkg.WaitForInstallComplete(cluster); err != nil {
logrus.Error(err)
err2 := cluster.API.OpenShift.LogClusterOperatorConditions()
if err2 != nil {
logrus.Error("Attempted to gather ClusterOperator status after wait failure: ", err2)
Expand Down
32 changes: 7 additions & 25 deletions pkg/agent/waitfor.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,15 +11,7 @@ import (

// WaitForBootstrapComplete waits for the bootstrap process to complete on
// cluster installations triggered by the agent installer.
func WaitForBootstrapComplete(assetDir string) (*Cluster, error) {

ctx := context.Background()
cluster, err := NewCluster(ctx, assetDir)
if err != nil {
logrus.Warn("unable to make cluster object to track installation")
return nil, err
}

func WaitForBootstrapComplete(cluster *Cluster) error {
start := time.Now()
previous := time.Now()
timeout := 60 * time.Minute
Expand Down Expand Up @@ -56,26 +48,19 @@ func WaitForBootstrapComplete(assetDir string) (*Cluster, error) {
waitErr := waitContext.Err()
if waitErr != nil {
if waitErr == context.Canceled && lastErr != nil {
return cluster, errors.Wrap(lastErr, "bootstrap process returned error")
return errors.Wrap(lastErr, "bootstrap process returned error")
}
if waitErr == context.DeadlineExceeded {
return cluster, errors.Wrap(waitErr, "bootstrap process timed out")
return errors.Wrap(waitErr, "bootstrap process timed out")
}
}

return cluster, nil
return nil
}

// WaitForInstallComplete waits for the cluster installation triggered by the
// agent installer to complete.
func WaitForInstallComplete(assetDir string) (*Cluster, error) {

cluster, err := WaitForBootstrapComplete(assetDir)

if err != nil {
return cluster, errors.Wrap(err, "error occured during bootstrap process")
}

func WaitForInstallComplete(cluster *Cluster) error {
timeout := 90 * time.Minute
waitContext, cancel := context.WithTimeout(cluster.Ctx, timeout)
defer cancel()
Expand All @@ -91,10 +76,7 @@ func WaitForInstallComplete(assetDir string) (*Cluster, error) {

waitErr := waitContext.Err()
if waitErr != nil && waitErr != context.Canceled {
if err != nil {
return cluster, errors.Wrap(err, "Error occurred during installation")
}
return cluster, errors.Wrap(waitErr, "Cluster installation timed out")
return errors.Wrap(waitErr, "Cluster installation timed out")
}
return cluster, nil
return nil
}