Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[release-4.17] no-jira: Collect bootstrap logs when control plane provisioning fails #9308

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 4 additions & 11 deletions cmd/openshift-install/create.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,6 @@ import (
assetstore "github.com/openshift/installer/pkg/asset/store"
targetassets "github.com/openshift/installer/pkg/asset/targets"
destroybootstrap "github.com/openshift/installer/pkg/destroy/bootstrap"
"github.com/openshift/installer/pkg/gather/service"
timer "github.com/openshift/installer/pkg/metrics/timer"
"github.com/openshift/installer/pkg/types/baremetal"
"github.com/openshift/installer/pkg/types/gcp"
Expand Down Expand Up @@ -178,21 +177,12 @@ func clusterCreatePostRun(ctx context.Context) (int, error) {
//
timer.StartTimer("Bootstrap Complete")
if err := waitForBootstrapComplete(ctx, config); err != nil {
bundlePath, gatherErr := runGatherBootstrapCmd(ctx, command.RootOpts.Dir)
if gatherErr != nil {
logrus.Error("Attempted to gather debug logs after installation failure: ", gatherErr)
}
if err := logClusterOperatorConditions(ctx, config); err != nil {
logrus.Error("Attempted to gather ClusterOperator status after installation failure: ", err)
}
logrus.Error("Bootstrap failed to complete: ", err.Unwrap())
logrus.Error(err.Error())
if gatherErr == nil {
if err := service.AnalyzeGatherBundle(bundlePath); err != nil {
logrus.Error("Attempted to analyze the debug logs after installation failure: ", err)
}
logrus.Infof("Bootstrap gather logs captured here %q", bundlePath)
}
gatherAndAnalyzeBootstrapLogs(ctx, command.RootOpts.Dir)
return exitCodeBootstrapFailed, nil
}
timer.StopTimer("Bootstrap Complete")
Expand Down Expand Up @@ -323,6 +313,9 @@ func runTargetCmd(ctx context.Context, targets ...asset.WritableAsset) func(cmd
logrus.Error(err)
logrus.Exit(exitCodeInstallConfigError)
}
if strings.Contains(err.Error(), asset.ControlPlaneCreationError) {
gatherAndAnalyzeBootstrapLogs(ctx, command.RootOpts.Dir)
}
if strings.Contains(err.Error(), asset.ClusterCreationError) {
logrus.Error(err)
logrus.Exit(exitCodeInfrastructureFailed)
Expand Down
12 changes: 12 additions & 0 deletions cmd/openshift-install/gather.go
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,18 @@ func newGatherBootstrapCmd(ctx context.Context) *cobra.Command {
return cmd
}

// gatherAndAnalyzeBootstrapLogs gathers the bootstrap log bundle using the
// assets found in directory and, if the gather succeeds, analyzes the bundle
// and logs the path where it was written.
//
// The function is best-effort: gather and analysis failures are reported via
// logrus at error level and are never returned, so a failure here cannot mask
// the installation error that triggered the gather.
func gatherAndAnalyzeBootstrapLogs(ctx context.Context, directory string) {
	// Use the directory argument supplied by the caller instead of reaching
	// for the global root options, so the parameter is actually honored.
	bundlePath, err := runGatherBootstrapCmd(ctx, directory)
	if err != nil {
		logrus.Error("Attempted to gather debug logs after installation failure: ", err)
		return
	}

	if err := service.AnalyzeGatherBundle(bundlePath); err != nil {
		logrus.Error("Attempted to analyze the debug logs after installation failure: ", err)
	}
	// The bundle exists even when the analysis fails, so always report its location.
	logrus.Infof("Bootstrap gather logs captured here %q", bundlePath)
}

func runGatherBootstrapCmd(ctx context.Context, directory string) (string, error) {
assetStore, err := assetstore.NewStore(directory)
if err != nil {
Expand Down
8 changes: 5 additions & 3 deletions pkg/asset/asset.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,12 @@ import (
)

const (
// ClusterCreationError is the error when terraform fails, implying infrastructure failures
ClusterCreationError = "failed to create cluster"
// InstallConfigError wraps all configuration errors in one single error
// InstallConfigError wraps all configuration errors in one single error.
InstallConfigError = "failed to create install config"
// ClusterCreationError is the error when infrastructure provisioning fails.
ClusterCreationError = "failed to create cluster"
// ControlPlaneCreationError is the error when machine provisioning fails.
ControlPlaneCreationError = "failed to provision control-plane machines"
)

// Asset used to install OpenShift.
Expand Down
4 changes: 2 additions & 2 deletions pkg/infrastructure/clusterapi/clusterapi.go
Original file line number Diff line number Diff line change
Expand Up @@ -363,9 +363,9 @@ func (i *InfraProvider) Provision(ctx context.Context, dir string, parents asset
return allReady, nil
}); err != nil {
if wait.Interrupted(err) {
return fileList, fmt.Errorf("control-plane machines were not provisioned within %v: %w", provisionTimeout, err)
return fileList, fmt.Errorf("%s within %v: %w", asset.ControlPlaneCreationError, provisionTimeout, err)
}
return fileList, fmt.Errorf("control-plane machines are not ready: %w", err)
return fileList, fmt.Errorf("%s: machines are not ready: %w", asset.ControlPlaneCreationError, err)
}
}
timer.StopTimer(machineStage)
Expand Down
7 changes: 6 additions & 1 deletion pkg/utils/baremetal/bootstrap.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ import (
"k8s.io/client-go/rest"
clientwatch "k8s.io/client-go/tools/watch"

"github.com/openshift/installer/pkg/asset"
"github.com/openshift/installer/pkg/infrastructure/baremetal"
)

Expand Down Expand Up @@ -106,5 +107,9 @@ func WaitForBaremetalBootstrapControlPlane(ctx context.Context, config *rest.Con
return fmt.Errorf("failed to persist masters file to disk: %w", err)
}

return withSyncErr
if withSyncErr != nil {
// wrap with ControlPlaneCreationError to trigger bootstrap log bundle gather
return fmt.Errorf("%s: %w", asset.ControlPlaneCreationError, withSyncErr)
}
return nil
}