Skip to content

Commit 6e3f5e3

Browse files
Merge pull request #9308 from openshift-cherrypick-robot/cherry-pick-9123-to-release-4.17
[release-4.17] no-jira: Collect bootstrap logs when control plane provisioning fails
2 parents 15a9f9e + 326ed9a commit 6e3f5e3

File tree

5 files changed

+29
-17
lines changed

5 files changed

+29
-17
lines changed

cmd/openshift-install/create.go

+4-11
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,6 @@ import (
4747
assetstore "github.com/openshift/installer/pkg/asset/store"
4848
targetassets "github.com/openshift/installer/pkg/asset/targets"
4949
destroybootstrap "github.com/openshift/installer/pkg/destroy/bootstrap"
50-
"github.com/openshift/installer/pkg/gather/service"
5150
timer "github.com/openshift/installer/pkg/metrics/timer"
5251
"github.com/openshift/installer/pkg/types/baremetal"
5352
"github.com/openshift/installer/pkg/types/gcp"
@@ -178,21 +177,12 @@ func clusterCreatePostRun(ctx context.Context) (int, error) {
178177
//
179178
timer.StartTimer("Bootstrap Complete")
180179
if err := waitForBootstrapComplete(ctx, config); err != nil {
181-
bundlePath, gatherErr := runGatherBootstrapCmd(ctx, command.RootOpts.Dir)
182-
if gatherErr != nil {
183-
logrus.Error("Attempted to gather debug logs after installation failure: ", gatherErr)
184-
}
185180
if err := logClusterOperatorConditions(ctx, config); err != nil {
186181
logrus.Error("Attempted to gather ClusterOperator status after installation failure: ", err)
187182
}
188183
logrus.Error("Bootstrap failed to complete: ", err.Unwrap())
189184
logrus.Error(err.Error())
190-
if gatherErr == nil {
191-
if err := service.AnalyzeGatherBundle(bundlePath); err != nil {
192-
logrus.Error("Attempted to analyze the debug logs after installation failure: ", err)
193-
}
194-
logrus.Infof("Bootstrap gather logs captured here %q", bundlePath)
195-
}
185+
gatherAndAnalyzeBootstrapLogs(ctx, command.RootOpts.Dir)
196186
return exitCodeBootstrapFailed, nil
197187
}
198188
timer.StopTimer("Bootstrap Complete")
@@ -323,6 +313,9 @@ func runTargetCmd(ctx context.Context, targets ...asset.WritableAsset) func(cmd
323313
logrus.Error(err)
324314
logrus.Exit(exitCodeInstallConfigError)
325315
}
316+
if strings.Contains(err.Error(), asset.ControlPlaneCreationError) {
317+
gatherAndAnalyzeBootstrapLogs(ctx, command.RootOpts.Dir)
318+
}
326319
if strings.Contains(err.Error(), asset.ClusterCreationError) {
327320
logrus.Error(err)
328321
logrus.Exit(exitCodeInfrastructureFailed)

cmd/openshift-install/gather.go

+12
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,18 @@ func newGatherBootstrapCmd(ctx context.Context) *cobra.Command {
8989
return cmd
9090
}
9191

92+
func gatherAndAnalyzeBootstrapLogs(ctx context.Context, directory string) {
93+
bundlePath, gatherErr := runGatherBootstrapCmd(ctx, command.RootOpts.Dir)
94+
if gatherErr != nil {
95+
logrus.Error("Attempted to gather debug logs after installation failure: ", gatherErr)
96+
} else {
97+
if err := service.AnalyzeGatherBundle(bundlePath); err != nil {
98+
logrus.Error("Attempted to analyze the debug logs after installation failure: ", err)
99+
}
100+
logrus.Infof("Bootstrap gather logs captured here %q", bundlePath)
101+
}
102+
}
103+
92104
func runGatherBootstrapCmd(ctx context.Context, directory string) (string, error) {
93105
assetStore, err := assetstore.NewStore(directory)
94106
if err != nil {

pkg/asset/asset.go

+5-3
Original file line numberDiff line numberDiff line change
@@ -13,10 +13,12 @@ import (
1313
)
1414

1515
const (
16-
// ClusterCreationError is the error when terraform fails, implying infrastructure failures
17-
ClusterCreationError = "failed to create cluster"
18-
// InstallConfigError wraps all configuration errors in one single error
16+
// InstallConfigError wraps all configuration errors in one single error.
1917
InstallConfigError = "failed to create install config"
18+
// ClusterCreationError is the error when infrastructure provisioning fails.
19+
ClusterCreationError = "failed to create cluster"
20+
// ControlPlaneCreationError is the error when machine provisioning fails.
21+
ControlPlaneCreationError = "failed to provision control-plane machines"
2022
)
2123

2224
// Asset used to install OpenShift.

pkg/infrastructure/clusterapi/clusterapi.go

+2-2
Original file line numberDiff line numberDiff line change
@@ -363,9 +363,9 @@ func (i *InfraProvider) Provision(ctx context.Context, dir string, parents asset
363363
return allReady, nil
364364
}); err != nil {
365365
if wait.Interrupted(err) {
366-
return fileList, fmt.Errorf("control-plane machines were not provisioned within %v: %w", provisionTimeout, err)
366+
return fileList, fmt.Errorf("%s within %v: %w", asset.ControlPlaneCreationError, provisionTimeout, err)
367367
}
368-
return fileList, fmt.Errorf("control-plane machines are not ready: %w", err)
368+
return fileList, fmt.Errorf("%s: machines are not ready: %w", asset.ControlPlaneCreationError, err)
369369
}
370370
}
371371
timer.StopTimer(machineStage)

pkg/utils/baremetal/bootstrap.go

+6-1
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ import (
1616
"k8s.io/client-go/rest"
1717
clientwatch "k8s.io/client-go/tools/watch"
1818

19+
"github.com/openshift/installer/pkg/asset"
1920
"github.com/openshift/installer/pkg/infrastructure/baremetal"
2021
)
2122

@@ -106,5 +107,9 @@ func WaitForBaremetalBootstrapControlPlane(ctx context.Context, config *rest.Con
106107
return fmt.Errorf("failed to persist masters file to disk: %w", err)
107108
}
108109

109-
return withSyncErr
110+
if withSyncErr != nil {
111+
// wrap with ControlPlaneCreationError to trigger bootstrap log bundle gather
112+
return fmt.Errorf("%s: %w", asset.ControlPlaneCreationError, withSyncErr)
113+
}
114+
return nil
110115
}

0 commit comments

Comments
 (0)