6 changes: 2 additions & 4 deletions api/hiveextension/v1beta1/agentclusterinstall_types.go
@@ -22,13 +22,11 @@ const (
ClusterInstallationStoppedReason string = "ClusterInstallationStopped"
ClusterInstallationStoppedMsg string = "The cluster installation stopped"
ClusterInsufficientAgentsReason string = "InsufficientAgents"
ClusterInsufficientAgentsMsg string = "The cluster currently requires %d agents but only %d have registered"
ClusterInsufficientAgentsMsg string = "The cluster currently requires exactly %d master agents and %d worker agents, but %d master agents and %d worker agents have registered"
ClusterUnapprovedAgentsReason string = "UnapprovedAgents"
ClusterUnapprovedAgentsMsg string = "The installation is pending on the approval of %d agents"
ClusterUnsyncedAgentsReason string = "UnsyncedAgents"
ClusterUnsyncedAgentsMsg string = "The cluster currently has %d agents with spec error"
ClusterAdditionalAgentsReason string = "AdditionalAgents"
ClusterAdditionalAgentsMsg string = "The cluster currently requires exactly %d agents but have %d registered"

ClusterValidatedCondition hivev1.ClusterInstallConditionType = "Validated"
ClusterValidationsOKMsg string = "The cluster's validations are passing"
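
For reference, a minimal sketch of how the new four-argument message above is formatted (illustration only; the numbers are made up, and the argument order simply follows the placeholders in the string):

```go
package main

import "fmt"

// Copied from the constant above purely to illustrate placeholder order:
// required masters, required workers, registered masters, registered workers.
const clusterInsufficientAgentsMsg = "The cluster currently requires exactly %d master agents and %d worker agents, but %d master agents and %d worker agents have registered"

func main() {
	fmt.Printf(clusterInsufficientAgentsMsg+"\n", 3, 2, 1, 0)
}
```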
@@ -355,7 +353,7 @@ type ClusterNetworkEntry struct {
type ProvisionRequirements struct {

// ControlPlaneAgents is the number of matching approved and ready Agents with the control plane role
// required to launch the install. Must be either 1 or 3.
// required to launch the install. Must be either 1 or between 3 and 5.
ControlPlaneAgents int `json:"controlPlaneAgents"`

// WorkerAgents is the minimum number of matching approved and ready Agents with the worker role
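
As a small illustration of the relaxed requirement, a ProvisionRequirements value that becomes valid with this change (a sketch; the import path is assumed to be the repository's standard module path):

```go
package main

import (
	"fmt"

	hiveext "github.com/openshift/assisted-service/api/hiveextension/v1beta1"
)

func main() {
	// 1 and 3 control plane agents were already accepted; 4 and 5 become valid with this change.
	req := hiveext.ProvisionRequirements{
		ControlPlaneAgents: 5,
		WorkerAgents:       2,
	}
	fmt.Printf("%+v\n", req)
}
```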

Some generated files are not rendered by default.

@@ -437,7 +437,7 @@ spec:
controlPlaneAgents:
description: |-
ControlPlaneAgents is the number of matching approved and ready Agents with the control plane role
required to launch the install. Must be either 1 or 3.
required to launch the install. Must be either 1 or between 3 and 5.
type: integer
workerAgents:
description: |-
2 changes: 1 addition & 1 deletion config/crd/resources.yaml
@@ -552,7 +552,7 @@ spec:
controlPlaneAgents:
description: |-
ControlPlaneAgents is the number of matching approved and ready Agents with the control plane role
required to launch the install. Must be either 1 or 3.
required to launch the install. Must be either 1 or between 3 and 5.
type: integer
workerAgents:
description: |-
@@ -434,7 +434,7 @@ spec:
controlPlaneAgents:
description: |-
ControlPlaneAgents is the number of matching approved and ready Agents with the control plane role
required to launch the install. Must be either 1 or 3.
required to launch the install. Must be either 1 or between 3 and 5.
type: integer
workerAgents:
description: |-
99 changes: 92 additions & 7 deletions internal/bminventory/inventory.go
@@ -317,22 +317,26 @@ func (b *bareMetalInventory) setDefaultRegisterClusterParams(ctx context.Context
{Cidr: models.Subnet(b.Config.DefaultServiceNetworkCidr)},
}
}

if params.NewClusterParams.MachineNetworks == nil {
params.NewClusterParams.MachineNetworks = []*models.MachineNetwork{}
}

if params.NewClusterParams.VipDhcpAllocation == nil {
params.NewClusterParams.VipDhcpAllocation = swag.Bool(false)
}

if params.NewClusterParams.Hyperthreading == nil {
params.NewClusterParams.Hyperthreading = swag.String(models.ClusterHyperthreadingAll)
}

if params.NewClusterParams.SchedulableMasters == nil {
params.NewClusterParams.SchedulableMasters = swag.Bool(false)
}
if params.NewClusterParams.HighAvailabilityMode == nil {
params.NewClusterParams.HighAvailabilityMode = swag.String(models.ClusterHighAvailabilityModeFull)
}

params.NewClusterParams.HighAvailabilityMode, params.NewClusterParams.ControlPlaneCount = common.GetDefaultHighAvailabilityAndMasterCountParams(
params.NewClusterParams.HighAvailabilityMode, params.NewClusterParams.ControlPlaneCount,
)

log.Infof("Verifying cluster platform and user-managed-networking, got platform=%s and userManagedNetworking=%s", getPlatformType(params.NewClusterParams.Platform), common.BoolPtrForLog(params.NewClusterParams.UserManagedNetworking))
platform, userManagedNetworking, err := provider.GetActualCreateClusterPlatformParams(params.NewClusterParams.Platform, params.NewClusterParams.UserManagedNetworking, params.NewClusterParams.HighAvailabilityMode, params.NewClusterParams.CPUArchitecture)
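
The removed nil-check on HighAvailabilityMode is folded into the shared helper called above. A minimal sketch of the defaulting behavior that call implies, assuming the helper fills whichever of the two values is nil and keeps them mutually consistent (the actual implementation lives in the common package and may differ):

```go
// Sketch only; haModeFull and haModeNone stand in for the models package constants.
const (
	haModeFull = "Full"
	haModeNone = "None"
)

// getDefaultHighAvailabilityAndMasterCountParams fills in whichever of the two inputs is
// nil so that HighAvailabilityMode and ControlPlaneCount stay mutually consistent.
func getDefaultHighAvailabilityAndMasterCountParams(haMode *string, controlPlaneCount *int64) (*string, *int64) {
	switch {
	case haMode == nil && controlPlaneCount == nil:
		// Nothing supplied: default to a multi-node (Full) cluster with three masters.
		mode, count := haModeFull, int64(3)
		return &mode, &count
	case controlPlaneCount == nil:
		// Only the HA mode supplied: derive the count (1 for single-node, 3 otherwise).
		count := int64(3)
		if *haMode == haModeNone {
			count = 1
		}
		return haMode, &count
	case haMode == nil:
		// Only the count supplied: derive the mode (None for a single master, Full otherwise).
		mode := haModeFull
		if *controlPlaneCount == 1 {
			mode = haModeNone
		}
		return &mode, controlPlaneCount
	default:
		return haMode, controlPlaneCount
	}
}
```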
@@ -441,6 +445,15 @@ func (b *bareMetalInventory) validateRegisterClusterInternalParams(params *insta
}
}

// should be called after defaults were set
if err := validateHighAvailabilityWithControlPlaneCount(
swag.StringValue(params.NewClusterParams.HighAvailabilityMode),
swag.Int64Value(params.NewClusterParams.ControlPlaneCount),
swag.StringValue(params.NewClusterParams.OpenshiftVersion),
); err != nil {
return common.NewApiError(http.StatusBadRequest, err)
}

return nil
}

@@ -641,6 +654,7 @@ func (b *bareMetalInventory) RegisterClusterInternal(
KubeKeyNamespace: kubeKey.Namespace,
TriggerMonitorTimestamp: time.Now(),
MachineNetworkCidrUpdatedAt: time.Now(),
ControlPlaneCount: swag.Int64Value(params.NewClusterParams.ControlPlaneCount),
}

newOLMOperators, err := b.getOLMMonitoredOperators(log, cluster, params, *releaseImage.Version)
@@ -1308,21 +1322,28 @@ func (b *bareMetalInventory) InstallClusterInternal(ctx context.Context, params
sortedHosts, canRefreshRoles := host.SortHosts(cluster.Hosts)
if canRefreshRoles {
for i := range sortedHosts {
updated, err = b.hostApi.AutoAssignRole(ctx, cluster.Hosts[i], tx)
updated, err = b.hostApi.AutoAssignRole(ctx, cluster.Hosts[i], tx, swag.Int(int(cluster.ControlPlaneCount)))
if err != nil {
return err
}
autoAssigned = autoAssigned || updated
}
}

// Refresh schedulable masters again after all roles are assigned
if internalErr := b.clusterApi.RefreshSchedulableMastersForcedTrue(ctx, *cluster.ID); internalErr != nil {
log.WithError(internalErr).Errorf("Failed to refresh SchedulableMastersForcedTrue while installing cluster <%s>", cluster.ID)
return internalErr
}

hasIgnoredValidations := common.IgnoredValidationsAreSet(cluster)
if hasIgnoredValidations {
eventgen.SendValidationsIgnoredEvent(ctx, b.eventsHandler, *cluster.ID)
}
// usage for auto role selection is measured only for day-1 clusters with more than
// 3 hosts (which would automatically be assigned as masters if the hw is sufficient)
if usages, u_err := usage.Unmarshal(cluster.Cluster.FeatureUsage); u_err == nil {
report := cluster.Cluster.TotalHostCount > common.MinMasterHostsNeededForInstallation && autoAssigned
report := cluster.Cluster.TotalHostCount > common.MinMasterHostsNeededForInstallationInHaMode && autoAssigned
if hasIgnoredValidations {
b.setUsage(true, usage.ValidationsIgnored, nil, usages)
}
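
The extra argument threaded through this and the later call sites implies an updated signature on the host API. A sketch of that signature, inferred from the callers in this diff rather than copied from internal/host:

```go
import (
	"context"

	"github.com/openshift/assisted-service/models"
	"gorm.io/gorm"
)

// Inferred from the call sites in this PR; the real host API interface may differ in naming.
type hostAPI interface {
	// AutoAssignRole suggests and persists a role for the host and reports whether the host
	// record was updated. Day-1 callers pass the cluster's ControlPlaneCount as
	// expectedMasterCount so auto-assignment knows how many masters to select; day-2 callers
	// pass nil, since hosts added to an existing cluster are always suggested as workers.
	AutoAssignRole(ctx context.Context, h *models.Host, db *gorm.DB, expectedMasterCount *int) (bool, error)
}
```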
@@ -1435,9 +1456,11 @@ func (b *bareMetalInventory) InstallSingleDay2HostInternal(ctx context.Context,
if h, err = b.getHost(ctx, infraEnvId.String(), hostId.String()); err != nil {
return err
}

// auto select host roles if not selected yet.
err = b.db.Transaction(func(tx *gorm.DB) error {
if _, err = b.hostApi.AutoAssignRole(ctx, &h.Host, tx); err != nil {
// no need to specify expected master count for day2 hosts, as their suggested role will always be worker

Contributor:

There have been people asking for the ability to add master nodes through the "oc adm node-image" framework, which uses ABI and assisted. For future reference, ABI may be specifying an expected master count of 1 for this scenario. To be clear, I'm not asking for changes here, as this PR is solely for day-1. cc @andfasano

Contributor Author:

In day 2, assisted doesn't always have a way to know the roles of the other hosts from day 1. Therefore, using the new controlPlaneCount is problematic. I believe for this scenario it would be best to set the role manually in the case of a master.

if _, err = b.hostApi.AutoAssignRole(ctx, &h.Host, tx, nil); err != nil {
return err
}
return nil
@@ -1509,7 +1532,8 @@ func (b *bareMetalInventory) V2InstallHost(ctx context.Context, params installer
return common.NewApiError(http.StatusConflict, fmt.Errorf("cannot install host in state %s", swag.StringValue(h.Status)))
}

_, err = b.hostApi.AutoAssignRole(ctx, h, b.db)
// no need to specify expected master count for day2 hosts, as their suggested role will always be worker
_, err = b.hostApi.AutoAssignRole(ctx, h, b.db, nil)
if err != nil {
log.Errorf("Failed to update role for host %s", params.HostID)
return common.GenerateErrorResponder(err)
@@ -1911,9 +1935,66 @@ func (b *bareMetalInventory) validateAndUpdateClusterParams(ctx context.Context,
return installer.V2UpdateClusterParams{}, err
}

// We don't want to update ControlPlaneCount in day2 clusters as we don't know the previous hosts
if params.ClusterUpdateParams.ControlPlaneCount != nil && swag.StringValue(cluster.Kind) != models.ClusterKindAddHostsCluster {
if err := validateHighAvailabilityWithControlPlaneCount(
swag.StringValue(cluster.HighAvailabilityMode),
swag.Int64Value(params.ClusterUpdateParams.ControlPlaneCount),
cluster.OpenshiftVersion,
); err != nil {
return installer.V2UpdateClusterParams{}, err
}
}

return *params, nil
}

func validateHighAvailabilityWithControlPlaneCount(highAvailabilityMode string, controlPlaneCount int64, openshiftVersion string) error {
if highAvailabilityMode == models.ClusterCreateParamsHighAvailabilityModeNone &&
controlPlaneCount != 1 {
return common.NewApiError(
http.StatusBadRequest,
errors.New("single-node clusters must have a single control plane node"),
)
}

stretchedClustersNotSupported, err := common.BaseVersionLessThan(common.MinimumVersionForStretchedControlPlanesCluster, openshiftVersion)
if err != nil {
return err
}

if highAvailabilityMode == models.ClusterCreateParamsHighAvailabilityModeFull &&
controlPlaneCount != common.AllowedNumberOfMasterHostsForInstallationInHaModeOfOCP417OrOlder &&
stretchedClustersNotSupported {
return common.NewApiError(
http.StatusBadRequest,
fmt.Errorf(
"there should be exactly %d dedicated control plane nodes for high availability mode %s in openshift version older than %s",
common.AllowedNumberOfMasterHostsForInstallationInHaModeOfOCP417OrOlder,
highAvailabilityMode,
common.MinimumVersionForStretchedControlPlanesCluster,
),
)
}

if highAvailabilityMode == models.ClusterCreateParamsHighAvailabilityModeFull &&
(controlPlaneCount < common.MinMasterHostsNeededForInstallationInHaMode ||
controlPlaneCount > common.MaxMasterHostsNeededForInstallationInHaModeOfOCP418OrNewer) {
return common.NewApiError(
http.StatusBadRequest,
fmt.Errorf(
"there should be %d-%d dedicated control plane nodes for high availability mode %s in openshift version %s or newer",
common.MinMasterHostsNeededForInstallationInHaMode,
common.MaxMasterHostsNeededForInstallationInHaModeOfOCP418OrNewer,
highAvailabilityMode,
common.MinimumVersionForStretchedControlPlanesCluster,
),
)
}

return nil
}
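
To make the combined rules concrete, a few example inputs and the outcome the checks above produce (illustrative only: the "Full"/"None" enum values and the 3, 5, and 4.18 boundaries are inferred from the constant names, and this sketch assumes it sits in the same package as the validator):

```go
// Illustrative sketch; pairs each input with whether validation is expected to fail.
func exampleControlPlaneCountValidation() {
	cases := []struct {
		haMode  string
		count   int64
		version string
		wantErr bool
	}{
		{"None", 1, "4.17", false}, // single-node: exactly one control plane node
		{"None", 3, "4.18", true},  // single-node with more than one master is rejected
		{"Full", 3, "4.17", false}, // pre-4.18 HA: exactly 3 masters
		{"Full", 4, "4.17", true},  // 4 masters need the stretched-control-planes minimum version
		{"Full", 5, "4.18", false}, // 4.18 or newer: anywhere from 3 to 5 masters
		{"Full", 6, "4.18", true},  // above the 5-master maximum
	}
	for _, c := range cases {
		err := validateHighAvailabilityWithControlPlaneCount(c.haMode, c.count, c.version)
		fmt.Printf("mode=%s count=%d version=%s -> gotErr=%v wantErr=%v\n",
			c.haMode, c.count, c.version, err != nil, c.wantErr)
	}
}
```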

func (b *bareMetalInventory) V2UpdateCluster(ctx context.Context, params installer.V2UpdateClusterParams) middleware.Responder {
c, err := b.v2UpdateClusterInternal(ctx, params, Interactive)
if err != nil {
@@ -2358,6 +2439,10 @@ func (b *bareMetalInventory) updateClusterData(_ context.Context, cluster *commo
b.setUserManagedNetworkingAndMultiNodeUsage(swag.BoolValue(params.ClusterUpdateParams.UserManagedNetworking), *cluster.HighAvailabilityMode, usages)
}

if params.ClusterUpdateParams.ControlPlaneCount != nil && *params.ClusterUpdateParams.ControlPlaneCount != cluster.ControlPlaneCount {
updates["control_plane_count"] = *params.ClusterUpdateParams.ControlPlaneCount
}

if len(updates) > 0 {
updates["trigger_monitor_timestamp"] = time.Now()
err = db.Model(&common.Cluster{}).Where("id = ?", cluster.ID.String()).Updates(updates).Error