Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion apis/metal3.io/v1alpha1/baremetalhost_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,9 @@ const (
// PowerManagementError is an error condition occurring when the
// controller is unable to modify the power state of the Host.
PowerManagementError ErrorType = "power management error"
// TooManyHostsError is an error condition occurring when
// there are too many hosts simultaneously being provisioned
TooManyHostsError ErrorType = "too many hosts error"
Comment thread
andfasano marked this conversation as resolved.
)

// ProvisioningState defines the states the provisioner will report
Expand Down Expand Up @@ -520,7 +523,7 @@ type BareMetalHostStatus struct {

// ErrorType indicates the type of failure encountered when the
// OperationalStatus is OperationalStatusError
// +kubebuilder:validation:Enum=registration error;inspection error;provisioning error;power management error
// +kubebuilder:validation:Enum=registration error;inspection error;provisioning error;power management error;too many hosts error
ErrorType ErrorType `json:"errorType,omitempty"`

// LastUpdated identifies when this status was last observed.
Expand Down
1 change: 1 addition & 0 deletions config/crd/bases/metal3.io_baremetalhosts.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -262,6 +262,7 @@ spec:
- inspection error
- provisioning error
- power management error
- too many hosts error
type: string
goodCredentials:
description: the last credentials we were able to validate as working
Expand Down
1 change: 1 addition & 0 deletions config/render/capm3.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -260,6 +260,7 @@ spec:
- inspection error
- provisioning error
- power management error
- too many hosts error
type: string
goodCredentials:
description: the last credentials we were able to validate as working
Expand Down
82 changes: 82 additions & 0 deletions controllers/metal3.io/baremetalhost_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import (
"os"
"strconv"
"strings"
"sync"
"time"

"github.com/go-logr/logr"
Expand Down Expand Up @@ -50,9 +51,21 @@ const (
unmanagedRetryDelay = time.Minute * 10
provisionerNotReadyRetryDelay = time.Second * 30
rebootAnnotationPrefix = "reboot.metal3.io"
maxProvisioningHostsDefault = 20
)

var maxProvisioningHosts int
Comment thread
andfasano marked this conversation as resolved.

// init resolves the maximum number of concurrently provisioning hosts.
// It defaults to maxProvisioningHostsDefault and may be overridden via the
// MAX_CONCURRENT_PROVISIONING_HOSTS environment variable. The override must
// be a positive integer; anything else aborts startup so a misconfiguration
// is surfaced immediately instead of silently breaking provisioning.
func init() {
	maxProvisioningHosts = maxProvisioningHostsDefault
	maxHostsStr := os.Getenv("MAX_CONCURRENT_PROVISIONING_HOSTS")
	if maxHostsStr == "" {
		return
	}
	value, err := strconv.Atoi(maxHostsStr)
	if err != nil || value <= 0 {
		// A non-numeric, zero, or negative limit would make every capacity
		// check fail and deadlock provisioning, so refuse to start.
		fmt.Fprintf(os.Stderr, "Cannot start: Invalid value set for variable MAX_CONCURRENT_PROVISIONING_HOSTS=%s\n", maxHostsStr)
		os.Exit(1)
	}
	maxProvisioningHosts = value
}

// BareMetalHostReconciler reconciles a BareMetalHost object
Expand All @@ -61,6 +74,22 @@ type BareMetalHostReconciler struct {
Log logr.Logger
Scheme *runtime.Scheme
ProvisionerFactory provisioner.Factory

currentProvisioningHosts map[string]struct{}
mu sync.Mutex
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would be preferable to have a name that describes what the mutex is protecting.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok. I can move them in a separate struct to make it even more clear, and it could help a little bit in testing

}

// NewBareMetalHostReconciler creates a new reconciler instance
func NewBareMetalHostReconciler(client client.Client, scheme *runtime.Scheme, factory provisioner.Factory) *BareMetalHostReconciler {
r := BareMetalHostReconciler{
Client: client,
Log: ctrl.Log.WithName("controllers").WithName("BareMetalHost"),
Scheme: scheme,
ProvisionerFactory: factory,

currentProvisioningHosts: make(map[string]struct{}),
}
return &r
}

// Instead of passing a zillion arguments to the action of a phase,
Expand Down Expand Up @@ -914,9 +943,62 @@ func hostHasFinalizer(host *metal3v1alpha1.BareMetalHost) bool {
return utils.StringInList(host.Finalizers, metal3v1alpha1.BareMetalHostFinalizer)
}

// Resync the set at the startup
func (r *BareMetalHostReconciler) syncProvisioningHosts(mgr ctrl.Manager) {
hosts := &metal3v1alpha1.BareMetalHostList{}
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The controller runtime uses a caching client, which means when we ask for resources that we're subscribed to we look at the cache and get a list back quickly. If we eliminate the mutex management and just write a function that returns a number, it should be plenty fast enough to call every time we start to reconcile.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Got your point and will give it a try, but I'm not sure it could work, since the cache is not updated immediately, thus still risking a surge (while using a sync set every thread has always an effectively updated view). Anyhow I'll check

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This PR uses the same cache to initialize the data structure, right?

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not really, I've the APIReader() returned by the manager to access directly the API server

err := mgr.GetAPIReader().List(context.TODO(), hosts)
if err != nil {
os.Exit(1)
}

for _, host := range hosts.Items {
r.checkProvisioningHost(host, host.Status.Provisioning.State)
}
}

// Check if there's a free slot for hosts to be provisioned
func (r *BareMetalHostReconciler) checkProvisioningHost(host metal3v1alpha1.BareMetalHost, state metal3v1alpha1.ProvisioningState) bool {
r.mu.Lock()
defer r.mu.Unlock()

switch state {
case metal3v1alpha1.StateInspecting, metal3v1alpha1.StateProvisioning:
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What about deprovisioning? Isn't that just as hard on ironic-conductor? I don't know that we ever want to block deprovisioning but we probably don't want to start provisioning a bunch of stuff when we already have a large amount of deprovisioning in-flight.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's a good question, and in reality deprovisioning wasn't part of initial proposal, which focused only on the provisioning phase. Maybe some Ironic expert could comment better (@dtantsur ?), anyhow the deleting part appeared to be not so hard as the provisioning part.

if len(r.currentProvisioningHosts) >= maxProvisioningHosts {
return false
}
r.currentProvisioningHosts[host.Name] = struct{}{}
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This assumes that we will successfully write the state change. If writing that fails we will end up back here checking for capacity again against a map that already contains the host.

The good news is that this is a map not a list so we can only store one copy, and a single host repeatedly failing will not fill up our quota and block everything. Nevertheless, this exposes us to systemic risk: e.g. if we try to provision 20 hosts and then the k8s API becomes briefly unavailable for some (presumably not unrelated) reason, the bmo is now deadlocked and cannot provision anything ever again until it is restarted.

We have a mechanism in the info struct for deferring function calls until after the write succeeds, and I think we need to use that here.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice catch, I'll check how to use that

default:
delete(r.currentProvisioningHosts, host.Name)
}

return true
}

// Check if there's a free slot for hosts that have been previously delayed.
// Returns (true, result) when the host was delayed with TooManyHostsError and
// a decision was made (either stay delayed or clear the error and continue);
// returns (false, nil) when the host was never delayed.
func (r *BareMetalHostReconciler) checkDelayedHost(info *reconcileInfo) (bool, actionResult) {
	r.mu.Lock()
	defer r.mu.Unlock()

	if info.host.Status.ErrorType != metal3v1alpha1.TooManyHostsError {
		// Host is not waiting on a provisioning slot; nothing to decide.
		return false, nil
	}

	if len(r.currentProvisioningHosts) >= maxProvisioningHosts {
		// No available slots, current action delayed
		return true, recordActionFailure(info, metal3v1alpha1.TooManyHostsError, "Delayed, too many hosts")
	}

	// A slot could be available, let's cleanup the host and retry
	info.host.ClearError()
	return true, actionContinue{}
}

// SetupWithManager registers the reconciler to be run by the manager
func (r *BareMetalHostReconciler) SetupWithManager(mgr ctrl.Manager) error {

r.syncProvisioningHosts(mgr)
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I believe there is a race here; we haven't yet started watching resources or obtained the leader lock, so Hosts can be modified after we build our initial state without us seeing the changes (potentially ever, since our updates are edge-triggered).

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there any other initialization point then that could be used (before reconciling)? Otherwise the other approach could be to keep in sync the set at the beginning of every reconcile loop, in line with @dhellmann's suggestion


maxConcurrentReconciles := 3
if mcrEnv, ok := os.LookupEnv("BMO_CONCURRENCY"); ok {
mcr, err := strconv.Atoi(mcrEnv)
Expand Down
7 changes: 1 addition & 6 deletions controllers/metal3.io/baremetalhost_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -95,12 +95,7 @@ func newTestReconcilerWithProvisionerFactory(factory provisioner.Factory, initOb
bmcSecret := newBMCCredsSecret(defaultSecretName, "User", "Pass")
c.Create(goctx.TODO(), bmcSecret)

return &BareMetalHostReconciler{
Client: c,
Scheme: scheme.Scheme,
ProvisionerFactory: factory,
Log: ctrl.Log.WithName("controllers").WithName("BareMetalHost"),
}
return NewBareMetalHostReconciler(c, scheme.Scheme, factory)
}

func newTestReconciler(initObjs ...runtime.Object) *BareMetalHostReconciler {
Expand Down
8 changes: 1 addition & 7 deletions controllers/metal3.io/demo_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ import (

"k8s.io/client-go/kubernetes/scheme"

ctrl "sigs.k8s.io/controller-runtime"
fakeclient "sigs.k8s.io/controller-runtime/pkg/client/fake"
"sigs.k8s.io/controller-runtime/pkg/reconcile"

Expand All @@ -24,12 +23,7 @@ func newDemoReconciler(initObjs ...runtime.Object) *BareMetalHostReconciler {
bmcSecret := newSecret(defaultSecretName, map[string]string{"username": "User", "password": "Pass"})
c.Create(goctx.TODO(), bmcSecret)

return &BareMetalHostReconciler{
Client: c,
Scheme: scheme.Scheme,
ProvisionerFactory: demo.New,
Log: ctrl.Log.WithName("controller").WithName("BareMetalHost"),
}
return NewBareMetalHostReconciler(c, scheme.Scheme, demo.New)
}

// TestDemoRegistrationError tests that a host with the right name reports
Expand Down
22 changes: 19 additions & 3 deletions controllers/metal3.io/host_state_machine.go
Original file line number Diff line number Diff line change
Expand Up @@ -73,8 +73,13 @@ func recordStateEnd(info *reconcileInfo, host *metal3v1alpha1.BareMetalHost, sta
}

func (hsm *hostStateMachine) updateHostStateFrom(initialState metal3v1alpha1.ProvisioningState,
info *reconcileInfo) {
info *reconcileInfo) bool {
if hsm.NextState != initialState {

if !hsm.Reconciler.checkProvisioningHost(*info.host, hsm.NextState) {
return false
}
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's a little strange that we will be stuck in e.g. the Ready state with an error, rather than the Provisioning state. This does make things simpler though.


info.log.Info("changing provisioning state",
"old", initialState,
"new", hsm.NextState)
Expand Down Expand Up @@ -104,11 +109,22 @@ func (hsm *hostStateMachine) updateHostStateFrom(initialState metal3v1alpha1.Pro
}
}
}

return true
}

func (hsm *hostStateMachine) ReconcileState(info *reconcileInfo) actionResult {
func (hsm *hostStateMachine) ReconcileState(info *reconcileInfo) (actionRes actionResult) {
initialState := hsm.Host.Status.Provisioning.State
defer hsm.updateHostStateFrom(initialState, info)
defer func() {
if !hsm.updateHostStateFrom(initialState, info) {
actionRes = recordActionFailure(info, metal3v1alpha1.TooManyHostsError, "Delayed, too many hosts")
}
}()
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

At one stage @dhellmann had a proposal for a generic way to attach actions to particular state transitions, but I don't remember the context...

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I suggested adding a transition(newState) method to the state machine to use instead of assigning the new state directly. By doing that, all of the transitions would pass through one place and we could have a switch statement for handling before & after calls consistently.


// Check if any immediate action is required for a delayed host
if delayed, actionRes := hsm.Reconciler.checkDelayedHost(info); delayed {
return actionRes
}

if hsm.checkInitiateDelete() {
info.log.Info("Initiating host deletion")
Expand Down
6 changes: 3 additions & 3 deletions controllers/metal3.io/host_state_machine_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -205,7 +205,7 @@ func TestErrorCountIncreasedWhenProvisionerFails(t *testing.T) {
for _, tt := range tests {
t.Run(tt.Scenario, func(t *testing.T) {
prov := &mockProvisioner{}
hsm := newHostStateMachine(tt.Host, &BareMetalHostReconciler{}, prov)
hsm := newHostStateMachine(tt.Host, newTestReconciler(), prov)
info := makeDefaultReconcileInfo(tt.Host)

prov.setNextError("some error")
Expand All @@ -220,7 +220,7 @@ func TestErrorCountIncreasedWhenProvisionerFails(t *testing.T) {
func TestErrorCountIncreasedWhenRegistrationFails(t *testing.T) {
bmh := host(metal3v1alpha1.StateRegistering).build()
prov := &mockProvisioner{}
hsm := newHostStateMachine(bmh, &BareMetalHostReconciler{}, prov)
hsm := newHostStateMachine(bmh, newTestReconciler(), prov)
info := makeDefaultReconcileInfo(bmh)
bmh.Status.GoodCredentials = metal3v1alpha1.CredentialsStatus{}

Expand Down Expand Up @@ -265,7 +265,7 @@ func TestErrorCountCleared(t *testing.T) {
for _, tt := range tests {
t.Run(tt.Scenario, func(t *testing.T) {
prov := &mockProvisioner{}
hsm := newHostStateMachine(tt.Host, &BareMetalHostReconciler{}, prov)
hsm := newHostStateMachine(tt.Host, newTestReconciler(), prov)
info := makeDefaultReconcileInfo(tt.Host)

info.host.Status.ErrorCount = 1
Expand Down
8 changes: 2 additions & 6 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -128,12 +128,8 @@ func main() {
ironic.LogStartup()
}

if err = (&metal3iocontroller.BareMetalHostReconciler{
Client: mgr.GetClient(),
Log: ctrl.Log.WithName("controllers").WithName("BareMetalHost"),
Scheme: mgr.GetScheme(),
ProvisionerFactory: provisionerFactory,
}).SetupWithManager(mgr); err != nil {
reconciler := metal3iocontroller.NewBareMetalHostReconciler(mgr.GetClient(), mgr.GetScheme(), provisionerFactory)
if err = reconciler.SetupWithManager(mgr); err != nil {
setupLog.Error(err, "unable to create controller", "controller", "BareMetalHost")
os.Exit(1)
}
Expand Down