-
Notifications
You must be signed in to change notification settings - Fork 74
[wmco] Improve error handling #42
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
openshift-merge-robot
merged 3 commits into
openshift:master
from
sebsoto:operator_changes
Apr 28, 2020
Merged
Changes from all commits
Commits
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -26,44 +26,61 @@ import ( | |
| "sigs.k8s.io/controller-runtime/pkg/source" | ||
| ) | ||
|
|
||
| var log = logf.Log.WithName("controller_wmc") | ||
| const ( | ||
| // ControllerName is the name of the WMC controller | ||
| ControllerName = "windowsmachineconfig-controller" | ||
| ) | ||
|
|
||
| /** | ||
| * USER ACTION REQUIRED: This is a scaffold file intended for the user to modify with their own Controller | ||
| * business logic. Delete these comments after modifying this file.* | ||
| */ | ||
| var log = logf.Log.WithName("controller_wmc") | ||
|
|
||
| // Add creates a new WindowsMachineConfig Controller and adds it to the Manager. The Manager will set fields on the Controller | ||
| // and Start it when the Manager is Started. | ||
| func Add(mgr manager.Manager) error { | ||
| return add(mgr, newReconciler(mgr)) | ||
| reconciler, err := newReconciler(mgr) | ||
| if err != nil { | ||
| return errors.Wrapf(err, "could not create %s reconciler", ControllerName) | ||
| } | ||
| return add(mgr, reconciler) | ||
| } | ||
|
|
||
| // newReconciler returns a new reconcile.Reconciler | ||
| func newReconciler(mgr manager.Manager) reconcile.Reconciler { | ||
| func newReconciler(mgr manager.Manager) (reconcile.Reconciler, error) { | ||
| // TODO: This should be moved out to validation for reconciler struct. | ||
| // Jira story: https://issues.redhat.com/browse/WINC-277 | ||
| clientset, err := kubernetes.NewForConfig(mgr.GetConfig()) | ||
| if err != nil { | ||
| log.Error(err, "error while getting clientset") | ||
| return nil, errors.Wrap(err, "error creating kubernetes clientset") | ||
| } | ||
| return &ReconcileWindowsMachineConfig{client: mgr.GetClient(), scheme: mgr.GetScheme(), k8sclientset: clientset} | ||
|
|
||
| windowsVMs := make(map[types.WindowsVM]bool) | ||
| vmTracker, err := tracker.NewTracker(clientset, windowsVMs) | ||
| if err != nil { | ||
| return nil, errors.Wrap(err, "failed to instantiate tracker") | ||
| } | ||
|
|
||
| return &ReconcileWindowsMachineConfig{client: mgr.GetClient(), | ||
| scheme: mgr.GetScheme(), | ||
| k8sclientset: clientset, | ||
| tracker: vmTracker, | ||
| windowsVMs: windowsVMs, | ||
| }, | ||
| nil | ||
| } | ||
|
|
||
| // add adds a new Controller to mgr with r as the reconcile.Reconciler | ||
| func add(mgr manager.Manager, r reconcile.Reconciler) error { | ||
| // Create a new controller | ||
| c, err := controller.New("windowsmachineconfig-controller", mgr, controller.Options{Reconciler: r}) | ||
| c, err := controller.New(ControllerName, mgr, controller.Options{Reconciler: r}) | ||
| if err != nil { | ||
| return err | ||
| return errors.Wrapf(err, "could not create %s", ControllerName) | ||
| } | ||
| // TODO: Add a predicate here. As of now, we get event notifications for all the WindowsMachineConfig objects, we | ||
| // want the predicate to filter the WMC object called `instance` | ||
| // Jira Story: https://issues.redhat.com/browse/WINC-282 | ||
| // Watch for changes to primary resource WindowsMachineConfig | ||
| err = c.Watch(&source.Kind{Type: &wmcapi.WindowsMachineConfig{}}, &handler.EnqueueRequestForObject{}) | ||
| if err != nil { | ||
| return err | ||
| return errors.Wrap(err, "could not create watch on WindowsMachineConfig objects") | ||
| } | ||
|
|
||
| // TODO(user): Modify this to be the types you create that are owned by the primary resource | ||
|
|
@@ -145,13 +162,6 @@ func (r *ReconcileWindowsMachineConfig) Reconcile(request reconcile.Request) (re | |
| return reconcile.Result{}, fmt.Errorf("error instantiating cloud provider: %v", err) | ||
| } | ||
| } | ||
| if r.k8sclientset == nil { | ||
| return reconcile.Result{}, nil | ||
| } | ||
| if r.windowsVMs == nil { | ||
| // populate the windowsVM map here from ConfigMap as source of truth | ||
| r.windowsVMs = make(map[types.WindowsVM]bool) | ||
| } | ||
| // Get the current number of Windows VMs created by WMCO. | ||
| // TODO: Get all the running Windows nodes in the cluster | ||
| // jira story: https://issues.redhat.com/browse/WINC-280 | ||
|
|
@@ -177,18 +187,11 @@ func (r *ReconcileWindowsMachineConfig) Reconcile(request reconcile.Request) (re | |
| // cluster | ||
| func (r *ReconcileWindowsMachineConfig) reconcileWindowsNodes(desired, current int) bool { | ||
| log.Info("replicas", "current", current, "desired", desired) | ||
| if r.tracker == nil { | ||
| var err error | ||
| r.tracker, err = tracker.NewTracker(r.k8sclientset, r.windowsVMs) | ||
| if err != nil { | ||
| log.Error(err, "tracker instantiation failed") | ||
| } | ||
| } | ||
| var vmCount bool | ||
| if desired < current { | ||
| vmCount = r.deleteWindowsVMs(current - desired) | ||
| vmCount = r.removeWorkerNodes(current - desired) | ||
| } else if desired > current { | ||
| vmCount = r.createWindowsWorkerNodes(desired - current) | ||
| vmCount = r.addWorkerNodes(desired - current) | ||
| } else if desired == current { | ||
| return true | ||
| } | ||
|
|
@@ -211,76 +214,98 @@ func chooseRandomNode(windowsVMs map[types.WindowsVM]bool) types.WindowsVM { | |
| return nil | ||
| } | ||
|
|
||
| // deleteWindowsVMs deletes the required number of Windows VMs from the cluster and returns a bool indicating the | ||
| // status of deletion. This method will return false if any of the VMs fail to get deleted. | ||
| // removeWorkerNode terminates the underlying VM and removes the given vm from the list of VMs | ||
| func (r *ReconcileWindowsMachineConfig) removeWorkerNode(vm types.WindowsVM) error { | ||
| // VM is missing credentials, this can occur if there was a failure initially creating it. We can consider the | ||
| // actual VM terminated as there is nothing we can do with it. | ||
| if vm.GetCredentials() == nil || len(vm.GetCredentials().GetInstanceId()) == 0 { | ||
| delete(r.windowsVMs, vm) | ||
sebsoto marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| return nil | ||
| } | ||
|
|
||
| // Terminate the instance via its instance id | ||
| id := vm.GetCredentials().GetInstanceId() | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: rename id to instancedID for specific naming |
||
| log.V(1).Info("destroying the Windows VM", "ID", id) | ||
|
|
||
| // Delete the Windows VM from cloud provider | ||
| if err := r.cloudProvider.DestroyWindowsVM(id); err != nil { | ||
| return errors.Wrapf(err, "error destroying VM with ID %s", id) | ||
| } | ||
|
|
||
| // Remove VM from our list of tracked VMs | ||
| delete(r.windowsVMs, vm) | ||
sebsoto marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| log.Info("Windows worker has been removed from the cluster", "ID", id) | ||
|
|
||
| return nil | ||
| } | ||
|
|
||
| // removeWorkerNodes removes the required number of Windows VMs from the cluster and returns a bool indicating the | ||
| // success. This method will return false if any of the VMs fail to be removed. | ||
| // TODO: This method should return a slice of errors that we collected. | ||
| // Jira story: https://issues.redhat.com/browse/WINC-266 | ||
| func (r *ReconcileWindowsMachineConfig) deleteWindowsVMs(count int) bool { | ||
| func (r *ReconcileWindowsMachineConfig) removeWorkerNodes(count int) bool { | ||
| var errs []error | ||
| // From the list of Windows VMs choose randomly count number of VMs. | ||
| for i := 0; i < count; i++ { | ||
| // Choose of the Windows worker nodes randomly | ||
| vmTobeDeleted := chooseRandomNode(r.windowsVMs) | ||
| if vmTobeDeleted.GetCredentials() == nil { | ||
| errs = append(errs, errors.New("VM picked for deletion has no credentials, will reconcile...")) | ||
| vm := chooseRandomNode(r.windowsVMs) | ||
aravindhp marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| if vm == nil { | ||
| errs = append(errs, fmt.Errorf("expected VM and got a nil value")) | ||
| continue | ||
| } | ||
|
|
||
| // Get the instance associated with the Windows worker node | ||
| instancedID := vmTobeDeleted.GetCredentials().GetInstanceId() | ||
| if len(instancedID) == 0 { | ||
| errs = append(errs, errors.New("VM picked for deletion has no instance ID, will reconcile...")) | ||
| continue | ||
| } | ||
|
|
||
| // Delete the Windows VM from cloud provider | ||
| log.V(1).Info("deleting the Windows VM", "ID", instancedID) | ||
| if err := r.cloudProvider.DestroyWindowsVM(instancedID); err != nil { | ||
| log.Error(err, "error deleting Windows VM", "ID", instancedID) | ||
| errs = append(errs, errors.Wrapf(err, "error deleting Windows VM %s", instancedID)) | ||
| if err := r.removeWorkerNode(vm); err != nil { | ||
| errs = append(errs, err) | ||
| } | ||
| delete(r.windowsVMs, vmTobeDeleted) | ||
| log.Info("Windows VM has been deleted and removed from the cluster", "ID", instancedID) | ||
| } | ||
|
|
||
| // If any of the Windows VM fails to get deleted consider this as a failure and return false | ||
| // If any of the Windows VM fails to get removed consider this as a failure and return false | ||
| if len(errs) > 0 { | ||
| return false | ||
| } | ||
| return true | ||
| } | ||
|
|
||
sebsoto marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| // createWindowsVMs creates the required number of windows Windows VM and configure them to make | ||
| // addWorkerNode creates a new Windows VM and configures it, adding it as a node object to the cluster | ||
| func (r *ReconcileWindowsMachineConfig) addWorkerNode() (types.WindowsVM, error) { | ||
| // Create Windows VM in the cloud provider | ||
| log.V(1).Info("creating a Windows VM") | ||
| vm, err := r.cloudProvider.CreateWindowsVM() | ||
| if err != nil { | ||
| return nil, errors.Wrap(err, "error creating windows VM") | ||
| } | ||
|
|
||
sebsoto marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| log.V(1).Info("configuring the Windows VM", "ID", vm.GetCredentials().GetInstanceId()) | ||
| nc := nodeconfig.NewNodeConfig(r.k8sclientset, vm) | ||
| if err := nc.Configure(); err != nil { | ||
| // TODO: Unwrap to extract correct error | ||
| if cleanupErr := r.removeWorkerNode(vm); cleanupErr != nil { | ||
| log.Error(cleanupErr, "failed to cleanup VM", "VM", vm.GetCredentials().GetInstanceId()) | ||
| } | ||
| return nil, errors.Wrap(err, "failed to configure Windows VM") | ||
| } | ||
|
|
||
| log.Info("Windows VM has joined the cluster as a worker node", "ID", nc.GetCredentials().GetInstanceId()) | ||
| return vm, nil | ||
| } | ||
|
|
||
| // addWorkerNodes creates the required number of Windows VMs and configures them to make | ||
| // them a worker node | ||
| // TODO: This method should return a slice of errors that we collected. | ||
| // Jira story: https://issues.redhat.com/browse/WINC-266 | ||
| func (r *ReconcileWindowsMachineConfig) createWindowsWorkerNodes(count int) bool { | ||
| func (r *ReconcileWindowsMachineConfig) addWorkerNodes(count int) bool { | ||
| var errs []error | ||
| for i := 0; i < count; i++ { | ||
| // Create Windows VM in the cloud provider | ||
| log.V(1).Info("creating the Windows VM") | ||
| createdVM, err := r.cloudProvider.CreateWindowsVM() | ||
| // Create and configure a new Windows VM | ||
| vm, err := r.addWorkerNode() | ||
| if err != nil { | ||
| errs = append(errs, errors.Wrap(err, "error creating windows VM")) | ||
| log.Error(err, "error creating windows VM") | ||
| } | ||
|
|
||
| // Make the Windows VM a Windows worker node. | ||
| log.V(1).Info("configuring the Windows VM", "ID", | ||
| createdVM.GetCredentials().GetInstanceId()) | ||
| nc := nodeconfig.NewNodeConfig(r.k8sclientset, createdVM) | ||
| if err := nc.Configure(); err != nil { | ||
| // TODO: Unwrap to extract correct error | ||
| errs = append(errs, errors.Wrap(err, "configuring Windows VM failed")) | ||
| log.Error(err, "configuring Windows VM failed") | ||
| } | ||
| if err == nil { | ||
| log.Info("Windows VM has joined the cluster as a worker node", "ID", nc.GetCredentials().GetInstanceId()) | ||
| log.Error(err, "error adding a Windows worker node") | ||
| errs = append(errs, errors.Wrap(err, "error adding Windows worker node")) | ||
| continue | ||
sebsoto marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| } | ||
|
|
||
| // update the windowsVMs slice | ||
| if _, ok := r.windowsVMs[createdVM]; !ok { | ||
| r.windowsVMs[createdVM] = true | ||
| // update the windowsVMs map with the new VM | ||
| if _, ok := r.windowsVMs[vm]; !ok { | ||
| r.windowsVMs[vm] = true | ||
| } | ||
| } | ||
|
|
||
|
|
||
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.