-
Notifications
You must be signed in to change notification settings - Fork 77
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* adding test * Added updated chart * Added controller with watch * Added watch permission to clusterrole * Re-enabled the watch of nodes - successful test run * WIP - adding node taint check * WIP - adding node taint check - kubectl lib update * node taint applied * WIP - test watch works somewhat * removed logging * comment fix * WIP - refactoring pod deletion for vmware drain * WIP - recreate pvc * new pod and pvc are now come back up * removed unnecessary code from check_nodes * cleaning up comments * revert extracting a function * Added flags for RF * refactored to use an in-memory node to dc map * do not stop reconciliation if map update fails * fix jvm flags in tolerations-dc.yaml * Removed copyPodCredentials * updates to the in-memory node-to-dc map
- Loading branch information
1 parent
b97d1bd
commit e7d20fb
Showing
14 changed files
with
552 additions
and
10 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
# ClusterRole granting read-only access to cluster nodes.
# The role name comes from the chart value `clusterRoleName`; the single rule
# allows get/list/watch on `nodes` in the "" (core) API group.
apiVersion: rbac.authorization.k8s.io/v1 | ||
kind: ClusterRole | ||
metadata: | ||
name: {{ .Values.clusterRoleName }} | ||
rules: | ||
- apiGroups: | ||
- "" | ||
resources: | ||
- nodes | ||
verbs: | ||
- get | ||
- list | ||
- watch |
12 changes: 12 additions & 0 deletions
12
charts/cass-operator-chart/templates/clusterrolebinding.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
# ClusterRoleBinding that attaches the ClusterRole named by the chart value
# `clusterRoleName` to the ServiceAccount named by `serviceAccountName` in the
# namespace the release is installed into.
kind: ClusterRoleBinding | ||
apiVersion: rbac.authorization.k8s.io/v1 | ||
metadata: | ||
name: {{ .Values.clusterRoleBindingName }} | ||
subjects: | ||
- kind: ServiceAccount | ||
name: {{ .Values.serviceAccountName }} | ||
namespace: {{ .Release.Namespace }} | ||
roleRef: | ||
kind: ClusterRole | ||
name: {{ .Values.clusterRoleName }} | ||
apiGroup: rbac.authorization.k8s.io |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,183 @@ | ||
// Copyright DataStax, Inc. | ||
// Please see the included license file for details. | ||
|
||
package reconciliation | ||
|
||
import ( | ||
"fmt" | ||
"sync" | ||
"time" | ||
|
||
corev1 "k8s.io/api/core/v1" | ||
"k8s.io/apimachinery/pkg/types" | ||
) | ||
|
||
func (rc *ReconciliationContext) GetPVCForPod(podNamespace string, podName string) (*corev1.PersistentVolumeClaim, error) { | ||
pvcFullName := fmt.Sprintf("%s-%s", PvcName, podName) | ||
|
||
pvc := &corev1.PersistentVolumeClaim{} | ||
err := rc.Client.Get(rc.Ctx, types.NamespacedName{Namespace: podNamespace, Name: pvcFullName}, pvc) | ||
if err != nil { | ||
rc.ReqLogger.Error(err, "error retrieving PersistentVolumeClaim") | ||
return nil, err | ||
} | ||
|
||
return pvc, nil | ||
} | ||
|
||
func (rc *ReconciliationContext) DeletePvcIgnoreFinalizers(podNamespace string, podName string) (*corev1.PersistentVolumeClaim, error) { | ||
var wg sync.WaitGroup | ||
|
||
wg.Add(1) | ||
|
||
var goRoutineError *error = nil | ||
|
||
pvcFullName := fmt.Sprintf("%s-%s", PvcName, podName) | ||
|
||
pvc, err := rc.GetPVCForPod(podNamespace, podName) | ||
if err != nil { | ||
return nil, err | ||
} | ||
|
||
// Delete might hang due to a finalizer such as kubernetes.io/pvc-protection | ||
// so we run it asynchronously and then remove any finalizers to unblock it. | ||
go func() { | ||
defer wg.Done() | ||
rc.ReqLogger.Info("goroutine to delete pvc started") | ||
|
||
// If we don't grab a new copy of the pvc, the deletion could fail because the update has | ||
// changed the pvc and the delete fails because there is a newer version | ||
|
||
pvcToDelete := &corev1.PersistentVolumeClaim{} | ||
err := rc.Client.Get(rc.Ctx, types.NamespacedName{Namespace: podNamespace, Name: pvcFullName}, pvcToDelete) | ||
if err != nil { | ||
rc.ReqLogger.Info("goroutine to delete pvc: error found in get") | ||
rc.ReqLogger.Error(err, "error retrieving PersistentVolumeClaim for deletion") | ||
goRoutineError = &err | ||
} | ||
|
||
rc.ReqLogger.Info("goroutine to delete pvc: no error found in get") | ||
|
||
err = rc.Client.Delete(rc.Ctx, pvcToDelete) | ||
if err != nil { | ||
rc.ReqLogger.Info("goroutine to delete pvc: error found in delete") | ||
rc.ReqLogger.Error(err, "error removing PersistentVolumeClaim", | ||
"name", pvcFullName) | ||
goRoutineError = &err | ||
} | ||
rc.ReqLogger.Info("goroutine to delete pvc: no error found in delete") | ||
rc.ReqLogger.Info("goroutine to delete pvc: end of goroutine") | ||
}() | ||
|
||
// Give the resource a second to get to a terminating state. Note that this | ||
// may not be reflected in the resource's status... hence the sleep here as | ||
// opposed to checking the status. | ||
time.Sleep(5 * time.Second) | ||
|
||
// In the case of PVCs at least, finalizers removed before deletion can be | ||
// automatically added back. Consequently, we delete the resource first, | ||
// then remove any finalizers while it is terminating. | ||
|
||
pvc.ObjectMeta.Finalizers = []string{} | ||
|
||
err = rc.Client.Update(rc.Ctx, pvc) | ||
if err != nil { | ||
rc.ReqLogger.Info("ignoring error removing finalizer from PersistentVolumeClaim", | ||
"name", pvcFullName, | ||
"err", err.Error()) | ||
|
||
// Ignore some errors as this may fail due to the resource already having been | ||
// deleted (which is what we want). | ||
} | ||
|
||
rc.ReqLogger.Info("before wg.Wait()") | ||
|
||
// Wait for the delete to finish, which should have been unblocked by | ||
// removing the finalizers. | ||
wg.Wait() | ||
rc.ReqLogger.Info("after wg.Wait()") | ||
|
||
// We can't dereference a nil, so check if we have one | ||
if goRoutineError == nil { | ||
return pvc, nil | ||
} | ||
return nil, *goRoutineError | ||
} | ||
|
||
// Check nodes for vmware draining taints | ||
func (rc *ReconciliationContext) checkNodeTaints() error { | ||
logger := rc.ReqLogger | ||
rc.ReqLogger.Info("reconciler::checkNodesTaints") | ||
|
||
// Get the pods | ||
|
||
podList, err := rc.listPods(rc.Datacenter.GetClusterLabels()) | ||
if err != nil { | ||
logger.Error(err, "error listing all pods in the cluster") | ||
} | ||
|
||
rc.clusterPods = PodPtrsFromPodList(podList) | ||
|
||
for _, pod := range podList.Items { | ||
// Check the related node for taints | ||
node := &corev1.Node{} | ||
err := rc.Client.Get(rc.Ctx, types.NamespacedName{Namespace: "", Name: pod.Spec.NodeName}, node) | ||
if err != nil { | ||
logger.Error(err, "error retrieving node for pod for node taint check") | ||
return err | ||
} | ||
|
||
rc.ReqLogger.Info(fmt.Sprintf("node %s has %d taints", node.ObjectMeta.Name, len(node.Spec.Taints))) | ||
|
||
for _, taint := range node.Spec.Taints { | ||
if taint.Key == "node.vmware.com/drain" && taint.Effect == "NoSchedule" { | ||
if taint.Value == "planned-downtime" || taint.Value == "drain" { | ||
|
||
// Drain the cassandra node | ||
|
||
rc.ReqLogger.Info("reconciler::checkNodesTaints vmware taint found. draining and deleting pod", | ||
"pod", pod.Name) | ||
|
||
if isMgmtApiRunning(&pod) { | ||
err = rc.NodeMgmtClient.CallDrainEndpoint(&pod) | ||
if err != nil { | ||
rc.ReqLogger.Error(err, "error during cassandra node drain for vmware drain", | ||
"pod", pod.Name) | ||
} | ||
} | ||
|
||
// Add the cassandra node to replace nodes | ||
|
||
rc.Datacenter.Spec.ReplaceNodes = append(rc.Datacenter.Spec.ReplaceNodes, pod.ObjectMeta.Name) | ||
|
||
// Update CassandraDatacenter | ||
if err := rc.Client.Update(rc.Ctx, rc.Datacenter); err != nil { | ||
rc.ReqLogger.Error(err, "Failed to update CassandraDatacenter with removed finalizers") | ||
return err | ||
} | ||
|
||
// Remove the pvc | ||
|
||
_, err := rc.DeletePvcIgnoreFinalizers(pod.ObjectMeta.Namespace, pod.ObjectMeta.Name) | ||
if err != nil { | ||
rc.ReqLogger.Error(err, "error during PersistentVolume delete for vmware drain", | ||
"pod", pod.ObjectMeta.Name) | ||
return err | ||
} | ||
|
||
// Remove the pod | ||
|
||
err = rc.Client.Delete(rc.Ctx, &pod) | ||
if err != nil { | ||
rc.ReqLogger.Info("pod delete - err") | ||
rc.ReqLogger.Error(err, "error during cassandra node delete for vmware drain", | ||
"pod", pod.ObjectMeta.Name) | ||
return err | ||
} | ||
} | ||
} | ||
} | ||
} | ||
|
||
return nil | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.