Add etcd-quorum-guard manifests and doc #613
Changes from all commits
@@ -0,0 +1,36 @@ (new file: etcd Quorum Guard documentation)

```markdown
# etcd Quorum Guard

The etcd Quorum Guard ensures that quorum is maintained for etcd for
[OpenShift](https://openshift.io/).

For the etcd cluster to remain usable, we must maintain quorum, which
is a majority of all etcd members. For example, an etcd cluster with
3 members (i.e. a 3-master deployment) must have at least 2 healthy
etcd members to meet the quorum requirement.

There are situations where 2 etcd members could be down at once:

* a master has gone offline and the MachineConfig Controller (MCC)
  tries to roll out a new MachineConfig (MC) by rebooting masters
* the MCC is doing a MachineConfig rollout and doesn't wait for etcd
  on the previous master to become healthy again before rebooting the
  next master

The etcd Quorum Guard ensures that a drain on a master is not allowed
to proceed if the reboot of that master would cause etcd quorum loss.
It is implemented as a deployment, with one pod per master node.

The etcd Quorum Guard checks the health of etcd by querying etcd's
health endpoint; if etcd reports itself unhealthy or is not present,
the quorum guard reports itself not ready. A pod disruption budget is
used to allow no more than one unhealthy or missing quorum guard (and
hence etcd member). If one etcd member is already unhealthy or
missing, the disruption budget acts as a drain gate and does not
allow an attempt to drain another node.

This drain gate cannot protect against a second node failing due to,
e.g., hardware failure; it can only protect against an attempt to
drain the node in preparation for taking it down.

There is no user or administrator action necessary or available for
the etcd Quorum Guard.
```
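As a quick illustration of the quorum arithmetic described in the README (this sketch is not part of the PR), a strict majority of members must stay healthy, so a cluster of n members tolerates n minus majority failures:

```go
package main

import "fmt"

// quorum returns the minimum number of healthy members an etcd
// cluster of size n needs to keep serving: a strict majority.
func quorum(n int) int { return n/2 + 1 }

// faultTolerance returns how many members may be lost before the
// cluster drops below quorum.
func faultTolerance(n int) int { return n - quorum(n) }

func main() {
	for _, n := range []int{1, 3, 5} {
		fmt.Printf("%d members: quorum=%d, tolerates %d failure(s)\n",
			n, quorum(n), faultTolerance(n))
	}
	// A 3-master deployment prints: quorum=2, tolerates 1 failure,
	// which is why two simultaneous reboots must be prevented.
}
```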
@@ -0,0 +1,30 @@ (new file: package resourceapply)

```go
package resourceapply

import (
	"github.com/openshift/machine-config-operator/lib/resourcemerge"
	policyv1 "k8s.io/api/policy/v1beta1"
	apierrors "k8s.io/apimachinery/pkg/api/errors"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	policyclientv1 "k8s.io/client-go/kubernetes/typed/policy/v1beta1"
)

// ApplyPodDisruptionBudget applies the required podDisruptionBudget to the cluster.
func ApplyPodDisruptionBudget(client policyclientv1.PodDisruptionBudgetsGetter, required *policyv1.PodDisruptionBudget) (*policyv1.PodDisruptionBudget, bool, error) {
	existing, err := client.PodDisruptionBudgets(required.Namespace).Get(required.Name, metav1.GetOptions{})
	if apierrors.IsNotFound(err) {
		actual, err := client.PodDisruptionBudgets(required.Namespace).Create(required)
		return actual, true, err
	}
	if err != nil {
		return nil, false, err
	}

	modified := resourcemerge.BoolPtr(false)
	resourcemerge.EnsurePodDisruptionBudget(modified, existing, *required)
	if !*modified {
		return existing, false, nil
	}

	actual, err := client.PodDisruptionBudgets(required.Namespace).Update(existing)
	return actual, true, err
}
```
@@ -0,0 +1,16 @@ (new file: package resourcemerge)

```go
package resourcemerge

import (
	policyv1 "k8s.io/api/policy/v1beta1"
	"k8s.io/apimachinery/pkg/api/equality"
)

// EnsurePodDisruptionBudget ensures that the existing matches the required.
// modified is set to true when existing had to be updated with required.
func EnsurePodDisruptionBudget(modified *bool, existing *policyv1.PodDisruptionBudget, required policyv1.PodDisruptionBudget) {
	EnsureObjectMeta(modified, &existing.ObjectMeta, required.ObjectMeta)
	if !equality.Semantic.DeepEqual(existing.Spec, required.Spec) {
		*modified = true
		existing.Spec = required.Spec
	}
}
```
@@ -0,0 +1,27 @@ (new file: package resourceread)

```go
package resourceread

import (
	policyv1 "k8s.io/api/policy/v1beta1"
	"k8s.io/apimachinery/pkg/runtime"
	"k8s.io/apimachinery/pkg/runtime/serializer"
)

var (
	policyScheme = runtime.NewScheme()
	policyCodecs = serializer.NewCodecFactory(policyScheme)
)

func init() {
	if err := policyv1.AddToScheme(policyScheme); err != nil {
		panic(err)
	}
}

// ReadPodDisruptionBudgetV1OrDie reads podDisruptionBudget object from bytes. Panics on error.
func ReadPodDisruptionBudgetV1OrDie(objBytes []byte) *policyv1.PodDisruptionBudget {
	requiredObj, err := runtime.Decode(policyCodecs.UniversalDecoder(policyv1.SchemeGroupVersion), objBytes)
	if err != nil {
		panic(err)
	}
	return requiredObj.(*policyv1.PodDisruptionBudget)
}
```
@@ -0,0 +1,101 @@ (new file: etcd-quorum-guard deployment manifest; the probe fields are `initialDelaySeconds`/`periodSeconds`, fixed from the misspelled `initialDelaySecond`/`periodSecond`)

```yaml
apiVersion: apps/v1
kind: Deployment
metadata:
  name: etcd-quorum-guard
  namespace: {{.TargetNamespace}}
spec:
  replicas: 3
  selector:
    matchLabels:
      k8s-app: etcd-quorum-guard
  strategy:
    rollingUpdate:
      maxSurge: 0
      maxUnavailable: 1
    type: RollingUpdate
  template:
    metadata:
      labels:
        name: etcd-quorum-guard
        k8s-app: etcd-quorum-guard
    spec:
      hostNetwork: true
      affinity:
        podAntiAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
          - labelSelector:
              matchExpressions:
              - key: k8s-app
                operator: In
                values:
                - "etcd-quorum-guard"
            topologyKey: kubernetes.io/hostname
      nodeSelector:
        node-role.kubernetes.io/master: ""
      priorityClassName: "system-cluster-critical"
      tolerations:
      - key: node-role.kubernetes.io/master
        effect: NoSchedule
        operator: Exists
      - key: node.kubernetes.io/memory-pressure
        effect: NoSchedule
        operator: Exists
      - key: node.kubernetes.io/disk-pressure
        effect: NoSchedule
        operator: Exists
      - key: node.kubernetes.io/not-ready
        effect: NoExecute
        operator: Exists
      - key: node.kubernetes.io/unreachable
        effect: NoExecute
        operator: Exists
      - key: node.kubernetes.io/unschedulable
        effect: NoExecute
        operator: Exists
      - key: node-role.kubernetes.io/etcd
        operator: Exists
        effect: NoSchedule
      containers:
      - image: "{{.Images.EtcdQuorumGuardImage}}"
        imagePullPolicy: IfNotPresent
        name: etcd-quorum-guard-container
        volumeMounts:
        - mountPath: /mnt/kube
          name: kubecerts
        command:
        - "/bin/sh"
        args:
        - "-c"
        - |
          declare -r croot=/mnt/kube
          set -x
          declare -r health_endpoint="https://127.0.0.1:2379/health"
          declare -r cert="$(find $croot -name 'system:etcd-peer*.crt' -print -quit)"
          declare -r key="${cert%.crt}.key"
          declare -r cacert="$croot/ca.crt"
          ls -lR "$croot"
          ls -lRL "$croot"
          while : ; do date; curl --max-time 2 --cert "${cert//:/\:}" --key "$key" --cacert "$cacert" "$health_endpoint"; sleep 5; done
        readinessProbe:
          exec:
            command:
            - /bin/sh
            - -c
            - |
              declare -r croot=/mnt/kube
              declare -r health_endpoint="https://127.0.0.1:2379/health"
              declare -r cert="$(find $croot -name 'system:etcd-peer*.crt' -print -quit)"
              declare -r key="${cert%.crt}.key"
              declare -r cacert="$croot/ca.crt"
              [[ -z $cert || -z $key ]] && exit 1
              curl --max-time 2 --silent --cert "${cert//:/\:}" --key "$key" --cacert "$cacert" "$health_endpoint" | grep '{ *"health" *: *"true" *}'
          initialDelaySeconds: 5
          periodSeconds: 5
        resources:
          requests:
            cpu: 10m
            memory: 5Mi
      volumes:
      - name: kubecerts
        hostPath:
          path: /etc/kubernetes/static-pod-resources/etcd-member
```
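The readiness probe above is a shell loop around `curl`; for clarity, here is the same check sketched in Go (illustration only, not part of the PR; the certificate file names are placeholders mirroring the manifest's `/mnt/kube` mount). Exit status 0 means ready, anything else means not ready:

```go
package main

import (
	"crypto/tls"
	"crypto/x509"
	"encoding/json"
	"fmt"
	"io/ioutil"
	"net/http"
	"os"
	"time"
)

func main() {
	// Peer client certificate and key; the exact file names are
	// placeholders (the real names are discovered with find).
	cert, err := tls.LoadX509KeyPair(
		"/mnt/kube/system:etcd-peer-example.crt",
		"/mnt/kube/system:etcd-peer-example.key",
	)
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
	caPEM, err := ioutil.ReadFile("/mnt/kube/ca.crt")
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
	pool := x509.NewCertPool()
	pool.AppendCertsFromPEM(caPEM)

	client := &http.Client{
		Timeout: 2 * time.Second, // mirrors curl --max-time 2
		Transport: &http.Transport{
			TLSClientConfig: &tls.Config{
				Certificates: []tls.Certificate{cert},
				RootCAs:      pool,
			},
		},
	}
	resp, err := client.Get("https://127.0.0.1:2379/health")
	if err != nil {
		os.Exit(1) // etcd not present or not answering: not ready
	}
	defer resp.Body.Close()

	var health struct {
		Health string `json:"health"`
	}
	if err := json.NewDecoder(resp.Body).Decode(&health); err != nil || health.Health != "true" {
		os.Exit(1) // etcd reports itself unhealthy: not ready
	}
	// exit 0: etcd healthy, pod reports ready
}
```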
@@ -0,0 +1,10 @@ (new file: etcd-quorum-guard pod disruption budget manifest)

```yaml
apiVersion: policy/v1beta1
kind: PodDisruptionBudget
metadata:
  namespace: {{.TargetNamespace}}
  name: etcd-quorum-guard
spec:
  maxUnavailable: 1
  selector:
    matchLabels:
      k8s-app: etcd-quorum-guard
```
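For context on how `maxUnavailable: 1` acts as a drain gate: `kubectl drain` removes pods through the eviction API, and the API server rejects with HTTP 429 any eviction that would push the budget past its limit. A sketch of that interaction (illustration only, not part of the PR; the kubeconfig path, pod name, and namespace are placeholders):

```go
package main

import (
	"fmt"
	"log"

	policyv1 "k8s.io/api/policy/v1beta1"
	apierrors "k8s.io/apimachinery/pkg/api/errors"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/kubernetes"
	"k8s.io/client-go/tools/clientcmd"
)

func main() {
	config, err := clientcmd.BuildConfigFromFlags("", "/root/.kube/config")
	if err != nil {
		log.Fatal(err)
	}
	client, err := kubernetes.NewForConfig(config)
	if err != nil {
		log.Fatal(err)
	}

	// Attempt to evict one quorum-guard pod, as a node drain would.
	eviction := &policyv1.Eviction{
		ObjectMeta: metav1.ObjectMeta{
			Name:      "etcd-quorum-guard-example",
			Namespace: "default",
		},
	}
	err = client.CoreV1().Pods(eviction.Namespace).Evict(eviction)
	switch {
	case err == nil:
		fmt.Println("eviction allowed: the budget still has headroom")
	case apierrors.IsTooManyRequests(err):
		fmt.Println("eviction blocked by the PodDisruptionBudget: another guard is already unready")
	default:
		log.Fatal(err)
	}
}
```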
Review comment: this should reflect https://github.com/openshift/etcd-quorum-guard/blob/master/manifests/image-references#L5-L8 to be correct, right?
Reply: https://github.com/openshift/etcd-quorum-guard/ is actually now dead code (at least until/unless we revive it as a full-fledged operator).