diff --git a/bindata/etcd/pod.yaml b/bindata/etcd/pod.yaml index c379d6b916..83d2850ac7 100644 --- a/bindata/etcd/pod.yaml +++ b/bindata/etcd/pod.yaml @@ -5,7 +5,8 @@ metadata: namespace: openshift-etcd labels: app: etcd - etcd: "not-true-yet" + k8s-app: etcd + etcd: "true" revision: "REVISION" spec: containers: @@ -20,16 +21,50 @@ spec: #!/bin/sh set -euo pipefail - sleep 24h + ETCDCTL="etcdctl --cacert=/etc/kubernetes/static-pod-resources/configmaps/etcd-serving-ca/ca-bundle.crt \ + --cert=/etc/kubernetes/static-pod-resources/secrets/etcd-all-peer/etcd-peer-NODE_NAME.crt \ + --key=/etc/kubernetes/static-pod-resources/secrets/etcd-all-peer/etcd-peer-NODE_NAME.key \ + --endpoints=${ALL_ETCD_ENDPOINTS}" + ${ETCDCTL} member list - exit 0 + echo "waiting for member $NODE_NODE_ENVVAR_NAME_ETCD_DNS_NAME..." + COUNT=30 + while [ $COUNT -gt 0 ]; do + IS_MEMBER_PRESENT=$(${ETCDCTL} member list | grep -o "${NODE_NODE_ENVVAR_NAME_ETCD_DNS_NAME}.*:2380") + if [[ -n "${IS_MEMBER_PRESENT:-}" ]]; then + break + fi + sleep 1 + let COUNT=$COUNT-1 + done - # add logic here to confirm that we are part of the etcd members (the controller added us). - # this is probably a golang command that tries to confirm for two minutes before exiting - # and prints nothing except for the ETCD_INITIAL_CLUSTER + # if the member is not present after 30 seconds + if [ -z "$IS_MEMBER_PRESENT" ]; then + echo "member $NODE_NODE_ENVVAR_NAME_ETCD_DNS_NAME is not present after 30 seconds" + exit 1 + fi + echo "member $NODE_NODE_ENVVAR_NAME_ETCD_DNS_NAME is present, continuing" + initial_cluster="" + member_output=$( ${ETCDCTL} member list | cut -d',' -f3 ) + for endpoint_key in ${member_output}; do + endpoint=$(${ETCDCTL} member list | grep $endpoint_key | awk -F'[, ]' '{ print $7 }') + initial_cluster+="$endpoint_key=$endpoint," + echo "adding $endpoint_key=$endpoint," + done + # add this pod to the list + initial_cluster+="$NODE_NODE_ENVVAR_NAME_ETCD_NAME=https://$NODE_NODE_ENVVAR_NAME_ETCD_DNS_NAME:2380" + echo $initial_cluster + + # at this point we know this member is added. To support a transition, we must remove the old etcd pod. + # move it somewhere safe so we can retrieve it again later if something goes badly. 
+ mv /etc/kubernetes/manifests/etcd-member.yaml /etc/kubernetes/etcd-backup-dir || true + + export ETCD_INITIAL_CLUSTER="${initial_cluster}" export ETCD_NAME=${NODE_NODE_ENVVAR_NAME_ETCD_NAME} + env | grep ETCD | grep -v NODE + set -x exec etcd \ --initial-advertise-peer-urls=https://${NODE_NODE_ENVVAR_NAME_IP}:2380 \ --cert-file=/etc/kubernetes/static-pod-resources/secrets/etcd-all-serving/etcd-serving-NODE_NAME.crt \ @@ -43,23 +78,40 @@ spec: --advertise-client-urls=https://${NODE_NODE_ENVVAR_NAME_IP}:2379 \ --listen-client-urls=https://${LISTEN_ON_ALL_IPS}:2379 \ --listen-peer-urls=https://${LISTEN_ON_ALL_IPS}:2380 \ - --listen-metrics-urls=https://${LISTEN_ON_ALL_IPS}:9978 + --listen-metrics-urls=https://${LISTEN_ON_ALL_IPS}:9978 || mv /etc/kubernetes/etcd-backup-dir/etcd-member.yaml /etc/kubernetes/manifests env: ${COMPUTED_ENV_VARS} resources: requests: - memory: 200Mi - cpu: 100m + memory: 600Mi + cpu: 300m limits: - memory: 200Mi - cpu: 100m + memory: 600Mi + cpu: 300m + readinessProbe: + exec: + command: + - /bin/sh + - -ec + - "lsof -n -i :2380 | grep LISTEN" + failureThreshold: 3 + initialDelaySeconds: 3 + periodSeconds: 5 + successThreshold: 1 + timeoutSeconds: 5 + securityContext: + privileged: true volumeMounts: - - mountPath: /etc/kubernetes/static-pod-resources - name: resource-dir - - mountPath: /etc/kubernetes/static-pod-certs - name: cert-dir - - mountPath: /var/lib/etcd/ - name: data-dir + - mountPath: /etc/kubernetes/manifests + name: static-pod-dir + - mountPath: /etc/kubernetes/etcd-backup-dir + name: etcd-backup-dir + - mountPath: /etc/kubernetes/static-pod-resources + name: resource-dir + - mountPath: /etc/kubernetes/static-pod-certs + name: cert-dir + - mountPath: /var/lib/etcd/ + name: data-dir - name: etcd-metrics image: ${IMAGE} imagePullPolicy: IfNotPresent @@ -71,14 +123,6 @@ ${COMPUTED_ENV_VARS} #!/bin/sh set -euo pipefail - sleep 24h - - exit 0 - - # add logic here to confirm that we are part of the etcd members (the controller added us). 
- # this is probably a golang command that tries to confirm for two minutes before exiting - # and prints nothing except for the ETCD_INITIAL_CLUSTER - export ETCD_NAME=${NODE_NODE_ENVVAR_NAME_ETCD_NAME} exec etcd grpc-proxy start \ @@ -100,6 +144,8 @@ ${COMPUTED_ENV_VARS} limits: memory: 200Mi cpu: 100m + securityContext: + privileged: true volumeMounts: - mountPath: /etc/kubernetes/static-pod-resources name: resource-dir @@ -112,14 +158,20 @@ ${COMPUTED_ENV_VARS} tolerations: - operator: "Exists" volumes: - - hostPath: - path: /etc/kubernetes/static-pod-resources/etcd-pod-REVISION - name: resource-dir - - hostPath: - path: /etc/kubernetes/static-pod-resources/etcd-certs - name: cert-dir - - hostPath: - path: /var/lib/etcd - type: "" - name: data-dir + - hostPath: + path: /etc/kubernetes/manifests + name: static-pod-dir + - hostPath: + path: /etc/kubernetes/static-pod-resources/etcd-member + name: etcd-backup-dir + - hostPath: + path: /etc/kubernetes/static-pod-resources/etcd-pod-REVISION + name: resource-dir + - hostPath: + path: /etc/kubernetes/static-pod-resources/etcd-certs + name: cert-dir + - hostPath: + path: /var/lib/etcd + type: "" + name: data-dir diff --git a/go.mod b/go.mod index 54e2503f56..3742ff3cfd 100644 --- a/go.mod +++ b/go.mod @@ -8,7 +8,7 @@ require ( github.com/gorilla/mux v0.0.0-20191024121256-f395758b854c github.com/jteeuwen/go-bindata v3.0.8-0.20151023091102-a0ff2567cfb7+incompatible github.com/openshift/api v0.0.0-20200131223221-f2a771e1a90c - github.com/openshift/build-machinery-go v0.0.0-20200205161356-ef115f5adc73 + github.com/openshift/build-machinery-go v0.0.0-20200210090402-3b072832771e github.com/openshift/client-go v0.0.0-20200116152001-92a2713fa240 github.com/openshift/library-go v0.0.0-20200207150939-615337e1c3aa github.com/prometheus/client_golang v1.1.0 @@ -16,6 +16,7 @@ require ( github.com/spf13/pflag v1.0.5 github.com/vincent-petithory/dataurl v0.0.0-20191104211930-d1553a71de50 go.etcd.io/etcd v0.0.0-20191023171146-3cf2f69b5738 + google.golang.org/grpc v1.23.1 k8s.io/api v0.17.1 k8s.io/apimachinery v0.17.1 k8s.io/client-go v0.17.1 diff --git a/go.sum b/go.sum index d9769ed817..69a7141692 100644 --- a/go.sum +++ b/go.sum @@ -314,6 +314,8 @@ github.com/openshift/api v0.0.0-20200131223221-f2a771e1a90c h1:6kb8UZix0DNEfZbys github.com/openshift/api v0.0.0-20200131223221-f2a771e1a90c/go.mod h1:fT6U/JfG8uZzemTRwZA2kBDJP5nWz7v05UHnty/D+pk= github.com/openshift/build-machinery-go v0.0.0-20200205161356-ef115f5adc73 h1:WCvABw620V2FqeNoRJWeuAATqGjsrzb0UQ3tL0RHcXw= github.com/openshift/build-machinery-go v0.0.0-20200205161356-ef115f5adc73/go.mod h1:1CkcsT3aVebzRBzVTSbiKSkJMsC/CASqxesfqEMfJEc= +github.com/openshift/build-machinery-go v0.0.0-20200210090402-3b072832771e h1:qlMmBDqBavn7p4Y22teVEkJCnU9YAwhABHeXanAirWE= +github.com/openshift/build-machinery-go v0.0.0-20200210090402-3b072832771e/go.mod h1:1CkcsT3aVebzRBzVTSbiKSkJMsC/CASqxesfqEMfJEc= github.com/openshift/client-go v0.0.0-20200116152001-92a2713fa240 h1:XYfJWv2Ch+qInGLDEedHRtDsJwnxyU1L8U7SY56NcA8= github.com/openshift/client-go v0.0.0-20200116152001-92a2713fa240/go.mod h1:4riOwdj99Hd/q+iAcJZfNCsQQQMwURnZV6RL4WHYS5w= github.com/openshift/library-go v0.0.0-20200207150939-615337e1c3aa h1:3/i0Kzbt8TbC+QkZ3sZ9b6M64/CzEXa2fN2IoIKzXYs= diff --git a/manifests/0000_12_etcd-operator_06_deployment.yaml b/manifests/0000_12_etcd-operator_06_deployment.yaml index fc96a4eecf..7edc1817f6 100644 --- a/manifests/0000_12_etcd-operator_06_deployment.yaml +++ b/manifests/0000_12_etcd-operator_06_deployment.yaml @@ -46,7 
+46,7 @@ spec: name: etcd-client env: - name: IMAGE - value: quay.io/openshift/cluster-etcd-operator:v4.0 + value: registry.svc.ci.openshift.org/openshift:etcd - name: OPERATOR_IMAGE value: quay.io/openshift/cluster-etcd-operator:v4.0 - name: OPERATOR_IMAGE_VERSION diff --git a/manifests/0000_12_etcd-operator_06_static_pod_demonset.yaml b/manifests/0000_12_etcd-operator_06_static_pod_demonset.yaml deleted file mode 100644 index 5ae77030b9..0000000000 --- a/manifests/0000_12_etcd-operator_06_static_pod_demonset.yaml +++ /dev/null @@ -1,79 +0,0 @@ -apiVersion: apps/v1 -kind: DaemonSet -metadata: - name: etcd-staticpod - namespace: openshift-etcd -spec: - selector: - matchLabels: - k8s-app: etcd-staticpod - template: - metadata: - labels: - name: etcd-staticpod - k8s-app: etcd-staticpod - spec: - hostNetwork: true - affinity: - podAntiAffinity: - requiredDuringSchedulingIgnoredDuringExecution: - - labelSelector: - matchExpressions: - - key: k8s-app - operator: In - values: - - "" - topologyKey: kubernetes.io/hostname - nodeSelector: - node-role.kubernetes.io/master: "" - priorityClassName: "system-cluster-critical" - terminationGracePeriodSeconds: 3 - tolerations: - - key: node-role.kubernetes.io/master - effect: NoSchedule - operator: Exists - - key: node.kubernetes.io/not-ready - effect: NoExecute - operator: Exists - tolerationSeconds: 120 - - key: node.kubernetes.io/unreachable - effect: NoExecute - operator: Exists - tolerationSeconds: 120 - - key: node-role.kubernetes.io/etcd - operator: Exists - effect: NoSchedule - containers: - - image: "quay.io/openshift/cluster-etcd-operator:latest" - imagePullPolicy: IfNotPresent - name: etcd-staticpod - command: ["/usr/bin/cluster-etcd-operator"] - args: - - "staticpod" - serviceAccount: default - terminationMessagePolicy: FallbackToLogsOnError - env: - - name: NODE_NAME - valueFrom: - fieldRef: - fieldPath: spec.nodeName - - name: POD_NAME - valueFrom: - fieldRef: - fieldPath: metadata.name - volumeMounts: - - mountPath: /etc/kubernetes/manifests - name: manifests - - mountPath: /var/lib/etcd - name: data-dir - resources: - requests: - cpu: 10m - memory: 5Mi - volumes: - - name: manifests - hostPath: - path: /etc/kubernetes/manifests - - name: data-dir - hostPath: - path: /var/lib/etcd diff --git a/manifests/0000_12_etcd-operator_06_static_sync_demonset.yaml b/manifests/0000_12_etcd-operator_06_static_sync_demonset.yaml deleted file mode 100644 index 954b0f5520..0000000000 --- a/manifests/0000_12_etcd-operator_06_static_sync_demonset.yaml +++ /dev/null @@ -1,73 +0,0 @@ -apiVersion: apps/v1 -kind: DaemonSet -metadata: - name: etcd-staticsync - namespace: openshift-etcd -spec: - selector: - matchLabels: - k8s-app: etcd-staticsync - template: - metadata: - labels: - name: etcd-staticsync - k8s-app: etcd-staticsync - spec: - hostNetwork: true - affinity: - podAntiAffinity: - requiredDuringSchedulingIgnoredDuringExecution: - - labelSelector: - matchExpressions: - - key: k8s-app - operator: In - values: - - "" - topologyKey: kubernetes.io/hostname - nodeSelector: - node-role.kubernetes.io/master: "" - priorityClassName: "system-cluster-critical" - terminationGracePeriodSeconds: 3 - tolerations: - - key: node-role.kubernetes.io/master - effect: NoSchedule - operator: Exists - - key: node.kubernetes.io/not-ready - effect: NoExecute - operator: Exists - tolerationSeconds: 120 - - key: node.kubernetes.io/unreachable - effect: NoExecute - operator: Exists - tolerationSeconds: 120 - - key: node-role.kubernetes.io/etcd - operator: Exists - effect: NoSchedule 
- containers: - - image: "quay.io/openshift/cluster-etcd-operator:latest" - imagePullPolicy: IfNotPresent - name: etcd-staticsync - command: ["/usr/bin/cluster-etcd-operator"] - args: - - "staticsync" - terminationMessagePolicy: FallbackToLogsOnError - volumeMounts: - - mountPath: /run/secrets/etcd - name: etcd-static-token - resources: - requests: - cpu: 10m - memory: 5Mi - env: - - name: NODE_NAME - valueFrom: - fieldRef: - fieldPath: spec.nodeName - - name: POD_NAME - valueFrom: - fieldRef: - fieldPath: metadata.name - volumes: - - name: etcd-static-token - hostPath: - path: /etc/kubernetes/static-pod-resources/etcd-member/secrets/kubernetes.io/sa-token diff --git a/manifests/image-references b/manifests/image-references index 9dfbb9619b..6785031f3a 100644 --- a/manifests/image-references +++ b/manifests/image-references @@ -6,3 +6,7 @@ spec: from: kind: DockerImage name: quay.io/openshift/cluster-etcd-operator + - name: etcd + from: + kind: DockerImage + name: registry.svc.ci.openshift.org/openshift:etcd \ No newline at end of file diff --git a/pkg/operator/bootstrapteardown/bootstrap_teardown_controller.go b/pkg/operator/bootstrapteardown/bootstrap_teardown_controller.go index a1ddb3d027..72dfc23cfc 100644 --- a/pkg/operator/bootstrapteardown/bootstrap_teardown_controller.go +++ b/pkg/operator/bootstrapteardown/bootstrap_teardown_controller.go @@ -1,11 +1,20 @@ package bootstrapteardown import ( + "context" "encoding/json" "fmt" "strings" "time" + "go.etcd.io/etcd/clientv3" + "go.etcd.io/etcd/pkg/transport" + "google.golang.org/grpc" + + "k8s.io/client-go/kubernetes" + + "github.com/openshift/cluster-etcd-operator/pkg/operator/operatorclient" + operatorv1 "github.com/openshift/api/operator/v1" operatorv1informers "github.com/openshift/client-go/operator/informers/externalversions" operatorv1listers "github.com/openshift/client-go/operator/listers/operator/v1" @@ -23,18 +32,21 @@ import ( ) const ( - workQueueKey = "key" - configMapName = "config" - configMapKey = "config.yaml" + workQueueKey = "key" + configMapName = "config" + configMapKey = "config.yaml" + ConditionBootstrapRemoved = "BootstrapRemoved" + ConditionBootstrapSafeToRemove = "BootstrapSafeToRemove" ) type BootstrapTeardownController struct { - operatorConfigClient v1helpers.OperatorClient - clusterMemberShipController *clustermembercontroller.ClusterMemberController + kubeClient kubernetes.Interface + operatorConfigClient v1helpers.OperatorClient etcdOperatorLister operatorv1listers.EtcdLister kubeAPIServerLister operatorv1listers.KubeAPIServerLister configMapLister corev1listers.ConfigMapLister + endpointLister corev1listers.EndpointsLister cachesToSync []cache.InformerSynced queue workqueue.RateLimitingInterface @@ -44,8 +56,8 @@ type BootstrapTeardownController struct { // TODO wire a triggering lister func NewBootstrapTeardownController( operatorConfigClient v1helpers.OperatorClient, + kubeClient kubernetes.Interface, kubeInformersForNamespaces operatorv1helpers.KubeInformersForNamespaces, - clusterMemberShipController *clustermembercontroller.ClusterMemberController, operatorInformers operatorv1informers.SharedInformerFactory, @@ -53,18 +65,20 @@ func NewBootstrapTeardownController( ) *BootstrapTeardownController { openshiftKubeAPIServerNamespacedInformers := kubeInformersForNamespaces.InformersFor("openshift-kube-apiserver") c := &BootstrapTeardownController{ - operatorConfigClient: operatorConfigClient, - clusterMemberShipController: clusterMemberShipController, + operatorConfigClient: operatorConfigClient, + 
kubeClient: kubeClient, etcdOperatorLister: operatorInformers.Operator().V1().Etcds().Lister(), kubeAPIServerLister: operatorInformers.Operator().V1().KubeAPIServers().Lister(), configMapLister: openshiftKubeAPIServerNamespacedInformers.Core().V1().ConfigMaps().Lister(), + endpointLister: kubeInformersForNamespaces.InformersFor(operatorclient.TargetNamespace).Core().V1().Endpoints().Lister(), cachesToSync: []cache.InformerSynced{ operatorConfigClient.Informer().HasSynced, operatorInformers.Operator().V1().Etcds().Informer().HasSynced, operatorInformers.Operator().V1().KubeAPIServers().Informer().HasSynced, openshiftKubeAPIServerNamespacedInformers.Core().V1().ConfigMaps().Informer().HasSynced, + openshiftKubeAPIServerNamespacedInformers.Core().V1().Endpoints().Informer().HasSynced, }, queue: workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "BootstrapTeardownController"), eventRecorder: eventRecorder.WithComponentSuffix("bootstrap-teardown-controller"), @@ -72,6 +86,7 @@ func NewBootstrapTeardownController( operatorInformers.Operator().V1().KubeAPIServers().Informer().AddEventHandler(c.eventHandler()) operatorInformers.Operator().V1().Etcds().Informer().AddEventHandler(c.eventHandler()) openshiftKubeAPIServerNamespacedInformers.Core().V1().ConfigMaps().Informer().AddEventHandler(c.eventHandler()) + openshiftKubeAPIServerNamespacedInformers.Core().V1().Endpoints().Informer().AddEventHandler(c.eventHandler()) operatorConfigClient.Informer().AddEventHandler(c.eventHandler()) return c @@ -126,13 +141,16 @@ func (c *BootstrapTeardownController) removeBootstrap() error { return nil } - etcdEndpointExists := c.clusterMemberShipController.IsMember("etcd-bootstrap") + etcdEndpointExists, err := c.isBootstrapInEndpoints() + if err != nil { + return err + } // checks the actual etcd cluster membership API if etcd-bootstrap exists - etcdMemberExists := c.clusterMemberShipController.IsEtcdMember("etcd-bootstrap") + etcdMemberExists := c.isEtcdMember("etcd-bootstrap") if !etcdEndpointExists && !etcdMemberExists { // set bootstrap removed condition _, _, updateErr := v1helpers.UpdateStatus(c.operatorConfigClient, v1helpers.UpdateConditionFn(operatorv1.OperatorCondition{ - Type: clustermembercontroller.ConditionBootstrapRemoved, + Type: "BootstrapRemoved", Status: operatorv1.ConditionTrue, Reason: "BootstrapNodeRemoved", Message: "Etcd operator has scaled", @@ -145,7 +163,7 @@ func (c *BootstrapTeardownController) removeBootstrap() error { return nil } else { _, _, _ = v1helpers.UpdateStatus(c.operatorConfigClient, v1helpers.UpdateConditionFn(operatorv1.OperatorCondition{ - Type: clustermembercontroller.ConditionBootstrapRemoved, + Type: "BootstrapRemoved", Status: operatorv1.ConditionFalse, Reason: "BootstrapNodeNotRemoved", Message: fmt.Sprintf("Bootstrap node is not removed yet: etcdEndpointExists: %t etcdMemberExists %t", etcdEndpointExists, etcdMemberExists), @@ -154,11 +172,161 @@ func (c *BootstrapTeardownController) removeBootstrap() error { c.eventRecorder.Event("BootstrapTeardownController", "safe to remove bootstrap") - if err := c.clusterMemberShipController.RemoveBootstrapFromEndpoint(); err != nil { + if err := c.RemoveBootstrapFromEndpoint(); err != nil { + return err + } + + if err := c.etcdMemberRemove("etcd-bootstrap"); err != nil { + return err + } + + return nil +} + +func (c *BootstrapTeardownController) isBootstrapInEndpoints() (bool, error) { + hostEtcdEndpoints, err := c.endpointLister.Endpoints(operatorclient.TargetNamespace).Get("host-etcd") + if err 
!= nil { + return false, err + } + for _, endpointAddress := range hostEtcdEndpoints.Subsets[0].Addresses { + if endpointAddress.Hostname == "etcd-bootstrap" { + return true, nil + } + } + + return false, nil +} + +func (c *BootstrapTeardownController) isEtcdMember(name string) bool { + cli, err := c.getEtcdClient() + defer cli.Close() + if err != nil { + return false + } + ctx, cancel := context.WithCancel(context.Background()) + l, err := cli.MemberList(ctx) + cancel() + if err != nil { + return false + } + for _, m := range l.Members { + if m.Name == name { + return true + } + } + return false +} + +func (c *BootstrapTeardownController) etcdMemberRemove(name string) error { + cli, err := c.getEtcdClient() + defer cli.Close() + if err != nil { + return err + } + ctx, cancel := context.WithCancel(context.Background()) + l, err := cli.MemberList(ctx) + cancel() + if err != nil { + return err + } + for _, member := range l.Members { + if member.Name == name { + + resp, err := cli.MemberRemove(context.Background(), member.ID) + if err != nil { + return err + } + klog.Infof("Members left %#v", resp.Members) + } + } + return nil +} + +func (c *BootstrapTeardownController) getEtcdClient() (*clientv3.Client, error) { + endpoints, err := c.directEtcdEndpoints() + if err != nil { + return nil, err + } + + dialOptions := []grpc.DialOption{ + grpc.WithBlock(), // block until the underlying connection is up + } + + tlsInfo := transport.TLSInfo{ + CertFile: "/var/run/secrets/etcd-client/tls.crt", + KeyFile: "/var/run/secrets/etcd-client/tls.key", + TrustedCAFile: "/var/run/configmaps/etcd-ca/ca-bundle.crt", + } + tlsConfig, err := tlsInfo.ClientConfig() + + cfg := &clientv3.Config{ + DialOptions: dialOptions, + Endpoints: endpoints, + DialTimeout: 5 * time.Second, + TLS: tlsConfig, + } + + cli, err := clientv3.New(*cfg) + if err != nil { + return nil, err + } + return cli, err +} + +func (c *BootstrapTeardownController) directEtcdEndpoints() ([]string, error) { + hostEtcd, err := c.endpointLister.Endpoints(operatorclient.TargetNamespace).Get("host-etcd") + if err != nil { + c.eventRecorder.Warningf("ErrorGettingHostEtcd", "error occured while getting host-etcd endpoint: %#v", err) + return []string{}, err + } + if len(hostEtcd.Subsets) == 0 { + c.eventRecorder.Warningf("EtcdAddressNotFound", "could not find etcd address in host-etcd") + return []string{}, fmt.Errorf("could not find etcd address in host-etcd") + } + + etcdDiscoveryDomain := hostEtcd.Annotations["alpha.installer.openshift.io/dns-suffix"] + + var endpoints []string + for _, addr := range hostEtcd.Subsets[0].Addresses { + endpoints = append(endpoints, fmt.Sprintf("https://%s.%s:2379", addr.Hostname, etcdDiscoveryDomain)) + } + return endpoints, nil +} + +func (c *BootstrapTeardownController) RemoveBootstrapFromEndpoint() error { + hostEndpoint, err := c.endpointLister.Endpoints(operatorclient.TargetNamespace).Get("host-etcd") + if err != nil { return err } - if err := c.clusterMemberShipController.EtcdMemberRemove("etcd-bootstrap"); err != nil { + hostEndpointCopy := hostEndpoint.DeepCopy() + + subsetIndex := -1 + bootstrapIndex := -1 + for sI, s := range hostEndpointCopy.Subsets { + for i, s := range s.Addresses { + if s.Hostname == "etcd-bootstrap" { + bootstrapIndex = i + subsetIndex = sI + break + } + } + } + + if subsetIndex == -1 || bootstrapIndex == -1 { + // Unable to find bootstrap + return nil + } + + if len(hostEndpointCopy.Subsets[subsetIndex].Addresses) <= 1 { + return fmt.Errorf("only etcd-bootstrap endpoint observed, try 
again") + } + + hostEndpointCopy.Subsets[subsetIndex].Addresses = append(hostEndpointCopy.Subsets[subsetIndex].Addresses[0:bootstrapIndex], hostEndpointCopy.Subsets[subsetIndex].Addresses[bootstrapIndex+1:]...) + + _, err = c.kubeClient.CoreV1().Endpoints(hostEndpointCopy.Namespace).Update(hostEndpointCopy) + if err != nil { + klog.Errorf("error updating endpoint: %#v\n", err) return err } diff --git a/pkg/operator/bootstrapteardown/waitforceo.go b/pkg/operator/bootstrapteardown/waitforceo.go index b1d956198f..bfd9246672 100644 --- a/pkg/operator/bootstrapteardown/waitforceo.go +++ b/pkg/operator/bootstrapteardown/waitforceo.go @@ -5,7 +5,6 @@ import ( operatorv1 "github.com/openshift/api/operator/v1" operatorversionedclient "github.com/openshift/client-go/operator/clientset/versioned" - "github.com/openshift/cluster-etcd-operator/pkg/operator/clustermembercontroller" operatorv1helpers "github.com/openshift/library-go/pkg/operator/v1helpers" "k8s.io/apimachinery/pkg/fields" "k8s.io/apimachinery/pkg/watch" @@ -61,10 +60,10 @@ func waitForEtcdBootstrap(ctx context.Context, operatorRestClient rest.Interface } func done(etcd *operatorv1.Etcd) (bool, error) { - if operatorv1helpers.IsOperatorConditionTrue(etcd.Status.Conditions, clustermembercontroller.ConditionBootstrapRemoved) { + if operatorv1helpers.IsOperatorConditionTrue(etcd.Status.Conditions, ConditionBootstrapRemoved) { klog.Info("Cluster etcd operator bootstrapped successfully") return true, nil } - klog.Infof("waiting on condition %s in etcd CR %s/%s to be True.", clustermembercontroller.ConditionBootstrapRemoved, etcd.Namespace, etcd.Name) + klog.Infof("waiting on condition %s in etcd CR %s/%s to be True.", ConditionBootstrapRemoved, etcd.Namespace, etcd.Name) return false, nil } diff --git a/pkg/operator/bootstrapteardown/waitforceo_test.go b/pkg/operator/bootstrapteardown/waitforceo_test.go index a3a53db14e..f0bd6330b3 100644 --- a/pkg/operator/bootstrapteardown/waitforceo_test.go +++ b/pkg/operator/bootstrapteardown/waitforceo_test.go @@ -3,7 +3,6 @@ package bootstrapteardown import ( "testing" - "github.com/openshift/cluster-etcd-operator/pkg/operator/clustermembercontroller" "github.com/openshift/library-go/pkg/operator/events" operatorv1 "github.com/openshift/api/operator/v1" @@ -48,7 +47,7 @@ func Test_isEtcdAvailable(t *testing.T) { OperatorStatus: v1.OperatorStatus{ Conditions: []v1.OperatorCondition{ { - Type: clustermembercontroller.ConditionBootstrapSafeToRemove, + Type: ConditionBootstrapSafeToRemove, Status: v1.ConditionTrue, }, }, @@ -77,7 +76,7 @@ func Test_isEtcdAvailable(t *testing.T) { OperatorStatus: v1.OperatorStatus{ Conditions: []v1.OperatorCondition{ { - Type: clustermembercontroller.ConditionBootstrapSafeToRemove, + Type: ConditionBootstrapSafeToRemove, Status: v1.ConditionFalse, }, }, diff --git a/pkg/operator/clustermembercontroller/clustermembercontroller.go b/pkg/operator/clustermembercontroller/clustermembercontroller.go index 2d218b08b9..b1961d5318 100644 --- a/pkg/operator/clustermembercontroller/clustermembercontroller.go +++ b/pkg/operator/clustermembercontroller/clustermembercontroller.go @@ -1,31 +1,7 @@ package clustermembercontroller import ( - "bytes" - "context" - "encoding/json" - "fmt" - "strings" "time" - - operatorv1 "github.com/openshift/api/operator/v1" - ceoapi "github.com/openshift/cluster-etcd-operator/pkg/operator/api" - "github.com/openshift/library-go/pkg/operator/events" - "github.com/openshift/library-go/pkg/operator/v1helpers" - "go.etcd.io/etcd/clientv3" - 
"go.etcd.io/etcd/pkg/transport" - "google.golang.org/grpc" - corev1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" - utilruntime "k8s.io/apimachinery/pkg/util/runtime" - "k8s.io/apimachinery/pkg/util/wait" - "k8s.io/client-go/informers" - corev1client "k8s.io/client-go/kubernetes" - "k8s.io/client-go/tools/cache" - "k8s.io/client-go/util/retry" - "k8s.io/client-go/util/workqueue" - "k8s.io/klog" ) const ( @@ -38,603 +14,5 @@ const ( EtcdHostEndpointName = "host-etcd" EtcdEndpointName = "etcd" ConditionBootstrapSafeToRemove = "BootstrapSafeToRemove" - ConditionBootstrapRemoved = "BootstrapRemoved" dialTimeout = 20 * time.Second ) - -type ClusterMemberController struct { - clientset corev1client.Interface - operatorConfigClient v1helpers.OperatorClient - kubeInformersForOpenshiftEtcdNamespace informers.SharedInformerFactory - queue workqueue.RateLimitingInterface - eventRecorder events.Recorder - etcdDiscoveryDomain string -} - -func NewClusterMemberController( - clientset corev1client.Interface, - operatorConfigClient v1helpers.OperatorClient, - - kubeInformersForOpenshiftEtcdNamespace informers.SharedInformerFactory, - eventRecorder events.Recorder, - etcdDiscoveryDomain string, -) *ClusterMemberController { - c := &ClusterMemberController{ - clientset: clientset, - operatorConfigClient: operatorConfigClient, - queue: workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "ClusterMemberController"), - kubeInformersForOpenshiftEtcdNamespace: kubeInformersForOpenshiftEtcdNamespace, - eventRecorder: eventRecorder.WithComponentSuffix("cluster-member-controller"), - etcdDiscoveryDomain: etcdDiscoveryDomain, - } - kubeInformersForOpenshiftEtcdNamespace.Core().V1().Pods().Informer().AddEventHandler(c.eventHandler()) - kubeInformersForOpenshiftEtcdNamespace.Core().V1().Endpoints().Informer().AddEventHandler(c.eventHandler()) - kubeInformersForOpenshiftEtcdNamespace.Core().V1().ConfigMaps().Informer().AddEventHandler(c.eventHandler()) - operatorConfigClient.Informer().AddEventHandler(c.eventHandler()) - - return c -} - -func (c *ClusterMemberController) sync() error { - pods, err := c.clientset.CoreV1().Pods("openshift-etcd").List(metav1.ListOptions{LabelSelector: "k8s-app=etcd"}) - if err != nil { - klog.Infof("No Pod found in openshift-etcd with label k8s-app=etcd") - return err - } - - resyncName, err := c.getResyncName(pods) - for i := range pods.Items { - p := &pods.Items[i] - klog.Infof("Found etcd Pod with name %v\n", p.Name) - - // we anchor this loop on the configmap. In the case of failure we can resync by aligning with that Pod - switch resyncName { - case "": - break - case p.Name: - klog.Infof("resyncing on %s\n", p.Name) - default: - continue - } - - // exisiting member can be removed order is important here - if c.IsStatus("pending", p.Name, ceoapi.MemberRemove) { - klog.Infof("Member is unhealthy and is being removed: %s\n", p.Name) - if err := c.EtcdMemberRemove(p.Name); err != nil { - c.eventRecorder.Warning("ScalingDownFailed", err.Error()) - return err - // Todo alaypatel07: need to take care of condition degraded - // Todo alaypatel07: need to skip this reconciliation loop and continue later - // after the member is removed from this very point. - } - // continue? 
- } - - if c.IsMember(p.Name) { - klog.Infof("Member is already part of the cluster %s\n", p.Name) - name, err := c.getScaleAnnotationName() - if err != nil { - klog.Errorf("failed to obtain name from annotation %v", err) - } - // clear annotation because scaling is complete - if name == p.Name { - if err := c.setScaleAnnotation(""); err != nil { - return err - } - } - continue - } - - condUpgradable := operatorv1.OperatorCondition{ - Type: "ClusterMemberUpgradeable", - Status: operatorv1.ConditionFalse, - } - condProgressing := operatorv1.OperatorCondition{ - Type: "ClusterMemberProgressing", - Status: operatorv1.ConditionTrue, - } - // Setting the available false when scaling. This will prevent installer from reporting - // success when any of the members are not ready - condAvailable := operatorv1.OperatorCondition{ - Type: "ClusterMemberAvailable", - Status: operatorv1.ConditionFalse, - } - condDegraded := operatorv1.OperatorCondition{ - Type: "ClusterMemberDegraded", - Status: operatorv1.ConditionFalse, - } - if _, _, updateError := v1helpers.UpdateStatus(c.operatorConfigClient, - v1helpers.UpdateConditionFn(condUpgradable), - v1helpers.UpdateConditionFn(condProgressing), - v1helpers.UpdateConditionFn(condAvailable), - v1helpers.UpdateConditionFn(condDegraded)); updateError != nil { - return updateError - } - - // although we dont use SRV for server bootstrap we do use the records to map peerurls - peerFQDN, err := ReverseLookupSelf("etcd-server-ssl", "tcp", c.etcdDiscoveryDomain, p.Status.HostIP) - if err != nil { - klog.Errorf("error looking up self: %v", err) - continue - } - - // Pending MemberReady: etcd is free join cluster here we provide configurations nessisary to fullfill dependencies. - // if c.IsStatus("pending", p.Name, ceoapi.MemberReady) { - members, err := c.EtcdList("members") - if err != nil { - return err - } - - es := ceoapi.EtcdScaling{ - Metadata: &metav1.ObjectMeta{ - Name: p.Name, - CreationTimestamp: metav1.Time{Time: time.Now()}, - }, - Members: members, - PodFQDN: peerFQDN, - } - - esb, err := json.Marshal(es) - if err != nil { - return err - } - - retryErr := retry.RetryOnConflict(retry.DefaultRetry, func() error { - if err := c.setScaleAnnotation(string(esb)); err != nil { - return err - } - return nil - }) - if retryErr != nil { - return fmt.Errorf("Update approve failed: %v", retryErr) - } - // } - - // Pending MemberAdd: here we have observed the static pod having add dependencies filled ok to scale Cluster API. 
- if c.IsStatus("pending", p.Name, ceoapi.MemberReady) { - if err := c.etcdMemberAdd([]string{fmt.Sprintf("https://%s:2380", peerFQDN)}); err != nil { - c.eventRecorder.Warning("ScalingFailed", err.Error()) - return err - } - } - if c.IsMember(p.Name) { - klog.Infof("Member is already part of the cluster: %s\n", p.Name) - continue - } - - // should not happen - rerr := fmt.Errorf("failed scale member %s", p.Name) - c.eventRecorder.Warning("ScalingFailed", rerr.Error()) - return rerr - } - - if c.isClusterEtcdOperatorReady() { - // report available - condAvailable := operatorv1.OperatorCondition{ - Type: "ClusterMemberAvailable", - Status: operatorv1.ConditionTrue, - } - condUpgradable := operatorv1.OperatorCondition{ - Type: "ClusterMemberUpgradeable", - Status: operatorv1.ConditionTrue, - } - condProgressing := operatorv1.OperatorCondition{ - Type: "ClusterMemberProgressing", - Status: operatorv1.ConditionFalse, - } - condDegraded := operatorv1.OperatorCondition{ - Type: "ClusterMemberDegraded", - Status: operatorv1.ConditionFalse, - } - - if _, _, updateError := v1helpers.UpdateStatus(c.operatorConfigClient, - v1helpers.UpdateConditionFn(condAvailable), - v1helpers.UpdateConditionFn(condUpgradable), - v1helpers.UpdateConditionFn(condProgressing), - v1helpers.UpdateConditionFn(condDegraded)); updateError != nil { - klog.Infof("Error updating status %#v", err) - return updateError - } - klog.V(2).Info("scaling complete, etcd-bootstrap is safe to remove") - _, _, updateErr := v1helpers.UpdateStatus(c.operatorConfigClient, v1helpers.UpdateConditionFn( - operatorv1.OperatorCondition{ - Type: ConditionBootstrapSafeToRemove, - Status: operatorv1.ConditionTrue, - Reason: "ScalingComplete", - Message: "cluster-etcd-operator has scaled, etcd-bootstrap safe to remove", - })) - if updateErr != nil { - klog.Errorf("clustermembercontroller:sync: error updating status: %#v", updateErr) - return updateErr - } - return nil - } else { - klog.V(2).Info("scaling incomplete, etcd-bootstrap is not safe to remove") - _, _, updateErr := v1helpers.UpdateStatus(c.operatorConfigClient, v1helpers.UpdateConditionFn( - operatorv1.OperatorCondition{ - Type: ConditionBootstrapSafeToRemove, - Status: operatorv1.ConditionFalse, - Reason: "ScalingIncomplete", - Message: "cluster-etcd-operator is scaling, etcd-bootstrap is not safe to remove", - })) - if updateErr != nil { - klog.Errorf("clustermembercontroller:sync: error updating status: %#v", updateErr) - return updateErr - } - } - klog.Infof("Wait for cluster-etcd-operator to get ready") - return nil -} - -func (c *ClusterMemberController) Endpoints() ([]string, error) { - storageConfigURLsPath := []string{"storageConfig", "urls"} - operatorSpec, _, _, err := c.operatorConfigClient.GetOperatorState() - if err != nil { - return nil, err - } - config := map[string]interface{}{} - if err := json.NewDecoder(bytes.NewBuffer(operatorSpec.ObservedConfig.Raw)).Decode(&config); err != nil { - klog.V(4).Infof("decode of existing config failed with error: %v", err) - } - endpoints, exists, err := unstructured.NestedStringSlice(config, storageConfigURLsPath...) 
- if err != nil { - return nil, err - } - if !exists { - return nil, fmt.Errorf("etcd storageConfig urls not observed") - } - - klog.V(2).Infof("Endpoints: creating etcd client with endpoints %s", strings.Join(endpoints, ", ")) - return endpoints, nil -} - -func (c *ClusterMemberController) getEtcdClient() (*clientv3.Client, error) { - endpoints, err := c.Endpoints() - if err != nil { - return nil, err - } - tlsInfo := transport.TLSInfo{ - CertFile: etcdCertFile, - KeyFile: etcdKeyFile, - TrustedCAFile: etcdTrustedCAFile, - } - tlsConfig, err := tlsInfo.ClientConfig() - - dialOptions := []grpc.DialOption{ - grpc.WithBlock(), // block until the underlying connection is up - } - - cfg := &clientv3.Config{ - DialOptions: dialOptions, - Endpoints: endpoints, - DialTimeout: dialTimeout, - TLS: tlsConfig, - } - - cli, err := clientv3.New(*cfg) - if err != nil { - return nil, err - } - return cli, err -} - -func (c *ClusterMemberController) EtcdMemberRemove(name string) error { - cli, err := c.getEtcdClient() - defer cli.Close() - if err != nil { - return err - } - ctx, cancel := context.WithCancel(context.Background()) - l, err := cli.MemberList(ctx) - cancel() - if err != nil { - return err - } - for _, member := range l.Members { - if member.Name == name { - - resp, err := cli.MemberRemove(context.Background(), member.ID) - if err != nil { - return err - } - klog.Infof("Members left %#v", resp.Members) - } - } - return nil -} - -func (c *ClusterMemberController) EtcdList(bucket string) ([]ceoapi.Member, error) { - configPath := []string{"cluster", bucket} - operatorSpec, _, _, err := c.operatorConfigClient.GetOperatorState() - if err != nil { - return nil, err - } - config := map[string]interface{}{} - if err := json.NewDecoder(bytes.NewBuffer(operatorSpec.ObservedConfig.Raw)).Decode(&config); err != nil { - klog.V(4).Infof("decode of existing config failed with error: %v", err) - } - data, exists, err := unstructured.NestedSlice(config, configPath...) - if err != nil { - return nil, err - } - // populate current etcd members as observed. - members := []ceoapi.Member{} - if !exists { - klog.Infof("bucket %s empty", bucket) - return members, nil - } - - for _, member := range data { - memberMap, _ := member.(map[string]interface{}) - name, exists, err := unstructured.NestedString(memberMap, "name") - if err != nil { - return nil, err - } - if !exists { - return nil, fmt.Errorf("member name does not exist") - } - peerURLs, exists, err := unstructured.NestedString(memberMap, "peerURLs") - if err != nil { - return nil, err - } - if !exists { - return nil, fmt.Errorf("member peerURLs do not exist") - } - // why have different terms i.e. status and condition? can we choose one and mirror? 
- status, exists, err := unstructured.NestedString(memberMap, "status") - if err != nil { - return nil, err - } - if !exists { - return nil, fmt.Errorf("member status does not exist") - } - - condition := ceoapi.GetMemberCondition(status) - m := ceoapi.Member{ - Name: name, - PeerURLS: []string{peerURLs}, - Conditions: []ceoapi.MemberCondition{ - { - Type: condition, - }, - }, - } - members = append(members, m) - } - return members, nil -} - -func (c *ClusterMemberController) IsMember(name string) bool { - members, _ := c.EtcdList("members") - for _, m := range members { - if m.Name == name { - return true - } - } - return false -} - -func (c *ClusterMemberController) IsEtcdMember(name string) bool { - cli, err := c.getEtcdClient() - defer cli.Close() - if err != nil { - return false - } - ctx, cancel := context.WithCancel(context.Background()) - l, err := cli.MemberList(ctx) - cancel() - if err != nil { - return false - } - for _, m := range l.Members { - if m.Name == name { - return true - } - } - return false -} - -// IsStatus returns true or false based on the bucket name and status of an etcd. If multiple status are passed -// the compare is done using or so if true one of the status exists for that etcd. -func (c *ClusterMemberController) IsStatus(bucket string, name string, condition ...ceoapi.MemberConditionType) bool { - members, _ := c.EtcdList(bucket) - for _, m := range members { - klog.Warningf("IsMemberRemove: checking %v vs %v type = %v\n", m.Name, name, m.Conditions[0].Type) - if m.Name == name { - for _, status := range condition { - if m.Conditions[0].Type == status { - return true - } - } - } - } - return false -} - -func (c *ClusterMemberController) setScaleAnnotation(scaling string) error { - result, err := c.clientset.CoreV1().ConfigMaps("openshift-etcd").Get("member-config", metav1.GetOptions{}) - if err != nil { - return err - } - if result.Annotations == nil { - result.Annotations = make(map[string]string) - } - result.Annotations[EtcdScalingAnnotationKey] = scaling - _, updateErr := c.clientset.CoreV1().ConfigMaps("openshift-etcd").Update(result) - if updateErr != nil { - return updateErr - } - - return nil -} - -func (c *ClusterMemberController) getScaleAnnotationName() (string, error) { - cm, err := c.clientset.CoreV1().ConfigMaps("openshift-etcd").Get("member-config", metav1.GetOptions{}) - if err != nil { - return "", err - } - return GetScaleAnnotationName(cm) -} - -func (c *ClusterMemberController) Run(stopCh <-chan struct{}) { - defer utilruntime.HandleCrash() - defer c.queue.ShutDown() - - klog.Infof("Starting ClusterMemberController") - defer klog.Infof("Shutting down ClusterMemberController") - - if !cache.WaitForCacheSync(stopCh, - c.kubeInformersForOpenshiftEtcdNamespace.Core().V1().Pods().Informer().HasSynced, - c.kubeInformersForOpenshiftEtcdNamespace.Core().V1().Endpoints().Informer().HasSynced, - c.kubeInformersForOpenshiftEtcdNamespace.Core().V1().ConfigMaps().Informer().HasSynced, - c.operatorConfigClient.Informer().HasSynced) { - utilruntime.HandleError(fmt.Errorf("caches did not sync")) - return - } - - go wait.Until(c.runWorker, time.Second, stopCh) - - <-stopCh -} - -func (c *ClusterMemberController) runWorker() { - for c.processNextWorkItem() { - } -} - -func (c *ClusterMemberController) processNextWorkItem() bool { - dsKey, quit := c.queue.Get() - if quit { - return false - } - defer c.queue.Done(dsKey) - - err := c.sync() - if err == nil { - c.queue.Forget(dsKey) - return true - } - - utilruntime.HandleError(fmt.Errorf("%v failed with : 
%v", dsKey, err)) - c.queue.AddRateLimited(dsKey) - - return true -} - -// eventHandler queues the operator to check spec and status -func (c *ClusterMemberController) eventHandler() cache.ResourceEventHandler { - return cache.ResourceEventHandlerFuncs{ - AddFunc: func(obj interface{}) { c.queue.Add(workQueueKey) }, - UpdateFunc: func(old, new interface{}) { c.queue.Add(workQueueKey) }, - DeleteFunc: func(obj interface{}) { c.queue.Add(workQueueKey) }, - } -} - -func (c *ClusterMemberController) etcdMemberAdd(peerURLs []string) error { - cli, err := c.getEtcdClient() - defer cli.Close() - if err != nil { - return err - } - ctx, cancel := context.WithCancel(context.Background()) - resp, err := cli.MemberAdd(ctx, peerURLs) - cancel() - if err != nil { - return err - } - klog.Infof("added etcd member.PeerURLs:%s", resp.Member.PeerURLs) - return nil -} - -func (c *ClusterMemberController) RemoveBootstrap() error { - err := c.RemoveBootstrapFromEndpoint() - if err != nil { - return err - } - return c.EtcdMemberRemove("etcd-bootstrap") -} - -func (c *ClusterMemberController) RemoveBootstrapFromEndpoint() error { - hostEndpoint, err := c.clientset.CoreV1(). - Endpoints(EtcdEndpointNamespace). - Get(EtcdHostEndpointName, metav1.GetOptions{}) - if err != nil { - klog.Errorf("error getting endpoint: %#v\n", err) - return err - } - - hostEndpointCopy := hostEndpoint.DeepCopy() - - subsetIndex := -1 - bootstrapIndex := -1 - for sI, s := range hostEndpointCopy.Subsets { - for i, s := range s.Addresses { - if s.Hostname == "etcd-bootstrap" { - bootstrapIndex = i - subsetIndex = sI - break - } - } - } - - if subsetIndex == -1 || bootstrapIndex == -1 { - // Unable to find bootstrap - return nil - } - - if len(hostEndpointCopy.Subsets[subsetIndex].Addresses) <= 1 { - return fmt.Errorf("only etcd-bootstrap endpoint observed, try again") - } - - hostEndpointCopy.Subsets[subsetIndex].Addresses = append(hostEndpointCopy.Subsets[subsetIndex].Addresses[0:bootstrapIndex], hostEndpointCopy.Subsets[subsetIndex].Addresses[bootstrapIndex+1:]...) 
- - _, err = c.clientset.CoreV1().Endpoints(EtcdEndpointNamespace).Update(hostEndpointCopy) - if err != nil { - klog.Errorf("error updating endpoint: %#v\n", err) - return err - } - - return nil -} - -func (c *ClusterMemberController) getResyncName(pods *corev1.PodList) (string, error) { - name, err := c.getScaleAnnotationName() - if err != nil { - return "", fmt.Errorf("failed to obtain name from annotation %v", err) - } - - for i := range pods.Items { - p := &pods.Items[i] - klog.Errorf("getResyncName: compare %s vs %s\n", p.Name, name) - if p.Name == name { - return name, nil - } - } - return "", nil -} - -func (c *ClusterMemberController) isClusterEtcdOperatorReady() bool { - pendingMembers, err := c.EtcdList("pending") - if err != nil { - klog.Errorf("error getting pending members: %#v", err) - return false - } - if len(pendingMembers) > 0 { - klog.Infof("some members are pending: %#v", pendingMembers) - return false - } - members, err := c.EtcdList("members") - if err != nil { - klog.Errorf("error getting members: %#v", err) - return false - } - if len(members) == 0 { - klog.Infof("no etcd member found") - return false - } - if len(members) == 1 && c.IsMember("etcd-bootstrap") { - klog.Infof("etcd-bootstrap is the only known member") - return false - } - return true -} diff --git a/pkg/operator/clustermembercontroller/clustermembercontroller_test.go b/pkg/operator/clustermembercontroller/clustermembercontroller_test.go deleted file mode 100644 index 2541a864d1..0000000000 --- a/pkg/operator/clustermembercontroller/clustermembercontroller_test.go +++ /dev/null @@ -1,230 +0,0 @@ -package clustermembercontroller - -import ( - "bytes" - "encoding/json" - "testing" - - operatorv1 "github.com/openshift/api/operator/v1" - ceoapi "github.com/openshift/cluster-etcd-operator/pkg/operator/api" - "github.com/openshift/library-go/pkg/operator/events" - "github.com/openshift/library-go/pkg/operator/v1helpers" - v1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" - "k8s.io/apimachinery/pkg/runtime" - "k8s.io/client-go/kubernetes" - "k8s.io/client-go/kubernetes/fake" - "k8s.io/client-go/util/workqueue" - "k8s.io/klog" -) - -var ( - clusterDomain = "operator.testing.openshift" - clusterMembersPendingPath = []string{"cluster", "pending"} - clusterMembersPath = []string{"cluster", "members"} -) - -func TestClusterMemberController_RemoveBootstrapFromEndpoint(t *testing.T) { - - client := fake.NewSimpleClientset() - - addressList := []v1.EndpointAddress{ - { - IP: "192.168.2.1", - Hostname: "etcd-bootstrap", - }, - { - IP: "192.168.2.2", - Hostname: "etcd-1", - }, - { - IP: "192.168.2.3", - Hostname: "etcd-2", - }, - { - IP: "192.168.2.4", - Hostname: "etcd-3", - }, - } - ep := &v1.Endpoints{ - ObjectMeta: metav1.ObjectMeta{ - Name: EtcdHostEndpointName, - Namespace: EtcdEndpointNamespace, - }, - Subsets: []v1.EndpointSubset{ - { - Addresses: addressList, - }, - }, - } - - _, err := client.CoreV1().Endpoints(EtcdEndpointNamespace).Create(ep) - if err != nil { - t.Fatal() - } - - type fields struct { - clientset kubernetes.Interface - operatorConfigClient v1helpers.OperatorClient - queue workqueue.RateLimitingInterface - eventRecorder events.Recorder - etcdDiscoveryDomain string - } - tests := []struct { - name string - fields fields - wantErr bool - }{ - { - name: "remove 0th address", - fields: fields{ - clientset: client, - etcdDiscoveryDomain: "", - }, - wantErr: false, - }, - } - for _, tt := range tests { - t.Run(tt.name, 
func(t *testing.T) { - c := &ClusterMemberController{ - clientset: tt.fields.clientset, - operatorConfigClient: tt.fields.operatorConfigClient, - queue: tt.fields.queue, - eventRecorder: tt.fields.eventRecorder, - etcdDiscoveryDomain: tt.fields.etcdDiscoveryDomain, - } - if err := c.RemoveBootstrapFromEndpoint(); (err != nil) != tt.wantErr { - t.Errorf("RemoveBootstrapFromEndpoint() error = %v, wantErr %v", err, tt.wantErr) - } - }) - } -} - -func getBytes(obj interface{}) ([]byte, error) { - var buf bytes.Buffer - enc := json.NewEncoder(&buf) - if err := enc.Encode(obj); err != nil { - return nil, err - } - return buf.Bytes(), nil -} - -func getEtcdSpec(pending, ready []string) *operatorv1.OperatorSpec { - observedConfig := map[string]interface{}{} - etcdPendingMembers := []interface{}{} - etcdMembers := []interface{}{} - - for _, pm := range pending { - pendingBucket := map[string]interface{}{} - if err := unstructured.SetNestedField(pendingBucket, pm+"-node", "name"); err != nil { - klog.Fatalf("error occured in writing nested fields %#v", err) - } - if err := unstructured.SetNestedField(pendingBucket, "https://"+pm+"."+clusterDomain+":2380", "peerURLs"); err != nil { - klog.Fatalf("error occured in writing nested fields %#v", err) - } - if err := unstructured.SetNestedField(pendingBucket, string(ceoapi.MemberUnknown), "status"); err != nil { - klog.Fatalf("error occured in writing nested fields %#v", err) - } - etcdPendingMembers = append(etcdPendingMembers, pendingBucket) - } - for _, m := range ready { - memberBucket := map[string]interface{}{} - if err := unstructured.SetNestedField(memberBucket, m, "name"); err != nil { - klog.Fatalf("error occured in writing nested fields %#v", err) - } - if err := unstructured.SetNestedField(memberBucket, "https://"+m+"."+clusterDomain+":2380", "peerURLs"); err != nil { - klog.Fatalf("error occured in writing nested fields %#v", err) - } - if err := unstructured.SetNestedField(memberBucket, string(ceoapi.MemberUnknown), "status"); err != nil { - klog.Fatalf("error occured in writing nested fields %#v", err) - } - etcdMembers = append(etcdMembers, memberBucket) - } - if len(pending) > 0 { - if err := unstructured.SetNestedField(observedConfig, etcdPendingMembers, clusterMembersPendingPath...); err != nil { - klog.Fatalf("error occured in writing pending members: %#v", err) - } - } - if len(ready) > 0 { - if err := unstructured.SetNestedField(observedConfig, etcdMembers, clusterMembersPath...); err != nil { - klog.Fatalf("error occured in writing members: %#v", err) - } - } - etcdURLsBytes, err := getBytes(observedConfig) - if err != nil { - klog.Fatalf("error occured in getting bytes for etcdURLs: %#v", err) - } - return &operatorv1.OperatorSpec{ - ObservedConfig: runtime.RawExtension{ - Raw: etcdURLsBytes, - }, - } -} - -func TestClusterMemberController_isClusterEtcdOperatorReady(t *testing.T) { - type fields struct { - operatorConfigClient v1helpers.OperatorClient - } - tests := []struct { - name string - fields fields - want bool - }{ - { - name: "test with 1 pending member and no ready", - fields: fields{ - operatorConfigClient: v1helpers.NewFakeOperatorClient(getEtcdSpec([]string{"etcd-1"}, []string{}), - nil, - nil), - }, - want: false, - }, - { - name: "test with 0 pending member and no ready members", - fields: fields{ - operatorConfigClient: v1helpers.NewFakeOperatorClient(getEtcdSpec([]string{}, []string{}), - nil, - nil), - }, - want: false, - }, - { - name: "test with 0 pending member and etcd-bootstrap ready", - fields: fields{ - 
operatorConfigClient: v1helpers.NewFakeOperatorClient(getEtcdSpec([]string{}, []string{"etcd-bootstrap"}), - nil, - nil), - }, - want: false, - }, - { - name: "test with 1 pending member and more than 1 ready", - fields: fields{ - operatorConfigClient: v1helpers.NewFakeOperatorClient(getEtcdSpec([]string{"etcd-3"}, []string{"etcd-bootstrap", "etcd-1", "etcd-2"}), - nil, - nil), - }, - want: false, - }, - { - name: "test with 0 pending member and more than 1 ready", - fields: fields{ - operatorConfigClient: v1helpers.NewFakeOperatorClient(getEtcdSpec([]string{}, []string{"etcd-bootstrap", "etcd-1", "etcd-2"}), - nil, - nil), - }, - want: true, - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - c := &ClusterMemberController{ - operatorConfigClient: tt.fields.operatorConfigClient, - } - if got := c.isClusterEtcdOperatorReady(); got != tt.want { - t.Errorf("isClusterEtcdOperatorReady() = %v, want %v", got, tt.want) - } - }) - } -} diff --git a/pkg/operator/clustermembercontroller/util.go b/pkg/operator/clustermembercontroller/util.go index 99a3c00328..0f5f0feb21 100644 --- a/pkg/operator/clustermembercontroller/util.go +++ b/pkg/operator/clustermembercontroller/util.go @@ -1,32 +1,13 @@ package clustermembercontroller import ( - "encoding/json" "fmt" "net" "strings" - ceoapi "github.com/openshift/cluster-etcd-operator/pkg/operator/api" - corev1 "k8s.io/api/core/v1" "k8s.io/klog" ) -func GetScaleAnnotationName(configMap *corev1.ConfigMap) (string, error) { - scaling := &ceoapi.EtcdScaling{} - data, ok := configMap.Annotations[EtcdScalingAnnotationKey] - if !ok { - return "", nil - } - if data == "" { - return "", nil - } - if err := json.Unmarshal([]byte(data), scaling); err != nil { - klog.Infof("unable to unmarshal scaling data %#v\n", err) - return "", err - } - return scaling.Metadata.Name, nil -} - func ReverseLookupSelf(service, proto, name, self string) (string, error) { _, srvs, err := net.LookupSRV(service, proto, name) if err != nil { diff --git a/pkg/operator/clustermembercontroller2/clustermembercontroller.go b/pkg/operator/clustermembercontroller2/clustermembercontroller.go new file mode 100644 index 0000000000..165e0c822c --- /dev/null +++ b/pkg/operator/clustermembercontroller2/clustermembercontroller.go @@ -0,0 +1,420 @@ +package clustermembercontroller2 + +import ( + "context" + "fmt" + "strings" + "time" + + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/apimachinery/pkg/runtime/schema" + "k8s.io/client-go/dynamic" + + operatorv1 "github.com/openshift/api/operator/v1" + "github.com/openshift/cluster-etcd-operator/pkg/operator/clustermembercontroller" + "github.com/openshift/library-go/pkg/operator/events" + "github.com/openshift/library-go/pkg/operator/v1helpers" + "go.etcd.io/etcd/clientv3" + "go.etcd.io/etcd/pkg/transport" + "google.golang.org/grpc" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/labels" + utilruntime "k8s.io/apimachinery/pkg/util/runtime" + "k8s.io/apimachinery/pkg/util/wait" + "k8s.io/client-go/informers" + corev1listers "k8s.io/client-go/listers/core/v1" + "k8s.io/client-go/tools/cache" + "k8s.io/client-go/util/workqueue" + "k8s.io/klog" +) + +const ( + workQueueKey = "key" + // todo: need to understand how to make this dynamic across all platforms + totalDesiredEtcd = 3 +) + +// watches the etcd static pods, picks one unready pod and adds +// to etcd membership only if all existing members are running healthy +// skips if any one member is unhealthy. 
+type ClusterMemberController struct { + dynamicClient dynamic.Interface + operatorClient v1helpers.OperatorClient + kubeInformers informers.SharedInformerFactory + endpointsLister corev1listers.EndpointsLister + podLister corev1listers.PodLister + nodeLister corev1listers.NodeLister + + cachesToSync []cache.InformerSynced + queue workqueue.RateLimitingInterface + eventRecorder events.Recorder +} + +func NewClusterMemberController( + dynamicClient dynamic.Interface, + operatorClient v1helpers.OperatorClient, + kubeInformers informers.SharedInformerFactory, + eventRecorder events.Recorder, +) *ClusterMemberController { + c := &ClusterMemberController{ + dynamicClient: dynamicClient, + operatorClient: operatorClient, + endpointsLister: kubeInformers.Core().V1().Endpoints().Lister(), + podLister: kubeInformers.Core().V1().Pods().Lister(), + nodeLister: kubeInformers.Core().V1().Nodes().Lister(), + + cachesToSync: []cache.InformerSynced{ + operatorClient.Informer().HasSynced, + kubeInformers.Core().V1().Endpoints().Informer().HasSynced, + kubeInformers.Core().V1().Pods().Informer().HasSynced, + kubeInformers.Core().V1().ConfigMaps().Informer().HasSynced, + kubeInformers.Core().V1().Nodes().Informer().HasSynced, + operatorClient.Informer().HasSynced, + }, + queue: workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "ClusterMemberController2"), + kubeInformers: kubeInformers, + eventRecorder: eventRecorder.WithComponentSuffix("cluster-member-controller-2"), + } + kubeInformers.Core().V1().Pods().Informer().AddEventHandler(c.eventHandler()) + kubeInformers.Core().V1().Endpoints().Informer().AddEventHandler(c.eventHandler()) + kubeInformers.Core().V1().ConfigMaps().Informer().AddEventHandler(c.eventHandler()) + operatorClient.Informer().AddEventHandler(c.eventHandler()) + + return c +} + +func (c *ClusterMemberController) Run(stopCh <-chan struct{}) { + defer utilruntime.HandleCrash() + defer c.queue.ShutDown() + + klog.Infof("Starting ClusterMemberController2") + defer klog.Infof("Shutting down ClusterMemberController2") + + if !cache.WaitForCacheSync(stopCh, c.cachesToSync...) 
{ + utilruntime.HandleError(fmt.Errorf("caches did not sync")) + return + } + + go wait.Until(c.runWorker, time.Second, stopCh) + + go wait.Until(func() { + c.queue.Add(workQueueKey) + }, time.Minute, stopCh) + + <-stopCh +} + +func (c *ClusterMemberController) runWorker() { + for c.processNextWorkItem() { + } +} + +func (c *ClusterMemberController) processNextWorkItem() bool { + dsKey, quit := c.queue.Get() + if quit { + return false + } + defer c.queue.Done(dsKey) + + err := c.sync() + if err == nil { + c.queue.Forget(dsKey) + return true + } + + utilruntime.HandleError(fmt.Errorf("%v failed with : %v", dsKey, err)) + c.queue.AddRateLimited(dsKey) + + return true +} + +func (c *ClusterMemberController) sync() error { + err := c.reconcileMembers() + if err != nil { + _, _, updateErr := v1helpers.UpdateStatus(c.operatorClient, v1helpers.UpdateConditionFn(operatorv1.OperatorCondition{ + Type: "ClusterMemberController2Degraded", + Status: operatorv1.ConditionTrue, + Reason: "Error", + Message: err.Error(), + })) + if updateErr != nil { + c.eventRecorder.Warning("ClusterMemberController2UpdatingStatus", updateErr.Error()) + } + return err + } + + _, _, updateErr := v1helpers.UpdateStatus(c.operatorClient, + v1helpers.UpdateConditionFn(operatorv1.OperatorCondition{ + Type: "ClusterMemberController2Degraded", + Status: operatorv1.ConditionFalse, + Reason: "AsExpected", + })) + return updateErr +} + +func (c *ClusterMemberController) reconcileMembers() error { + etcdHealthy, err := c.areAllEtcdMembersHealthy() + if err != nil { + return err + } + if !etcdHealthy { + c.eventRecorder.Eventf("WaitingOnEtcdMember", "waiting for all member of etcd to be healthy") + return nil + } + + // etcd is healthy, decide if we need to scale + unreadyPods, err := c.getUnreadyEtcdPods() + if err != nil { + return err + } + + if len(unreadyPods) == 0 { + _, _, updateErr := v1helpers.UpdateStatus(c.operatorClient, + v1helpers.UpdateConditionFn(operatorv1.OperatorCondition{ + Type: "ClusterMemberControllerScalingProgressing", + Status: operatorv1.ConditionFalse, + Reason: "AsExpected", + Message: "Scaling etcd membership completed", + }), + // todo: remove this make bootsrap remove independent + v1helpers.UpdateConditionFn(operatorv1.OperatorCondition{ + Type: "BootstrapSafeToRemove", + Status: operatorv1.ConditionTrue, + Reason: "AsExpected", + Message: "Scaling etcd membership has completed", + })) + if updateErr != nil { + return updateErr + } + // no more work left to do + return nil + } + + _, _, updateErr := v1helpers.UpdateStatus(c.operatorClient, + v1helpers.UpdateConditionFn(operatorv1.OperatorCondition{ + Type: "ClusterMemberControllerScalingProgressing", + Status: operatorv1.ConditionTrue, + Reason: "Scaling", + Message: "Scaling etcd membership", + }), + // todo: remove this make bootsrap remove independent + v1helpers.UpdateConditionFn(operatorv1.OperatorCondition{ + Type: "BootstrapSafeToRemove", + Status: operatorv1.ConditionFalse, + Reason: "EtcdScaling", + Message: fmt.Sprintf("waiting for %d/%d pods to be scaled", len(unreadyPods), totalDesiredEtcd), + })) + if updateErr != nil { + return updateErr + } + + podFQDN, err := c.getValidPodFQDNToScale(unreadyPods) + if err != nil { + return err + } + + err = c.AddMember(podFQDN) + if err != nil { + return err + } + return nil +} + +func (c *ClusterMemberController) getEtcdClient() (*clientv3.Client, error) { + endpoints, err := c.Endpoints() + if err != nil { + return nil, err + } + + dialOptions := []grpc.DialOption{ + grpc.WithBlock(), // block until 
+func (c *ClusterMemberController) getEtcdClient() (*clientv3.Client, error) {
+	endpoints, err := c.Endpoints()
+	if err != nil {
+		return nil, err
+	}
+
+	dialOptions := []grpc.DialOption{
+		grpc.WithBlock(), // block until the underlying connection is up
+	}
+
+	tlsInfo := transport.TLSInfo{
+		CertFile:      "/var/run/secrets/etcd-client/tls.crt",
+		KeyFile:       "/var/run/secrets/etcd-client/tls.key",
+		TrustedCAFile: "/var/run/configmaps/etcd-ca/ca-bundle.crt",
+	}
+	tlsConfig, err := tlsInfo.ClientConfig()
+	if err != nil {
+		return nil, err
+	}
+
+	cfg := &clientv3.Config{
+		DialOptions: dialOptions,
+		Endpoints:   endpoints,
+		DialTimeout: 5 * time.Second,
+		TLS:         tlsConfig,
+	}
+
+	cli, err := clientv3.New(*cfg)
+	if err != nil {
+		return nil, err
+	}
+	return cli, nil
+}
+
+func (c *ClusterMemberController) Endpoints() ([]string, error) {
+	etcdDiscoveryDomain, err := c.getEtcdDiscoveryDomain()
+	if err != nil {
+		return []string{}, err
+	}
+	hostEtcd, err := c.endpointsLister.Endpoints(clustermembercontroller.EtcdEndpointNamespace).Get(clustermembercontroller.EtcdHostEndpointName)
+	if err != nil {
+		c.eventRecorder.Warningf("ErrorGettingHostEtcd", "error occurred while getting host-etcd endpoint: %#v", err)
+		return []string{}, err
+	}
+	if len(hostEtcd.Subsets) == 0 {
+		c.eventRecorder.Warningf("EtcdAddressNotFound", "could not find etcd address in host-etcd")
+		return []string{}, fmt.Errorf("could not find etcd address in host-etcd")
+	}
+	var endpoints []string
+	for _, addr := range hostEtcd.Subsets[0].Addresses {
+		if addr.Hostname == "etcd-bootstrap" {
+			endpoints = append(endpoints, fmt.Sprintf("https://%s:2379", addr.IP))
+		} else {
+			endpoints = append(endpoints, fmt.Sprintf("https://%s.%s:2379", addr.Hostname, etcdDiscoveryDomain))
+		}
+	}
+	return endpoints, nil
+}
+
+func (c *ClusterMemberController) eventHandler() cache.ResourceEventHandler {
+	return cache.ResourceEventHandlerFuncs{
+		AddFunc:    func(obj interface{}) { c.queue.Add(workQueueKey) },
+		UpdateFunc: func(old, new interface{}) { c.queue.Add(workQueueKey) },
+		DeleteFunc: func(obj interface{}) { c.queue.Add(workQueueKey) },
+	}
+}
+
+func (c *ClusterMemberController) areAllEtcdMembersHealthy() (bool, error) {
+	// getting a new client every time because we don't know what the etcd membership looks like
+	etcdClient, err := c.getEtcdClient()
+	if err != nil {
+		return false, fmt.Errorf("error getting etcd client: %w", err)
+	}
+	defer etcdClient.Close()
+
+	memberList, err := etcdClient.MemberList(context.Background())
+	if err != nil {
+		return false, fmt.Errorf("error getting etcd member list: %w", err)
+	}
+	for _, member := range memberList.Members {
+		if len(member.ClientURLs) == 0 {
+			// an added-but-unstarted member has no client URLs yet and cannot be healthy
+			c.eventRecorder.Warningf("EtcdMemberNotHealthy", "etcd member %s has no client URLs yet", member.Name)
+			return false, nil
+		}
+		statusResp, err := etcdClient.Status(context.Background(), member.ClientURLs[0])
+		if err != nil {
+			c.eventRecorder.Warningf("EtcdMemberNotHealthy", "etcd member %s is not healthy: %#v", member.Name, err)
+			// the error indicates an unhealthy member, so report unhealthy
+			// rather than returning the actual error
+			return false, nil
+		}
+		klog.V(4).Infof("etcd member %s is healthy with raft index %d", member.Name, statusResp.RaftIndex)
+	}
+	return true, nil
+}
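areAllEtcdMembersHealthy issues its MemberList and Status calls with context.Background(), so a hung endpoint can stall the sync worker for as long as gRPC waits. A hedged sketch of the same probe with per-call timeouts; the import path matches the vendored etcd client assumed here, and the five-second budget is an assumption, not something this patch specifies:

package etcdhealth

import (
	"context"
	"time"

	"go.etcd.io/etcd/clientv3"
)

// allMembersHealthy mirrors the controller's probe, but bounds each RPC
// so a dead member cannot block the reconcile loop indefinitely.
func allMembersHealthy(cli *clientv3.Client) (bool, error) {
	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	memberList, err := cli.MemberList(ctx)
	cancel()
	if err != nil {
		return false, err
	}
	for _, m := range memberList.Members {
		if len(m.ClientURLs) == 0 {
			return false, nil // added but not started: no URLs to probe yet
		}
		sctx, scancel := context.WithTimeout(context.Background(), 5*time.Second)
		_, err := cli.Status(sctx, m.ClientURLs[0])
		scancel()
		if err != nil {
			return false, nil // unhealthy member, not a controller error
		}
	}
	return true, nil
}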
+func (c *ClusterMemberController) getUnreadyEtcdPods() ([]*corev1.Pod, error) {
+	// list etcd member pods
+	pods, err := c.podLister.List(labels.Set{"app": "etcd"}.AsSelector())
+	if err != nil {
+		return nil, err
+	}
+
+	// go through the list of all pods and collect the ones whose Ready
+	// condition is not true; DNS resolution is handled by the caller.
+	var unreadyPods []*corev1.Pod
+	for _, pod := range pods {
+		ready := false
+		for _, condition := range pod.Status.Conditions {
+			if condition.Type == corev1.PodReady {
+				ready = condition.Status == corev1.ConditionTrue
+				klog.V(4).Infof("pod %s ready condition is %s", pod.Name, condition.Status)
+				break
+			}
+		}
+		if !ready {
+			c.eventRecorder.Eventf("FoundPodToScale", "found pod %s to scale in etcd membership", pod.Name)
+			unreadyPods = append(unreadyPods, pod)
+		}
+	}
+	return unreadyPods, nil
+}
+
+func (c *ClusterMemberController) AddMember(peerFQDN string) error {
+	etcdClient, err := c.getEtcdClient()
+	if err != nil {
+		c.eventRecorder.Warningf("ErrorGettingEtcdClient", "error getting etcd client: %#v", err)
+		return err
+	}
+	defer etcdClient.Close()
+
+	ctx, cancel := context.WithCancel(context.Background())
+	resp, err := etcdClient.MemberAdd(ctx, []string{fmt.Sprintf("https://%s:2380", peerFQDN)})
+	cancel()
+	if err != nil {
+		c.eventRecorder.Warningf("ErrorAddingMember", "error adding member with peerFQDN %s to etcd api: %#v", peerFQDN, err)
+		return err
+	}
+	c.eventRecorder.Eventf("MemberAdded", "member %s added to etcd membership %#v", resp.Member.Name, resp.Members)
+	return nil
+}
+
+func (c *ClusterMemberController) getEtcdDiscoveryDomain() (string, error) {
+	controllerConfig, err := c.dynamicClient.
+		Resource(schema.GroupVersionResource{Group: "machineconfiguration.openshift.io", Version: "v1", Resource: "controllerconfigs"}).
+		Get("machine-config-controller", metav1.GetOptions{})
+	if err != nil {
+		return "", err
+	}
+	etcdDiscoveryDomain, ok, err := unstructured.NestedString(controllerConfig.Object, "spec", "etcdDiscoveryDomain")
+	if err != nil {
+		return "", err
+	}
+	if !ok {
+		return "", fmt.Errorf("controllerconfigs/machine-config-controller missing .spec.etcdDiscoveryDomain")
+	}
+	return etcdDiscoveryDomain, nil
+}
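getEtcdDiscoveryDomain above reads a nested field out of an unstructured dynamic-client object; unstructured.NestedString returns (value, found, error), and both failure modes need separate handling, as the controller does. A self-contained sketch against a stand-in object rather than a live controllerconfig:

package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
)

func main() {
	// stand-in for controllerconfigs/machine-config-controller content
	obj := map[string]interface{}{
		"spec": map[string]interface{}{
			"etcdDiscoveryDomain": "example.openshift.test", // illustrative value
		},
	}
	// found=false means the path is absent; err!=nil means a wrong type on the path
	domain, found, err := unstructured.NestedString(obj, "spec", "etcdDiscoveryDomain")
	if err != nil || !found {
		fmt.Println("missing or malformed .spec.etcdDiscoveryDomain")
		return
	}
	fmt.Println(domain) // example.openshift.test
}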
+// getValidPodFQDNToScale goes through the list of unready pods and
+// returns a resolvable podFQDN. If none of the DNS names are available
+// yet it will return the collected errors.
+func (c *ClusterMemberController) getValidPodFQDNToScale(unreadyPods []*corev1.Pod) (string, error) {
+	etcdDiscoveryDomain, err := c.getEtcdDiscoveryDomain()
+	if err != nil {
+		return "", err
+	}
+	errorStrings := []string{}
+	for _, p := range unreadyPods {
+		if p.Spec.NodeName == "" {
+			return "", fmt.Errorf("node name empty for %s", p.Name)
+		}
+		nodeInternalIP, err := c.getNodeInternalIP(p.Spec.NodeName)
+		if err != nil {
+			errorStrings = append(errorStrings, err.Error())
+			continue
+		}
+		podFQDN, err := clustermembercontroller.ReverseLookupSelf("etcd-server-ssl", "tcp", etcdDiscoveryDomain, nodeInternalIP)
+		if err != nil {
+			errorStrings = append(errorStrings, err.Error())
+			continue
+		}
+		return podFQDN, nil
+	}
+	if len(errorStrings) > 0 {
+		return "", fmt.Errorf("%s", strings.Join(errorStrings, ","))
+	}
+	return "", fmt.Errorf("cannot get a valid podFQDN to scale")
+}
+
+func (c *ClusterMemberController) getNodeInternalIP(nodeName string) (string, error) {
+	node, err := c.nodeLister.Get(nodeName)
+	if err != nil {
+		return "", err
+	}
+	if node.Status.Addresses == nil {
+		return "", fmt.Errorf("cannot get node IP: address list for node %s is nil", nodeName)
+	}
+
+	for _, addr := range node.Status.Addresses {
+		if addr.Type == corev1.NodeInternalIP {
+			return addr.Address, nil
+		}
+	}
+	return "", fmt.Errorf("unable to get internal IP address for node %s", nodeName)
+}
diff --git a/pkg/operator/configobservation/configobservercontroller/observe_config_controller.go b/pkg/operator/configobservation/configobservercontroller/observe_config_controller.go index 13a0206fa1..1dca8ee68d 100644 --- a/pkg/operator/configobservation/configobservercontroller/observe_config_controller.go +++ b/pkg/operator/configobservation/configobservercontroller/observe_config_controller.go @@ -11,8 +11,6 @@ import ( "github.com/openshift/library-go/pkg/operator/v1helpers" "github.com/openshift/cluster-etcd-operator/pkg/operator/configobservation" - "github.com/openshift/cluster-etcd-operator/pkg/operator/configobservation/etcd" - "github.com/openshift/cluster-etcd-operator/pkg/operator/operatorclient" ) @@ -31,7 +29,7 @@ func NewConfigObserver( interestingNamespaces := []string{ operatorclient.GlobalUserSpecifiedConfigNamespace, operatorclient.GlobalMachineSpecifiedConfigNamespace, - operatorclient.TargetNamespace, + "openshift-etcd", operatorclient.OperatorNamespace, } @@ -61,9 +59,6 @@ func NewConfigObserver( kubeInformersForNamespaces.InformersFor("").Core().V1().Nodes().Informer().HasSynced, ), }, - etcd.ObserveStorageURLs, - etcd.ObserveClusterMembers, - etcd.ObservePendingClusterMembers, ), } diff --git a/pkg/operator/configobservation/etcd/OWNERS b/pkg/operator/configobservation/etcd/OWNERS deleted file mode 100644 index ec16c0b125..0000000000 --- a/pkg/operator/configobservation/etcd/OWNERS +++ /dev/null @@ -1,16 +0,0 @@ -reviewers: - - abhinavdahiya - - deads2k - - hexfusion - - alaypatel07 - - mfojtik - - soltysh - - sttts -approvers: - - abhinavdahiya - - deads2k - - hexfusion - - alaypatel07 - - mfojtik - - sttts - - tnozicka diff --git a/pkg/operator/configobservation/etcd/observe_etcd.go b/pkg/operator/configobservation/etcd/observe_etcd.go deleted file mode 100644 index 77f8d17b66..0000000000 --- a/pkg/operator/configobservation/etcd/observe_etcd.go +++ /dev/null @@ -1,565 +0,0 @@ -package etcd - -import ( - "fmt" - "net" - "reflect" - "strings" - "time" - - "github.com/cloudflare/cfssl/log" - ceoapi "github.com/openshift/cluster-etcd-operator/pkg/operator/api" -
"github.com/openshift/cluster-etcd-operator/pkg/operator/clustermembercontroller" - corelistersv1 "k8s.io/client-go/listers/core/v1" - - "github.com/openshift/cluster-etcd-operator/pkg/operator/configobservation" - - corev1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/api/errors" - "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" - "k8s.io/klog" - - "github.com/openshift/library-go/pkg/operator/configobserver" - "github.com/openshift/library-go/pkg/operator/events" -) - -const ( - Pending = "pending" - Member = "member" - numberOfInitContainers = 4 -) - -type etcdObserver struct { - listers configobservation.Listers - existingConfig map[string]interface{} - endpoints []string - HealthyMember map[string]bool - ClusterDomain string - - memberPath []string - pendingPath []string - - ObservedMembers []interface{} - ObservedPending []interface{} - PreviouslyObservedPending []ceoapi.Member - recorder events.Recorder -} - -// ObserveClusterMembers observes the current etcd cluster members. -func ObserveClusterMembers(genericListers configobserver.Listers, recorder events.Recorder, existingConfig map[string]interface{}) (map[string]interface{}, []error) { - observedConfig := map[string]interface{}{} - healthyMember := make(map[string]bool) - var errs []error - - observer := etcdObserver{ - listers: genericListers.(configobservation.Listers), - memberPath: []string{"cluster", "members"}, - pendingPath: []string{"cluster", "pending"}, - HealthyMember: healthyMember, - } - if err := observer.setClusterDomain(); err != nil { - return existingConfig, append(errs, err) - } - previouslyObservedMembers, err := observer.getPathObservationData(observer.memberPath, existingConfig) - if err != nil { - errs = append(errs, err) - } - - // etcd-bootstrap is a special case, we make initial assuptions based on the existance of the value in host endpoints - // once we scale down bootstrap this record should be removed. - if err := observer.setBootstrapMember(); err != nil { - errs = append(errs, err) - } - - if err := observer.setObservedEtcdFromEndpoint("members"); err != nil { - errs = append(errs, err) - } - - if previouslyObservedMembers == nil { - if len(errs) > 0 { - return existingConfig, errs - } - if len(observer.ObservedMembers) > 0 { - if err := unstructured.SetNestedField(observedConfig, observer.ObservedMembers, observer.memberPath...); err != nil { - return existingConfig, append(errs, err) - } - } - if !reflect.DeepEqual(previouslyObservedMembers, observer.ObservedMembers) { - recorder.Eventf("ObserveClusterMembersUpdated", "Updated cluster members to %v", observer.ObservedMembers) - } - return observedConfig, nil - } - - previousMembers, err := getMembersFromConfig(previouslyObservedMembers) - if err != nil { - errs = append(errs, err) - } - - for _, previousMember := range previousMembers { - if observer.HealthyMember[previousMember.Name] || previousMember.Name == "etcd-bootstrap" { - continue - } - _, err := observer.listers.OpenshiftEtcdPodsLister.Pods(clustermembercontroller.EtcdEndpointNamespace).Get(previousMember.Name) - if errors.IsNotFound(err) { - // verify the node exists - //TODO this is very opnionated could this come from the endpoint? 
- nodeName := strings.TrimPrefix(previousMember.Name, "etcd-member-") - - _, err := observer.listers.NodeLister.Get(nodeName) - if errors.IsNotFound(err) { - // if the node is no londer available we use the endpoint observatiopn - klog.Warningf("error: Node %s not found: adding remove status to %s ", nodeName, previousMember.Name) - clusterMember, err := setMember(previousMember.Name, previousMember.PeerURLS, ceoapi.MemberRemove) - if err != nil { - return existingConfig, append(errs, err) - - } - observer.ObservedMembers = append(observer.ObservedMembers, clusterMember) - continue - } - } - } - - if len(errs) > 0 { - if err := unstructured.SetNestedSlice(observedConfig, previouslyObservedMembers, observer.memberPath...); err != nil { - return existingConfig, append(errs, err) - } - return observedConfig, append(errs, err) - } - - if len(observer.ObservedMembers) > 0 { - if err := unstructured.SetNestedField(observedConfig, observer.ObservedMembers, observer.memberPath...); err != nil { - return existingConfig, append(errs, err) - } - } - - if !reflect.DeepEqual(previouslyObservedMembers, observer.ObservedMembers) { - recorder.Eventf("ObserveClusterMembersUpdated", "Updated cluster members to %v", observer.ObservedMembers) - } - return observedConfig, nil -} - -func ObservePendingClusterMembers(genericListers configobserver.Listers, recorder events.Recorder, existingConfig map[string]interface{}) (map[string]interface{}, []error) { - observedConfig := map[string]interface{}{} - healthyMember := make(map[string]bool) - var errs []error - - observer := etcdObserver{ - listers: genericListers.(configobservation.Listers), - memberPath: []string{"cluster", "members"}, - pendingPath: []string{"cluster", "pending"}, - HealthyMember: healthyMember, - } - if err := observer.setClusterDomain(); err != nil { - return existingConfig, append(errs, err) - } - previouslyObservedPending, err := observer.getPathObservationData(observer.pendingPath, existingConfig) - if err != nil { - errs = append(errs, err) - } - // order is important this is needed before setObserved - previousPending, err := getMembersFromConfig(previouslyObservedPending) - if err != nil { - errs = append(errs, err) - } - observer.PreviouslyObservedPending = previousPending - - if err := observer.setObservedEtcdFromEndpoint("pending"); err != nil { - errs = append(errs, err) - } - - if len(observer.ObservedPending) > 0 { - if err := unstructured.SetNestedField(observedConfig, observer.ObservedPending, observer.pendingPath...); err != nil { - return existingConfig, append(errs, err) - } - } - - if len(errs) > 0 { - if previouslyObservedPending != nil { - if err := unstructured.SetNestedSlice(observedConfig, observer.ObservedPending, observer.pendingPath...); err != nil { - errs = append(errs, err) - } - } - return existingConfig, append(errs, err) - } - - if !reflect.DeepEqual(previouslyObservedPending, observer.ObservedPending) { - recorder.Eventf("ObservePendingClusterMembersUpdated", "Updated pending cluster members to %v", observer.ObservedPending) - } - return observedConfig, nil -} - -func isPendingReady(bucket string, podName string, scalingName string, podLister corelistersv1.PodLister) bool { - if bucket == "pending" && podName != scalingName { - return false - } - pod, err := podLister.Pods(clustermembercontroller.EtcdEndpointNamespace).Get(podName) - if err != nil { - klog.Errorf("isPendingReady: error getting pod %#v", err) - return false - } - if len(pod.Spec.InitContainers) < numberOfInitContainers { - return true - } - if 
len(pod.Status.InitContainerStatuses) < 2 { - klog.Infof("isPendingReady: waiting for init cert containers to pass") - return false - } - certsInitContainerNumber := getInitContainerNumber(pod, "certs") - membershipInitContainerNumber := getInitContainerNumber(pod, "membership") - if pod.Status.InitContainerStatuses[certsInitContainerNumber].State.Terminated != nil && pod.Status.InitContainerStatuses[certsInitContainerNumber].State.Terminated.ExitCode == 0 && pod.Status.InitContainerStatuses[membershipInitContainerNumber].State.Running != nil { - return true - } - - if pod.Status.Phase == corev1.PodRunning { - return true - } - - return false -} - -func getInitContainerNumber(pod *corev1.Pod, initContainerName string) int { - for i, c := range pod.Spec.InitContainers { - if c.Name == initContainerName { - return i - } - } - return -1 -} - -func isPendingAdd(bucket string, podName string, previousPending []ceoapi.Member, scalingName string) bool { - if bucket == "members" { - return false - } - for _, previous := range previousPending { - // we observe previously Ready as a condition to eval Add - if previous.Conditions != nil { - if previous.Name == podName && previous.Conditions[0].Type == ceoapi.MemberReady { - log.Infof("checking Pod name %s vs CM name %s", podName, scalingName) - if podName == scalingName { - return true - } - } - } - } - return false - -} - -// ObserveStorageURLs observes the storage config URLs. If there is a problem observing the current storage config URLs, -// then the previously observed storage config URLs will be re-used. -func ObserveStorageURLs(genericListers configobserver.Listers, recorder events.Recorder, currentConfig map[string]interface{}) (observedConfig map[string]interface{}, errs []error) { - listers := genericListers.(configobservation.Listers) - observedConfig = map[string]interface{}{} - storageConfigURLsPath := []string{"storageConfig", "urls"} - - currentEtcdURLs, found, err := unstructured.NestedStringSlice(currentConfig, storageConfigURLsPath...) 
- if err != nil { - errs = append(errs, err) - } - if found { - if err := unstructured.SetNestedStringSlice(observedConfig, currentEtcdURLs, storageConfigURLsPath...); err != nil { - errs = append(errs, err) - } - } - - var observerdClusterMembers []string - etcdEndpoints, err := listers.OpenshiftEtcdEndpointsLister.Endpoints(clustermembercontroller.EtcdEndpointNamespace).Get(clustermembercontroller.EtcdHostEndpointName) - if errors.IsNotFound(err) { - recorder.Warningf("ObserveStorageFailed", "Required %s/%s endpoint not found", clustermembercontroller.EtcdEndpointNamespace, clustermembercontroller.EtcdHostEndpointName) - errs = append(errs, fmt.Errorf("endpoints/host-etcd.openshift-etcd: not found")) - return - } - if err != nil { - recorder.Warningf("ObserveStorageFailed", "Error getting %s/%s endpoint: %v", clustermembercontroller.EtcdEndpointNamespace, clustermembercontroller.EtcdHostEndpointName, err) - errs = append(errs, err) - return - } - dnsSuffix := etcdEndpoints.Annotations["alpha.installer.openshift.io/dns-suffix"] - if len(dnsSuffix) == 0 { - dnsErr := fmt.Errorf("endpoints %s/%s: alpha.installer.openshift.io/dns-suffix annotation not found", clustermembercontroller.EtcdEndpointNamespace, clustermembercontroller.EtcdHostEndpointName) - recorder.Warning("ObserveStorageFailed", dnsErr.Error()) - errs = append(errs, dnsErr) - return - } - for subsetIndex, subset := range etcdEndpoints.Subsets { - for addressIndex, address := range subset.Addresses { - if address.Hostname == "" { - addressErr := fmt.Errorf("endpoints %s/%s: subsets[%v]addresses[%v].hostname not found", clustermembercontroller.EtcdHostEndpointName, clustermembercontroller.EtcdEndpointNamespace, subsetIndex, addressIndex) - recorder.Warningf("ObserveStorageFailed", addressErr.Error()) - errs = append(errs, addressErr) - continue - } - if ip := net.ParseIP(address.IP); ip == nil { - ipErr := fmt.Errorf("endpoints %s/%s: subsets[%v]addresses[%v].IP is not a valid IP address", clustermembercontroller.EtcdHostEndpointName, clustermembercontroller.EtcdEndpointNamespace, subsetIndex, addressIndex) - errs = append(errs, ipErr) - continue - } - // the installer uses dummy addresses in the subnet `192.0.2.` for host-etcd endpoints - // this check see if etcd-bootstrap is populated with a real ip address and uses it - // instead of FQDN - if address.Hostname == "etcd-bootstrap" && !strings.HasPrefix(address.IP, "192.0.2") { - observerdClusterMembers = append(observerdClusterMembers, "https://"+address.IP+":2379") - continue - } - observerdClusterMembers = append(observerdClusterMembers, "https://"+address.Hostname+"."+dnsSuffix+":2379") - } - } - - if len(observerdClusterMembers) == 0 { - emptyURLErr := fmt.Errorf("endpoints %s/%s: no etcd endpoint addresses found", clustermembercontroller.EtcdEndpointNamespace, clustermembercontroller.EtcdHostEndpointName) - recorder.Warning("ObserveStorageFailed", emptyURLErr.Error()) - errs = append(errs, emptyURLErr) - } - - if len(errs) > 0 { - return - } - - if err := unstructured.SetNestedStringSlice(observedConfig, observerdClusterMembers, storageConfigURLsPath...); err != nil { - errs = append(errs, err) - return - } - - if !reflect.DeepEqual(currentEtcdURLs, observerdClusterMembers) { - recorder.Eventf("ObserveStorageUpdated", "Updated storage urls to %s", strings.Join(observerdClusterMembers, ",")) - } - - return -} - -func (e *etcdObserver) getPathObservationData(path []string, existingConfig map[string]interface{}) ([]interface{}, error) { - data, _, err := 
unstructured.NestedSlice(existingConfig, path...) - if err != nil { - return nil, err - } - return data, nil -} - -func isPodCrashLoop(bucket string, pod *corev1.Pod) bool { - if bucket == "members" { - return false - } - for _, containerStatus := range pod.Status.ContainerStatuses { - if containerStatus.Name != "etcd-member" { - continue - } - if containerStatus.State.Waiting != nil && containerStatus.State.Waiting.Reason == "CrashLoopBackOff" { - if containerStatus.LastTerminationState.Terminated != nil { - for _, cond := range pod.Status.Conditions { - if cond.Type == "Initialized" && cond.Status == "True" { - delay := cond.LastTransitionTime.Time.Add(+time.Minute * 5) - if containerStatus.LastTerminationState.Terminated.FinishedAt.After(delay) { - klog.Warningf("isPodCrashLoop: pod %s was observed in CrashLoop", pod.Name) - return true - } - return false - } - } - } - } - } - return false -} - -func (e *etcdObserver) setClusterDomain() error { - endpoints, err := e.listers.OpenshiftEtcdEndpointsLister.Endpoints(clustermembercontroller.EtcdEndpointNamespace).Get(clustermembercontroller.EtcdHostEndpointName) - if errors.IsNotFound(err) { - e.recorder.Warningf("ObserveClusterPending", "Required %s/%s endpoint not found", clustermembercontroller.EtcdEndpointNamespace, clustermembercontroller.EtcdHostEndpointName) - return err - } - if err != nil { - e.recorder.Warningf("ObserveClusterPending", "Error getting %s/%s endpoint: %v", clustermembercontroller.EtcdEndpointNamespace, clustermembercontroller.EtcdHostEndpointName, err) - return err - } - clusterDomain := endpoints.Annotations["alpha.installer.openshift.io/dns-suffix"] - if len(clusterDomain) == 0 { - err := fmt.Errorf("endpoints %s/%s: alpha.installer.openshift.io/dns-suffix annotation not found", clustermembercontroller.EtcdEndpointNamespace, clustermembercontroller.EtcdHostEndpointName) - e.recorder.Warning("ObserveClusterMembers", err.Error()) - return err - } - e.ClusterDomain = clusterDomain - return nil -} - -func (e *etcdObserver) setBootstrapMember() error { - endpoints, err := e.listers.OpenshiftEtcdEndpointsLister.Endpoints(clustermembercontroller.EtcdEndpointNamespace).Get(clustermembercontroller.EtcdHostEndpointName) - if errors.IsNotFound(err) { - e.recorder.Warningf("setBootstrapMember", "Required %s/%s endpoint not found", clustermembercontroller.EtcdEndpointNamespace, clustermembercontroller.EtcdHostEndpointName) - return err - } - if err != nil { - e.recorder.Warningf("setBootstrapMember", "Error getting %s/%s endpoint: %v", clustermembercontroller.EtcdEndpointNamespace, clustermembercontroller.EtcdHostEndpointName, err) - return err - } - - for _, subset := range endpoints.Subsets { - for _, address := range subset.Addresses { - if address.Hostname == "etcd-bootstrap" { - peerURLs := fmt.Sprintf("https://%s:2380", address.IP) - clusterMember, err := setMember(address.Hostname, []string{peerURLs}, ceoapi.MemberUnknown) - if err != nil { - return err - } - e.ObservedMembers = append(e.ObservedMembers, clusterMember) - } - } - } - return nil -} - -func (e *etcdObserver) setObservedEtcdFromEndpoint(bucket string) error { - var endpointAddressList []corev1.EndpointAddress - - endpoints, err := e.listers.OpenshiftEtcdEndpointsLister.Endpoints(clustermembercontroller.EtcdEndpointNamespace).Get(clustermembercontroller.EtcdEndpointName) - if errors.IsNotFound(err) { - e.recorder.Warningf("setObservedFromEndpoint", "Required %s/%s endpoint not found", clustermembercontroller.EtcdEndpointNamespace, 
clustermembercontroller.EtcdHostEndpointName) - return err - } - if err != nil { - e.recorder.Warningf("setObservedFromEndpoint", "Error getting %s/%s endpoint: %v", clustermembercontroller.EtcdEndpointNamespace, clustermembercontroller.EtcdHostEndpointName, err) - return err - } - for _, subset := range endpoints.Subsets { - switch bucket { - case "members": - endpointAddressList = subset.Addresses - case "pending": - // probably should be a struct? - endpointAddressList = subset.NotReadyAddresses - } - for _, address := range endpointAddressList { - name := address.TargetRef.Name - cm, err := e.listers.OpenshiftEtcdConfigMapsLister.ConfigMaps(clustermembercontroller.EtcdEndpointNamespace).Get("member-config") - if err != nil { - return err - } - scalingName, err := clustermembercontroller.GetScaleAnnotationName(cm) - if err != nil { - return err - } - pod, err := e.listers.OpenshiftEtcdPodsLister.Pods(clustermembercontroller.EtcdEndpointNamespace).Get(name) - if err != nil { - return err - } - - status := ceoapi.MemberUnknown - switch { - case isPodCrashLoop(bucket, pod): - status = ceoapi.MemberRemove - break - case isPendingReady(bucket, pod.Name, scalingName, e.listers.OpenshiftEtcdPodsLister): - status = ceoapi.MemberReady - break - default: - status = ceoapi.MemberUnknown - } - - var peerFQDN string - // allow testing - if e.ClusterDomain == "operator.testing.openshift" { - peerFQDN = "etcd-1.operator.testing.openshift" - } else { - peerFQDN, err = clustermembercontroller.ReverseLookupSelf("etcd-server-ssl", "tcp", e.ClusterDomain, address.IP) - if err != nil { - return fmt.Errorf("error looking up self: %v", err) - } - } - - peerURLs := fmt.Sprintf("https://%s:2380", peerFQDN) - etcd, err := setMember(name, []string{peerURLs}, status) - if err != nil { - return err - } - - switch bucket { - case "members": - e.HealthyMember[name] = true - e.ObservedMembers = append(e.ObservedMembers, etcd) - case "pending": - e.ObservedPending = append(e.ObservedPending, etcd) - } - } - } - return nil -} - -func (e *etcdObserver) isPendingRemoval(members ceoapi.Member, existingConfig map[string]interface{}) (bool, error) { - previousPendingObserved, found, err := unstructured.NestedSlice(existingConfig, e.pendingPath...) 
- if err != nil { - return false, err - } - if found { - previousPending, err := getMembersFromConfig(previousPendingObserved) - if err != nil { - return false, err - } - - for _, pendingMember := range previousPending { - if pendingMember.Conditions == nil { - return false, nil - } - if pendingMember.Name == members.Name && pendingMember.Conditions[0].Type == ceoapi.MemberRemove { - return true, nil - } - } - } - return false, nil -} - -//TODO move to util -func getMembersFromConfig(config []interface{}) ([]ceoapi.Member, error) { - var members []ceoapi.Member - for _, member := range config { - memberMap, _ := member.(map[string]interface{}) - name, exists, err := unstructured.NestedString(memberMap, "name") - if err != nil { - return nil, err - } - if !exists { - return nil, fmt.Errorf("member name does not exist") - } - peerURLs, exists, err := unstructured.NestedString(memberMap, "peerURLs") - if err != nil { - return nil, err - } - if !exists { - return nil, fmt.Errorf("member peerURLs do not exist") - } - - status, exists, err := unstructured.NestedString(memberMap, "status") - if err != nil { - return nil, err - } - if !exists { - return nil, fmt.Errorf("member status does not exist") - } - - condition := ceoapi.GetMemberCondition(status) - m := ceoapi.Member{ - Name: name, - PeerURLS: []string{peerURLs}, - Conditions: []ceoapi.MemberCondition{ - { - Type: condition, - }, - }, - } - members = append(members, m) - } - return members, nil -} - -func setMember(name string, peerURLs []string, status ceoapi.MemberConditionType) (interface{}, error) { - etcdURL := map[string]interface{}{} - if err := unstructured.SetNestedField(etcdURL, name, "name"); err != nil { - return nil, err - } - if err := unstructured.SetNestedField(etcdURL, peerURLs[0], "peerURLs"); err != nil { - return nil, err - } - if err := unstructured.SetNestedField(etcdURL, string(status), "status"); err != nil { - return nil, err - } - return etcdURL, nil -} diff --git a/pkg/operator/configobservation/etcd/observe_etcd_test.go b/pkg/operator/configobservation/etcd/observe_etcd_test.go deleted file mode 100644 index b90111fe25..0000000000 --- a/pkg/operator/configobservation/etcd/observe_etcd_test.go +++ /dev/null @@ -1,171 +0,0 @@ -package etcd - -import ( - "github.com/openshift/cluster-etcd-operator/pkg/operator/clustermembercontroller" - - "reflect" - "testing" - - ceoapi "github.com/openshift/cluster-etcd-operator/pkg/operator/api" - "github.com/openshift/cluster-etcd-operator/pkg/operator/configobservation" - "github.com/openshift/library-go/pkg/operator/configobserver" - "github.com/openshift/library-go/pkg/operator/events" - v1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" - "k8s.io/client-go/kubernetes/fake" - corev1listers "k8s.io/client-go/listers/core/v1" - "k8s.io/client-go/tools/cache" -) - -func fakeObjectReference(ep *v1.Endpoints) *v1.ObjectReference { - return &v1.ObjectReference{ - Kind: ep.Kind, - Namespace: ep.Namespace, - Name: ep.Name, - UID: ep.UID, - APIVersion: ep.APIVersion, - ResourceVersion: ep.ResourceVersion, - } -} - -func TestObservePendingClusterMembers(t *testing.T) { - node := "ip-10-0-139-142.ec2.internal" - podIP := "10.0.139.142" - clusterDomain := "operator.testing.openshift" - clusterMemberPath := []string{"cluster", "pending"} - var etcdURLs []interface{} - observedConfig := map[string]interface{}{} - etcdURL := map[string]interface{}{} - - if err := unstructured.SetNestedField(etcdURL, node, "name"); 
err != nil { - t.Fatalf("error occured in writing nested fields %#v", err) - } - if err := unstructured.SetNestedField(etcdURL, "https://etcd-1."+clusterDomain+":2380", "peerURLs"); err != nil { - t.Fatalf("error occured in writing nested fields %#v", err) - } - if err := unstructured.SetNestedField(etcdURL, string(ceoapi.MemberUnknown), "status"); err != nil { - t.Fatalf("error occured in writing nested fields %#v", err) - } - etcdURLs = append(etcdURLs, etcdURL) - if err := unstructured.SetNestedField(observedConfig, etcdURLs, clusterMemberPath...); err != nil { - t.Fatalf("error occured in writing nested fields observedConfig: %#v", err) - } - addressList := []v1.EndpointAddress{ - { - IP: podIP, - Hostname: "", - NodeName: &node, - TargetRef: &v1.ObjectReference{ - Name: node, - }, - }, - } - - etcdEndpoint := &v1.Endpoints{ - ObjectMeta: metav1.ObjectMeta{ - Name: "etcd", - Namespace: clustermembercontroller.EtcdEndpointNamespace, - }, - Subsets: []v1.EndpointSubset{ - { - NotReadyAddresses: addressList, - }, - }, - } - - etcdHostEndpoint := &v1.Endpoints{ - ObjectMeta: metav1.ObjectMeta{ - Name: "host-etcd", - Namespace: clustermembercontroller.EtcdEndpointNamespace, - Annotations: map[string]string{"alpha.installer.openshift.io/dns-suffix": clusterDomain}, - }, - Subsets: []v1.EndpointSubset{ - { - NotReadyAddresses: addressList, - }, - }, - } - - memberConfigMap := &v1.ConfigMap{ - ObjectMeta: metav1.ObjectMeta{ - Namespace: clustermembercontroller.EtcdEndpointNamespace, - Name: "member-config", - Annotations: map[string]string{clustermembercontroller.EtcdScalingAnnotationKey: ""}, - }, - } - - etcdMemberPod := &v1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "ip-10-0-139-142.ec2.internal", - Namespace: clustermembercontroller.EtcdEndpointNamespace, - }, - } - - index := cache.NewIndexer(cache.MetaNamespaceKeyFunc, cache.Indexers{cache.NamespaceIndex: cache.MetaNamespaceIndexFunc}) - if err := index.Add(etcdEndpoint); err != nil { - t.Fatal() - } - if err := index.Add(etcdHostEndpoint); err != nil { - t.Fatal() - } - if err := index.Add(memberConfigMap); err != nil { - t.Fatal() - } - if err := index.Add(etcdMemberPod); err != nil { - t.Fatal() - } - c := configobservation.Listers{ - OpenshiftEtcdEndpointsLister: corev1listers.NewEndpointsLister(index), - OpenshiftEtcdConfigMapsLister: corev1listers.NewConfigMapLister(index), - OpenshiftEtcdPodsLister: corev1listers.NewPodLister(index), - } - client := fake.NewSimpleClientset() - r := events.NewRecorder(client.CoreV1().Events("test-namespace"), "test-operator", - fakeObjectReference(etcdEndpoint)) - - type args struct { - genericListers configobserver.Listers - recorder events.Recorder - currentConfig map[string]interface{} - } - tests := []struct { - name string - args args - wantObservedConfig map[string]interface{} - wantErrs []error - runAfter func() - }{ - // TODO: Refine the test cases. 
- { - name: "bootstrapping test case", - args: args{ - genericListers: c, - recorder: r, - currentConfig: make(map[string]interface{}), - }, - wantObservedConfig: observedConfig, - wantErrs: nil, - runAfter: func() { - etcdEndpoint.Subsets[0].NotReadyAddresses = []v1.EndpointAddress{} - etcdEndpoint.Subsets[0].Addresses = addressList - err := index.Update(etcdEndpoint) - if err != nil { - t.Fatalf("error updating endpoint %v", err) - } - }, - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - gotObservedConfig, gotErrs := ObservePendingClusterMembers(tt.args.genericListers, tt.args.recorder, tt.args.currentConfig) - if !reflect.DeepEqual(gotErrs, tt.wantErrs) { - t.Errorf("ObservePendingClusterMembers() gotErrs = %v, want %v", gotErrs, tt.wantErrs) - } - if !reflect.DeepEqual(gotObservedConfig, tt.wantObservedConfig) { - t.Errorf("ObservePendingClusterMembers() gotObservedConfig = %s, want %s", gotObservedConfig, tt.wantObservedConfig) - } - tt.runAfter() - }) - } -} diff --git a/pkg/operator/etcd_assets/bindata.go b/pkg/operator/etcd_assets/bindata.go index 443ea6f7a8..5727558fec 100644 --- a/pkg/operator/etcd_assets/bindata.go +++ b/pkg/operator/etcd_assets/bindata.go @@ -152,7 +152,8 @@ metadata: namespace: openshift-etcd labels: app: etcd - etcd: "not-true-yet" + k8s-app: etcd + etcd: "true" revision: "REVISION" spec: containers: @@ -167,16 +168,50 @@ spec: #!/bin/sh set -euo pipefail - sleep 24h - - exit 0 - - # add logic here to confirm that we are part of the etcd members (the controller added us). - # this is probably a golang command that tries to confirm for two minutes before exiting - # and prints nothing except for the ETCD_INITIAL_CLUSTER - + ETCDCTL="etcdctl --cacert=/etc/kubernetes/static-pod-resources/configmaps/etcd-serving-ca/ca-bundle.crt \ + --cert=/etc/kubernetes/static-pod-resources/secrets/etcd-all-peer/etcd-peer-NODE_NAME.crt \ + --key=/etc/kubernetes/static-pod-resources/secrets/etcd-all-peer/etcd-peer-NODE_NAME.key \ + --endpoints=${ALL_ETCD_ENDPOINTS}" + ${ETCDCTL} member list + + echo "waiting for member $NODE_NODE_ENVVAR_NAME_ETCD_DNS_NAME..." + COUNT=30 + while [ $COUNT -gt 0 ]; do + IS_MEMBER_PRESENT=$(${ETCDCTL} member list | grep -o "${NODE_NODE_ENVVAR_NAME_ETCD_DNS_NAME}.*:2380") + if [[ -n "${IS_MEMBER_PRESENT:-}" ]]; then + break + fi + sleep 1 + let COUNT=$COUNT-1 + done + + # if the member is not present after 30 seconds + if [ -z "$IS_MEMBER_PRESENT" ]; then + echo "member $NODE_NODE_ENVVAR_NAME_ETCD_DNS_NAME is not present after 30 seconds" + exit 1 + fi + echo "member $NODE_NODE_ENVVAR_NAME_ETCD_DNS_NAME is present, continuing" + + initial_cluster="" + member_output=$( ${ETCDCTL} member list | cut -d',' -f3 ) + for endpoint_key in ${member_output}; do + endpoint=$(${ETCDCTL} member list | grep $endpoint_key | awk -F'[, ]' '{ print $7 }') + initial_cluster+="$endpoint_key=$endpoint," + echo "adding $endpoint_key=$endpoint," + done + # add this pod to the list + initial_cluster+="$NODE_NODE_ENVVAR_NAME_ETCD_NAME=https://$NODE_NODE_ENVVAR_NAME_ETCD_DNS_NAME:2380" + echo $initial_cluster + + # at this point we know this member is added. To support a transition, we must remove the old etcd pod. + # move it somewhere safe so we can retrieve it again later if something goes badly. 
+          mv /etc/kubernetes/manifests/etcd-member.yaml /etc/kubernetes/etcd-backup-dir || true
+
+          export ETCD_INITIAL_CLUSTER="${initial_cluster}"
           export ETCD_NAME=${NODE_NODE_ENVVAR_NAME_ETCD_NAME}
+          env | grep ETCD | grep -v NODE
+          set -x
           exec etcd \
             --initial-advertise-peer-urls=https://${NODE_NODE_ENVVAR_NAME_IP}:2380 \
             --cert-file=/etc/kubernetes/static-pod-resources/secrets/etcd-all-serving/etcd-serving-NODE_NAME.crt \
@@ -190,23 +225,40 @@ spec:
             --advertise-client-urls=https://${NODE_NODE_ENVVAR_NAME_IP}:2379 \
             --listen-client-urls=https://${LISTEN_ON_ALL_IPS}:2379 \
             --listen-peer-urls=https://${LISTEN_ON_ALL_IPS}:2380 \
-            --listen-metrics-urls=https://${LISTEN_ON_ALL_IPS}:9978
+            --listen-metrics-urls=https://${LISTEN_ON_ALL_IPS}:9978 || mv /etc/kubernetes/etcd-backup-dir/etcd-member.yaml /etc/kubernetes/manifests
       env:
${COMPUTED_ENV_VARS}
       resources:
         requests:
-          memory: 200Mi
-          cpu: 100m
+          memory: 600Mi
+          cpu: 300m
         limits:
-          memory: 200Mi
-          cpu: 100m
+          memory: 600Mi
+          cpu: 300m
+      readinessProbe:
+        exec:
+          command:
+            - /bin/sh
+            - -ec
+            - "lsof -n -i :2380 | grep LISTEN"
+        failureThreshold: 3
+        initialDelaySeconds: 3
+        periodSeconds: 5
+        successThreshold: 1
+        timeoutSeconds: 5
+      securityContext:
+        privileged: true
       volumeMounts:
-        - mountPath: /etc/kubernetes/static-pod-resources
-          name: resource-dir
-        - mountPath: /etc/kubernetes/static-pod-certs
-          name: cert-dir
-        - mountPath: /var/lib/etcd/
-          name: data-dir
+        - mountPath: /etc/kubernetes/manifests
+          name: static-pod-dir
+        - mountPath: /etc/kubernetes/etcd-backup-dir
+          name: etcd-backup-dir
+        - mountPath: /etc/kubernetes/static-pod-resources
+          name: resource-dir
+        - mountPath: /etc/kubernetes/static-pod-certs
+          name: cert-dir
+        - mountPath: /var/lib/etcd/
+          name: data-dir
     - name: etcd-metrics
       image: ${IMAGE}
      imagePullPolicy: IfNotPresent
@@ -218,14 +270,6 @@ ${COMPUTED_ENV_VARS}
           #!/bin/sh
           set -euo pipefail
-          sleep 24h
-
-          exit 0
-
-          # add logic here to confirm that we are part of the etcd members (the controller added us).
- # this is probably a golang command that tries to confirm for two minutes before exiting - # and prints nothing except for the ETCD_INITIAL_CLUSTER - export ETCD_NAME=${NODE_NODE_ENVVAR_NAME_ETCD_NAME} exec etcd grpc-proxy start \ @@ -247,6 +291,8 @@ ${COMPUTED_ENV_VARS} limits: memory: 200Mi cpu: 100m + securityContext: + privileged: true volumeMounts: - mountPath: /etc/kubernetes/static-pod-resources name: resource-dir @@ -259,16 +305,22 @@ ${COMPUTED_ENV_VARS} tolerations: - operator: "Exists" volumes: - - hostPath: - path: /etc/kubernetes/static-pod-resources/etcd-pod-REVISION - name: resource-dir - - hostPath: - path: /etc/kubernetes/static-pod-resources/etcd-certs - name: cert-dir - - hostPath: - path: /var/lib/etcd - type: "" - name: data-dir + - hostPath: + path: /etc/kubernetes/manifests + name: static-pod-dir + - hostPath: + path: /etc/kubernetes/static-pod-resources/etcd-member + name: etcd-backup-dir + - hostPath: + path: /etc/kubernetes/static-pod-resources/etcd-pod-REVISION + name: resource-dir + - hostPath: + path: /etc/kubernetes/static-pod-resources/etcd-certs + name: cert-dir + - hostPath: + path: /var/lib/etcd + type: "" + name: data-dir `) diff --git a/pkg/operator/etcdcertsigner2/etcdcertsignercontroller.go b/pkg/operator/etcdcertsigner2/etcdcertsignercontroller.go index e98d36eb7c..57b13e9711 100644 --- a/pkg/operator/etcdcertsigner2/etcdcertsignercontroller.go +++ b/pkg/operator/etcdcertsigner2/etcdcertsignercontroller.go @@ -216,12 +216,12 @@ func (c *EtcdCertSignerController) syncAllMasters() error { combinedServingSecret.Data[getServingSecretNameForNode(node)+".key"] = currServing.Data["tls.key"] } - currServingMetrics, err := c.secretLister.Secrets(operatorclient.TargetNamespace).Get(getPeerClientSecretNameForNode(node)) + currServingMetrics, err := c.secretLister.Secrets(operatorclient.TargetNamespace).Get(getServingMetricsSecretNameForNode(node)) if err != nil { errs = append(errs, err) } else { - combinedServingMetricsSecret.Data[getPeerClientSecretNameForNode(node)+".crt"] = currServingMetrics.Data["tls.crt"] - combinedServingMetricsSecret.Data[getPeerClientSecretNameForNode(node)+".key"] = currServingMetrics.Data["tls.key"] + combinedServingMetricsSecret.Data[getServingMetricsSecretNameForNode(node)+".crt"] = currServingMetrics.Data["tls.crt"] + combinedServingMetricsSecret.Data[getServingMetricsSecretNameForNode(node)+".key"] = currServingMetrics.Data["tls.key"] } } if len(errs) > 0 { diff --git a/pkg/operator/hostendpointscontroller/host_endpoints_controller.go b/pkg/operator/hostendpointscontroller/host_endpoints_controller.go new file mode 100644 index 0000000000..37438a5d95 --- /dev/null +++ b/pkg/operator/hostendpointscontroller/host_endpoints_controller.go @@ -0,0 +1,342 @@ +package hostendpointscontroller + +import ( + "context" + "fmt" + "net" + "sort" + "strings" + "time" + + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/equality" + "k8s.io/apimachinery/pkg/api/errors" + v1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/apimachinery/pkg/labels" + "k8s.io/apimachinery/pkg/runtime/schema" + "k8s.io/apimachinery/pkg/selection" + utilruntime "k8s.io/apimachinery/pkg/util/runtime" + "k8s.io/apimachinery/pkg/util/wait" + "k8s.io/client-go/dynamic/dynamicinformer" + "k8s.io/client-go/kubernetes" + corev1client "k8s.io/client-go/kubernetes/typed/core/v1" + corev1listers "k8s.io/client-go/listers/core/v1" + "k8s.io/client-go/tools/cache" + "k8s.io/client-go/util/workqueue" + 
"k8s.io/klog" + + operatorv1 "github.com/openshift/api/operator/v1" + "github.com/openshift/library-go/pkg/operator/events" + "github.com/openshift/library-go/pkg/operator/resource/resourceapply" + "github.com/openshift/library-go/pkg/operator/resource/resourcemerge" + "github.com/openshift/library-go/pkg/operator/v1helpers" + operatorv1helpers "github.com/openshift/library-go/pkg/operator/v1helpers" +) + +const ( + workQueueKey = "key" +) + +// HostEndpointsController maintains an Endpoints resource with +// the dns names of the current etcd cluster members for use by +// components unable to use the etcd service directly. +type HostEndpointsController struct { + eventRecorder events.Recorder + queue workqueue.RateLimitingInterface + cachesToSync []cache.InformerSynced + operatorClient v1helpers.OperatorClient + machineConfigLister cache.GenericLister + podLister corev1listers.PodLister + endpointsLister corev1listers.EndpointsLister + endpointsClient corev1client.EndpointsGetter +} + +func NewHostEndpointsController( + operatorClient v1helpers.OperatorClient, + eventRecorder events.Recorder, + kubeClient kubernetes.Interface, + kubeInformersForNamespaces operatorv1helpers.KubeInformersForNamespaces, + dynamicInformers dynamicinformer.DynamicSharedInformerFactory, +) *HostEndpointsController { + kubeInformersForTargetNamespace := kubeInformersForNamespaces.InformersFor("openshift-etcd") + endpointsInformer := kubeInformersForTargetNamespace.Core().V1().Endpoints() + podInformer := kubeInformersForTargetNamespace.Core().V1().Pods() + machineConfigInformers := dynamicInformers.ForResource(schema.GroupVersionResource{ + Group: "machineconfiguration.openshift.io", Version: "v1", Resource: "controllerconfigs"}) + + c := &HostEndpointsController{ + eventRecorder: eventRecorder.WithComponentSuffix("host-etcd-endpoints-controller"), + queue: workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "HostEndpointsController"), + cachesToSync: []cache.InformerSynced{ + operatorClient.Informer().HasSynced, + endpointsInformer.Informer().HasSynced, + podInformer.Informer().HasSynced, + machineConfigInformers.Informer().HasSynced, + }, + operatorClient: operatorClient, + machineConfigLister: machineConfigInformers.Lister(), + podLister: podInformer.Lister(), + endpointsLister: endpointsInformer.Lister(), + endpointsClient: kubeClient.CoreV1(), + } + operatorClient.Informer().AddEventHandler(c.eventHandler()) + endpointsInformer.Informer().AddEventHandler(c.eventHandler()) + machineConfigInformers.Informer().AddEventHandler(c.eventHandler()) + podInformer.Informer().AddEventHandler(c.eventHandler()) + return c +} + +func (c *HostEndpointsController) sync() error { + err := c.syncHostEndpoints() + + if err != nil { + _, _, updateErr := v1helpers.UpdateStatus(c.operatorClient, v1helpers.UpdateConditionFn(operatorv1.OperatorCondition{ + Type: "HostEndpointsDegraded", + Status: operatorv1.ConditionTrue, + Reason: "ErrorUpdatingHostEndpoints", + Message: err.Error(), + })) + if updateErr != nil { + c.eventRecorder.Warning("HostEndpointsErrorUpdatingStatus", updateErr.Error()) + return updateErr + } + return err + } + + _, _, updateErr := v1helpers.UpdateStatus(c.operatorClient, v1helpers.UpdateConditionFn(operatorv1.OperatorCondition{ + Type: "HostEndpointsDegraded", + Status: operatorv1.ConditionFalse, + Reason: "HostEndpointsUpdated", + })) + if updateErr != nil { + c.eventRecorder.Warning("HostEndpointsErrorUpdatingStatus", updateErr.Error()) + return updateErr + } + return nil +} + 
+func (c *HostEndpointsController) syncHostEndpoints() error {
+	discoveryDomain, err := c.getEtcdDiscoveryDomain()
+	if err != nil {
+		return fmt.Errorf("unable to determine etcd discovery domain: %v", err)
+	}
+
+	// list etcd member pods
+	etcdPodRequirement, err := labels.NewRequirement("k8s-app", selection.In, []string{"etcd"})
+	if err != nil {
+		return err
+	}
+	etcdPodSelector := labels.NewSelector().Add(*etcdPodRequirement)
+	pods, err := c.podLister.List(etcdPodSelector)
+	if err != nil {
+		return err
+	}
+
+	// get dns names of ready etcd member pods
+	var addresses []string
+	for _, pod := range pods {
+		var ready bool
+		for _, condition := range pod.Status.Conditions {
+			if condition.Type == corev1.PodReady {
+				ready = condition.Status == corev1.ConditionTrue
+				break
+			}
+		}
+		if ready {
+			dnsName, err := c.getEtcdDNSName(discoveryDomain, pod.Status.HostIP)
+			if err != nil {
+				return fmt.Errorf("unable to determine dns name for etcd member on node %s: %v", pod.Spec.NodeName, err)
+			}
+			addresses = append(addresses, dnsName)
+		}
+	}
+
+	if len(addresses) == 0 {
+		return fmt.Errorf("no etcd member pods are ready")
+	}
+
+	required := hostEndpointsAsset()
+
+	if required.Annotations == nil {
+		required.Annotations = map[string]string{}
+	}
+	required.Annotations["alpha.installer.openshift.io/dns-suffix"] = discoveryDomain
+
+	sort.Strings(addresses)
+	for i, address := range addresses {
+		required.Subsets[0].Addresses = append(required.Subsets[0].Addresses, corev1.EndpointAddress{
+			Hostname: strings.TrimSuffix(address, "."+discoveryDomain),
+			IP:       net.IPv4(byte(192), byte(0), byte(2), byte(i+1)).String(),
+		})
+	}
+
+	// if etcd-bootstrap exists, keep it (at the end of the list)
+	existing, err := c.endpointsLister.Endpoints("openshift-etcd").Get("host-etcd")
+	if err != nil && !errors.IsNotFound(err) {
+		return err
+	}
+	if !errors.IsNotFound(err) && len(existing.Subsets) > 0 {
+		for _, endpointAddress := range existing.Subsets[0].Addresses {
+			if endpointAddress.Hostname == "etcd-bootstrap" {
+				required.Subsets[0].Addresses = append(required.Subsets[0].Addresses, *endpointAddress.DeepCopy())
+				break
+			}
+		}
+	}
+
+	return c.applyEndpoints(required)
+}
+
+func hostEndpointsAsset() *corev1.Endpoints {
+	return &corev1.Endpoints{
+		ObjectMeta: v1.ObjectMeta{
+			Name:      "host-etcd",
+			Namespace: "openshift-etcd",
+		},
+		Subsets: []corev1.EndpointSubset{
+			{
+				Ports: []corev1.EndpointPort{
+					{
+						Name:     "etcd",
+						Port:     2379,
+						Protocol: "TCP",
+					},
+				},
+			},
+		},
+	}
+}
+
+func (c *HostEndpointsController) getEtcdDiscoveryDomain() (string, error) {
+	controllerConfigObj, err := c.machineConfigLister.Get("machine-config-controller")
+	if err != nil {
+		return "", err
+	}
+	controllerConfig := controllerConfigObj.(*unstructured.Unstructured).UnstructuredContent()
+
+	etcdDiscoveryDomain, ok, err := unstructured.NestedString(controllerConfig, "spec", "etcdDiscoveryDomain")
+	if err != nil {
+		return "", err
+	}
+	if !ok {
+		return "", fmt.Errorf("controllerconfigs/machine-config-controller missing .spec.etcdDiscoveryDomain")
+	}
+	return etcdDiscoveryDomain, nil
+}
+
+func (c *HostEndpointsController) getEtcdDNSName(discoveryDomain, ip string) (string, error) {
+	dnsName, err := reverseLookup("etcd-server-ssl", "tcp", discoveryDomain, ip)
+	if err != nil {
+		return "", err
+	}
+	return dnsName, nil
+}
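The placeholder addresses handed out in syncHostEndpoints come from 192.0.2.0/24, the RFC 5737 TEST-NET-1 block, which is reserved for documentation and never routed; consumers of host-etcd are expected to use the hostnames, not these IPs. A tiny illustration of the enumeration:

package main

import (
	"fmt"
	"net"
)

func main() {
	// 192.0.2.0/24 is RFC 5737 TEST-NET-1: safe as a dummy address space
	// because it will never collide with a routable host.
	for i := 0; i < 3; i++ {
		ip := net.IPv4(192, 0, 2, byte(i+1))
		fmt.Println(ip.String()) // 192.0.2.1, 192.0.2.2, 192.0.2.3
	}
}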
+// returns the target from the SRV record that resolves to ip.
+func reverseLookup(service, proto, name, ip string) (string, error) {
+	_, srvs, err := net.LookupSRV(service, proto, name)
+	if err != nil {
+		return "", err
+	}
+	selfTarget := ""
+	for _, srv := range srvs {
+		klog.V(4).Infof("checking against %s", srv.Target)
+		addrs, err := net.LookupHost(srv.Target)
+		if err != nil {
+			return "", fmt.Errorf("could not resolve member %q", srv.Target)
+		}
+
+		for _, addr := range addrs {
+			if addr == ip {
+				selfTarget = strings.Trim(srv.Target, ".")
+				break
+			}
+		}
+	}
+	if selfTarget == "" {
+		return "", fmt.Errorf("could not find self")
+	}
+	return selfTarget, nil
+}
+
+func (c *HostEndpointsController) applyEndpoints(required *corev1.Endpoints) error {
+	existing, err := c.endpointsLister.Endpoints("openshift-etcd").Get("host-etcd")
+	if errors.IsNotFound(err) {
+		_, createErr := c.endpointsClient.Endpoints("openshift-etcd").Create(required)
+		if createErr != nil {
+			c.eventRecorder.Warningf("EndpointsCreateFailed", "Failed to create endpoints/%s -n %s: %v", required.Name, required.Namespace, createErr)
+			return createErr
+		}
+		c.eventRecorder.Warningf("EndpointsCreated", "Created endpoints/%s -n %s because it was missing", required.Name, required.Namespace)
+		// creation succeeded; do not fall through and return the NotFound error
+		return nil
+	}
+	if err != nil {
+		return err
+	}
+	modified := resourcemerge.BoolPtr(false)
+	toWrite := existing.DeepCopy()
+	resourcemerge.EnsureObjectMeta(modified, &toWrite.ObjectMeta, required.ObjectMeta)
+	if !equality.Semantic.DeepEqual(existing.Subsets, required.Subsets) {
+		toWrite.Subsets = make([]corev1.EndpointSubset, len(required.Subsets))
+		for i := range required.Subsets {
+			required.Subsets[i].DeepCopyInto(&(toWrite.Subsets)[i])
+		}
+		*modified = true
+	}
+	if !*modified {
+		// no update needed
+		return nil
+	}
+	jsonPatch := resourceapply.JSONPatchNoError(existing, toWrite)
+	if klog.V(4) {
+		klog.Infof("Endpoints %q changes: %v", required.Namespace+"/"+required.Name, jsonPatch)
+	}
+	_, err = c.endpointsClient.Endpoints("openshift-etcd").Update(toWrite)
+	if err != nil {
+		c.eventRecorder.Warningf("EndpointsUpdateFailed", "Failed to update endpoints/%s -n %s: %v", required.Name, required.Namespace, err)
+		return err
+	}
+	c.eventRecorder.Warningf("EndpointsUpdated", "Updated endpoints/%s -n %s because it changed: %v", required.Name, required.Namespace, jsonPatch)
+	return nil
+}
+
+func (c *HostEndpointsController) Run(ctx context.Context, workers int) {
+	defer utilruntime.HandleCrash()
+	defer c.queue.ShutDown()
+	klog.Infof("Starting HostEtcdEndpointsController")
+	defer klog.Infof("Shutting down HostEtcdEndpointsController")
+	if !cache.WaitForCacheSync(ctx.Done(), c.cachesToSync...) {
+		return
+	}
+	go wait.Until(c.runWorker, time.Second, ctx.Done())
+	<-ctx.Done()
+}
+
+func (c *HostEndpointsController) runWorker() {
+	for c.processNextWorkItem() {
+	}
+}
+
+func (c *HostEndpointsController) processNextWorkItem() bool {
+	dsKey, quit := c.queue.Get()
+	if quit {
+		return false
+	}
+	defer c.queue.Done(dsKey)
+
+	err := c.sync()
+	if err == nil {
+		c.queue.Forget(dsKey)
+		return true
+	}
+	utilruntime.HandleError(fmt.Errorf("%v failed with: %v", dsKey, err))
+	c.queue.AddRateLimited(dsKey)
+
+	return true
+}
+
+func (c *HostEndpointsController) eventHandler() cache.ResourceEventHandler {
+	// eventHandler queues the operator to check spec and status
+	return cache.ResourceEventHandlerFuncs{
+		AddFunc:    func(obj interface{}) { c.queue.Add(workQueueKey) },
+		UpdateFunc: func(old, new interface{}) { c.queue.Add(workQueueKey) },
+		DeleteFunc: func(obj interface{}) { c.queue.Add(workQueueKey) },
+	}
+}
diff --git a/pkg/operator/hostetcdendpointcontroller/hostendpointcontroller.go b/pkg/operator/hostetcdendpointcontroller/hostendpointcontroller.go deleted file mode 100644 index d6a214ebc4..0000000000 --- a/pkg/operator/hostetcdendpointcontroller/hostendpointcontroller.go +++ /dev/null @@ -1,246 +0,0 @@ -package hostetcdendpointcontroller - -import ( - "fmt" - "math/rand" - "strconv" - "time" - - "github.com/openshift/cluster-etcd-operator/pkg/operator/clustermembercontroller" - "github.com/openshift/library-go/pkg/operator/events" - "github.com/openshift/library-go/pkg/operator/v1helpers" - corev1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - utilruntime "k8s.io/apimachinery/pkg/util/runtime" - "k8s.io/apimachinery/pkg/util/wait" - "k8s.io/client-go/informers" - corev1client "k8s.io/client-go/kubernetes" - "k8s.io/client-go/tools/cache" - "k8s.io/client-go/util/workqueue" - "k8s.io/klog" -) - -const ( - workQueueKey = "key" - subnetPrefix = "192.0.2." - maxIPAddress = 255 -) - -type HostEtcdEndpointController struct { - // todo: use endpoint lister - clientset corev1client.Interface - operatorConfigClient v1helpers.OperatorClient - queue workqueue.RateLimitingInterface - kubeInformersForOpenshiftEtcdnamespace informers.SharedInformerFactory - healthyEtcdMemberGetter HealthyEtcdMembersGetter - eventRecorder events.Recorder -} - -func NewHostEtcdEndpointcontroller( - clientset corev1client.Interface, - operatorConfigClient v1helpers.OperatorClient, - - kubeInformersForOpenshiftEtcdNamespace informers.SharedInformerFactory, - eventRecorder events.Recorder, -) *HostEtcdEndpointController { - h := &HostEtcdEndpointController{ - clientset: clientset, - operatorConfigClient: operatorConfigClient, - queue: workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "HostEtcdEndpointController"), - kubeInformersForOpenshiftEtcdnamespace: kubeInformersForOpenshiftEtcdNamespace, - healthyEtcdMemberGetter: NewHealthyEtcdMemberGetter(operatorConfigClient), - eventRecorder: eventRecorder.WithComponentSuffix("host-etcd-endpoint-controller"), - } - operatorConfigClient.Informer().AddEventHandler(h.eventHandler()) - h.kubeInformersForOpenshiftEtcdnamespace.Core().V1().Endpoints().Informer().AddEventHandler(h.eventHandler()) - //TODO: remove this when liveness probe is added to etcd-member.yaml.
- h.kubeInformersForOpenshiftEtcdnamespace.Core().V1().Pods().Informer().AddEventHandler(h.eventHandler()) - return h -} - -func (h *HostEtcdEndpointController) Run(i int, stopCh <-chan struct{}) { - defer utilruntime.HandleCrash() - defer h.queue.ShutDown() - - klog.Infof("Starting ClusterMemberController") - defer klog.Infof("Shutting down ClusterMemberController") - - if !cache.WaitForCacheSync(stopCh, - h.operatorConfigClient.Informer().HasSynced, - h.kubeInformersForOpenshiftEtcdnamespace.Core().V1().Endpoints().Informer().HasSynced, - h.kubeInformersForOpenshiftEtcdnamespace.Core().V1().Pods().Informer().HasSynced) { - utilruntime.HandleError(fmt.Errorf("caches did not sync")) - return - } - - go wait.Until(h.runWorker, time.Second, stopCh) - - <-stopCh -} - -func (h *HostEtcdEndpointController) runWorker() { - for h.processNextWorkItem() { - } -} - -func (h *HostEtcdEndpointController) processNextWorkItem() bool { - dsKey, quit := h.queue.Get() - if quit { - return false - } - defer h.queue.Done(dsKey) - - err := h.sync() - if err == nil { - h.queue.Forget(dsKey) - return true - } - - utilruntime.HandleError(fmt.Errorf("%v failed with : %v", dsKey, err)) - h.queue.AddRateLimited(dsKey) - - return true -} - -func (h *HostEtcdEndpointController) eventHandler() cache.ResourceEventHandler { - // eventHandler queues the operator to check spec and status - return cache.ResourceEventHandlerFuncs{ - AddFunc: func(obj interface{}) { h.queue.Add(workQueueKey) }, - UpdateFunc: func(old, new interface{}) { h.queue.Add(workQueueKey) }, - DeleteFunc: func(obj interface{}) { h.queue.Add(workQueueKey) }, - } -} - -func (h *HostEtcdEndpointController) sync() error { - ep, err := h.clientset.CoreV1().Endpoints(clustermembercontroller.EtcdEndpointNamespace). - Get(clustermembercontroller.EtcdHostEndpointName, metav1.GetOptions{}) - if err != nil { - klog.Errorf("error getting %s/%s endpoint: %#v", - clustermembercontroller.EtcdEndpointNamespace, - clustermembercontroller.EtcdEndpointName, - err, - ) - return err - } - if len(ep.Subsets) != 1 { - klog.Errorf("length of host endpoint subset is not equal to 1") - return fmt.Errorf("unexpected length of host endpoint subset") - } - - newEP := ep.DeepCopy() - - newSubset, err := h.getNewAddressSubset(newEP.Subsets[0].Addresses) - if err != nil { - klog.Errorf("error getting new address subset: %#v", err) - } - - newEP.Subsets[0].Addresses = newSubset - _, err = h.clientset.CoreV1().Endpoints(clustermembercontroller.EtcdEndpointNamespace).Update(newEP) - return err -} - -func (h *HostEtcdEndpointController) getNewAddressSubset(addresses []corev1.EndpointAddress) ([]corev1.EndpointAddress, error) { - hostnames := make([]string, len(addresses)) - ipAddresses := make([]string, len(addresses)) - for _, h := range addresses { - hostnames = append(hostnames, h.Hostname) - ipAddresses = append(ipAddresses, h.IP) - } - healthyMembers, err := h.healthyEtcdMemberGetter.GetHealthyEtcdMembers() - if err != nil { - return nil, err - } - add, remove := diff(hostnames, healthyMembers) - - newSubset := []corev1.EndpointAddress{} - - for _, h := range addresses { - if ok := in(remove, h.Hostname); !ok { - newSubset = append(newSubset, h) - } - } - - // Since max of master etcd is 7 safe to not reuse the ip addresses of removed members. 
- newIPAddresses := pickUniqueIPAddress(ipAddresses, len(add)) - for i, m := range add { - newSubset = append(newSubset, corev1.EndpointAddress{ - IP: newIPAddresses[i], - Hostname: m, - }) - } - - makeEtcdBootstrapLast(newSubset) - - return newSubset, nil -} - -// since etcd-bootstrap will be removed after successfull scale up -// we need to make sure it is the last in the list of endpoint addresses -// the kube apiserver reads from this list and will use it as the last -// endpoint if all the other fails -func makeEtcdBootstrapLast(addresses []corev1.EndpointAddress) { - if len(addresses) < 2 { - return - } - for index, addr := range addresses { - if addr.Hostname == "etcd-bootstrap" && index != len(addresses)-1 { - e := addr - addresses = append(addresses[0:index], addresses[index+1:]...) - addresses = append(addresses, e) - return - } - } -} - -func pickUniqueIPAddress(assignedIPAddresses []string, newIPAddressNeeded int) []string { - ipAddresses := make([]string, len(assignedIPAddresses)) - newIPAddresses := make([]string, newIPAddressNeeded) - copy(ipAddresses, assignedIPAddresses) - src := rand.NewSource(time.Now().Unix()) - r := rand.New(src) - for i := 0; i < newIPAddressNeeded; i++ { - tryIP := subnetPrefix + strconv.Itoa(r.Intn(maxIPAddress)) - for ok := in(ipAddresses, tryIP); ok; { - tryIP = subnetPrefix + strconv.Itoa(r.Intn(maxIPAddress)) - ok = in(ipAddresses, tryIP) - } - newIPAddresses[i] = tryIP - ipAddresses = append(ipAddresses, tryIP) - } - return newIPAddresses -} - -func diff(hostnames, healthyMembers []string) (add, remove []string) { - // todo: temporary hack to make sure kube-apiserver - // is always started with 2 urls - // currently, it is taking a lot of time for KAS to - // roll out new config. this leverages the client - // load balancer - if in(hostnames, "etcd-bootstrap") { - return - } - for _, h := range hostnames { - if ok := in(healthyMembers, h); !ok { - if h == "etcd-bootstrap" { - continue - } - remove = append(remove, h) - } - } - - for _, m := range healthyMembers { - if ok := in(hostnames, m); !ok { - add = append(add, m) - } - } - return -} - -func in(list []string, member string) bool { - for _, element := range list { - if element == member { - return true - } - } - return false -} diff --git a/pkg/operator/hostetcdendpointcontroller/hostendpointcontroller_test.go b/pkg/operator/hostetcdendpointcontroller/hostendpointcontroller_test.go deleted file mode 100644 index 9a35ab481d..0000000000 --- a/pkg/operator/hostetcdendpointcontroller/hostendpointcontroller_test.go +++ /dev/null @@ -1,458 +0,0 @@ -package hostetcdendpointcontroller - -import ( - "reflect" - "testing" - - corev1 "k8s.io/api/core/v1" -) - -func Test_diff(t *testing.T) { - type args struct { - hostnames []string - healthyMembers []string - } - tests := []struct { - name string - args args - wantAdd []string - wantRemove []string - }{ - // if etcd-bootstrap is in healthy member, it needs to have - // etcd-1 in healthy member, temporary hack for KAS - { - name: "only etcd-bootstrap", - args: args{ - hostnames: []string{"etcd-bootstrap"}, - healthyMembers: []string{"etcd-bootstrap"}, - }, - wantAdd: nil, - wantRemove: nil, - }, - { - name: "scaling: add a member after etcd-bootstrap", - args: args{ - hostnames: []string{"etcd-bootstrap"}, - healthyMembers: []string{"etcd-bootstrap", "etcd-1"}, - }, - wantAdd: nil, - wantRemove: nil, - }, - { - name: "scaling: add second member after etcd-bootstrap and etcd-0", - args: args{ - hostnames: []string{"etcd-bootstrap", "etcd-0"}, - 
healthyMembers: []string{"etcd-bootstrap", "etcd-0", "etcd-1"}, - }, - wantAdd: nil, - wantRemove: nil, - }, - { - name: "scaling: ignore etcd-bootstrap member", - args: args{ - hostnames: []string{"etcd-bootstrap", "etcd-0", "etcd-1"}, - healthyMembers: []string{"etcd-0", "etcd-1"}, - }, - wantAdd: nil, - wantRemove: nil, - }, - { - name: "scaling: add etcd-2 at the same time", - args: args{ - hostnames: []string{"etcd-bootstrap", "etcd-0", "etcd-1"}, - healthyMembers: []string{"etcd-0", "etcd-1", "etcd-2"}, - }, - wantAdd: nil, - wantRemove: nil, - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - gotAdd, gotRemove := diff(tt.args.hostnames, tt.args.healthyMembers) - if !reflect.DeepEqual(gotAdd, tt.wantAdd) { - t.Errorf("diff() gotAdd = %v, want %v", gotAdd, tt.wantAdd) - } - if !reflect.DeepEqual(gotRemove, tt.wantRemove) { - t.Errorf("diff() gotRemove = %v, want %v", gotRemove, tt.wantRemove) - } - }) - } -} - -func Test_pickIpAddress(t *testing.T) { - type args struct { - assignedIPAddresses []string - newIPAddressNeeded int - } - tests := []struct { - name string - args args - }{ - { - name: "case scaling from etcd-bootstrap", - args: args{ - assignedIPAddresses: []string{subnetPrefix + "1"}, - newIPAddressNeeded: 2, - }, - }, - { - name: "case scaline from 2 nodes", - args: args{ - assignedIPAddresses: []string{subnetPrefix + "102", subnetPrefix + "114"}, - newIPAddressNeeded: 3, - }, - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - got := pickUniqueIPAddress(tt.args.assignedIPAddresses, tt.args.newIPAddressNeeded) - if len(got) != tt.args.newIPAddressNeeded { - t.Fatalf("got %d, needed %d", len(got), tt.args.newIPAddressNeeded) - } - for _, ip := range got { - if ok := in(tt.args.assignedIPAddresses, ip); ok { - t.Fatalf("ip %s in already asigned %#v", ip, tt.args.assignedIPAddresses) - } - tt.args.assignedIPAddresses = append(tt.args.assignedIPAddresses, ip) - } - }) - } -} - -type fakeEtcdMemberGetter []string - -func (f fakeEtcdMemberGetter) GetHealthyEtcdMembers() ([]string, error) { - return f, nil -} - -func TestHostEtcdEndpointController_getNewAddressSubset(t *testing.T) { - type fields struct { - healthyEtcdMemberGetter HealthyEtcdMembersGetter - } - type args struct { - addresses []corev1.EndpointAddress - } - tests := []struct { - name string - fields fields - args args - want []corev1.EndpointAddress - wantErr bool - }{ - { - name: "scaling up from bootstrap", - fields: fields{healthyEtcdMemberGetter: fakeEtcdMemberGetter{"etcd-bootstrap", "etcd-0", "etcd-1"}}, - args: args{addresses: []corev1.EndpointAddress{ - { - Hostname: "etcd-boostrap", - IP: subnetPrefix + "1", - }, - }}, - want: []corev1.EndpointAddress{ - { - Hostname: "etcd-0", - }, - { - Hostname: "etcd-1", - }, - { - Hostname: "etcd-bootstrap", - IP: subnetPrefix + "1", - }, - }, - wantErr: false, - }, - { - name: "scaling down etcd-bootstrap", - fields: fields{healthyEtcdMemberGetter: fakeEtcdMemberGetter{"etcd-0", "etcd-1"}}, - args: args{addresses: []corev1.EndpointAddress{ - { - Hostname: "etcd-boostrap", - IP: subnetPrefix + "1", - }, - { - Hostname: "etcd-0", - IP: subnetPrefix + "2", - }, - { - Hostname: "etcd-1", - IP: subnetPrefix + "3", - }, - }}, - want: []corev1.EndpointAddress{ - { - Hostname: "etcd-0", - }, - { - Hostname: "etcd-1", - }, - }, - wantErr: false, - }, - { - name: "scaling down etcd-bootstrap and scale another member at the same time", - fields: fields{healthyEtcdMemberGetter: fakeEtcdMemberGetter{"etcd-0", "etcd-1", 
"etcd-2"}}, - args: args{addresses: []corev1.EndpointAddress{ - { - Hostname: "etcd-boostrap", - IP: subnetPrefix + "1", - }, - { - Hostname: "etcd-0", - IP: subnetPrefix + "2", - }, - { - Hostname: "etcd-1", - IP: subnetPrefix + "3", - }, - }}, - want: []corev1.EndpointAddress{ - { - Hostname: "etcd-0", - }, - { - Hostname: "etcd-1", - }, - { - Hostname: "etcd-2", - }, - }, - wantErr: false, - }, - { - name: "no scaling, just rearranging", - fields: fields{healthyEtcdMemberGetter: fakeEtcdMemberGetter{"etcd-bootstrap", "etcd-0", "etcd-1"}}, - args: args{addresses: []corev1.EndpointAddress{ - { - Hostname: "etcd-boostrap", - IP: subnetPrefix + "1", - }, - { - Hostname: "etcd-0", - IP: subnetPrefix + "2", - }, - { - Hostname: "etcd-1", - IP: subnetPrefix + "3", - }, - }}, - want: []corev1.EndpointAddress{ - { - Hostname: "etcd-0", - }, - { - Hostname: "etcd-1", - }, - { - Hostname: "etcd-bootstrap", - IP: subnetPrefix + "1", - }, - }, - wantErr: false, - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - h := &HostEtcdEndpointController{ - healthyEtcdMemberGetter: tt.fields.healthyEtcdMemberGetter, - } - got, err := h.getNewAddressSubset(tt.args.addresses) - if (err != nil) != tt.wantErr { - t.Errorf("getNewAddressSubset() error = %v, wantErr %v", err, tt.wantErr) - return - } - if len(got) != len(tt.want) { - t.Errorf("getNewAddressSubset() got length = %v, want length %v", len(got), len(tt.want)) - return - } - for i, addr := range got { - if addr.Hostname != tt.want[i].Hostname { - t.Errorf("for index %d want hostname %v, got %v", i, addr.Hostname, tt.want[i].Hostname) - return - } - } - }) - } -} - -func Test_makeEtcdBootstrapLast(t *testing.T) { - tests := []struct { - name string - args []corev1.EndpointAddress - want []corev1.EndpointAddress - }{ - { - name: "test 1 member", - args: []corev1.EndpointAddress{ - { - Hostname: "etcd-0", - }, - }, - want: []corev1.EndpointAddress{ - { - Hostname: "etcd-0", - }, - }, - }, - { - name: "test 1 member as etcd-bootstrap", - args: []corev1.EndpointAddress{ - { - Hostname: "etcd-bootstrap", - }, - }, - want: []corev1.EndpointAddress{ - { - Hostname: "etcd-bootstrap", - }, - }, - }, - { - name: "test no-op", - args: []corev1.EndpointAddress{ - { - Hostname: "etcd-0", - }, - { - Hostname: "etcd-bootstrap", - }, - }, - want: []corev1.EndpointAddress{ - { - Hostname: "etcd-0", - }, - { - Hostname: "etcd-bootstrap", - }, - }, - }, - { - name: "test with no bootstrap", - args: []corev1.EndpointAddress{ - { - Hostname: "etcd-0", - }, - { - Hostname: "etcd-1", - }, - { - Hostname: "etcd-2", - }, - }, - want: []corev1.EndpointAddress{ - - { - Hostname: "etcd-0", - }, - { - Hostname: "etcd-1", - }, - { - Hostname: "etcd-2", - }, - }, - }, - { - name: "test rearranging with 2 members", - args: []corev1.EndpointAddress{ - { - Hostname: "etcd-bootstrap", - }, - { - Hostname: "etcd-0", - }, - }, - want: []corev1.EndpointAddress{ - { - Hostname: "etcd-0", - }, - { - Hostname: "etcd-bootstrap", - }, - }, - }, - { - name: "test rearranging with 3 members", - args: []corev1.EndpointAddress{ - { - Hostname: "etcd-bootstrap", - }, - { - Hostname: "etcd-0", - }, - { - Hostname: "etcd-2", - }, - }, - want: []corev1.EndpointAddress{ - { - Hostname: "etcd-0", - }, - { - Hostname: "etcd-2", - }, - { - Hostname: "etcd-bootstrap", - }, - }, - }, - { - name: "test rearranging with 3 members again", - args: []corev1.EndpointAddress{ - { - Hostname: "etcd-0", - }, - { - Hostname: "etcd-bootstrap", - }, - { - Hostname: "etcd-2", - }, - }, - want: 
[]corev1.EndpointAddress{ - { - Hostname: "etcd-0", - }, - { - Hostname: "etcd-2", - }, - { - Hostname: "etcd-bootstrap", - }, - }, - }, - { - name: "test rearranging with 3 members for lat time", - args: []corev1.EndpointAddress{ - { - Hostname: "etcd-0", - }, - { - Hostname: "etcd-2", - }, - { - Hostname: "etcd-bootstrap", - }, - }, - want: []corev1.EndpointAddress{ - { - Hostname: "etcd-0", - }, - { - Hostname: "etcd-2", - }, - { - Hostname: "etcd-bootstrap", - }, - }, - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - makeEtcdBootstrapLast(tt.args) - if !reflect.DeepEqual(tt.args, tt.want) { - t.Errorf("want = %#v got = %#v", tt.want, tt.args) - } - }) - } -} diff --git a/pkg/operator/hostetcdendpointcontroller/members.go b/pkg/operator/hostetcdendpointcontroller/members.go deleted file mode 100644 index 79e3fab53b..0000000000 --- a/pkg/operator/hostetcdendpointcontroller/members.go +++ /dev/null @@ -1,194 +0,0 @@ -package hostetcdendpointcontroller - -import ( - "bytes" - "encoding/json" - "fmt" - "net" - "net/url" - "strings" - "time" - - ceoapi "github.com/openshift/cluster-etcd-operator/pkg/operator/api" - "github.com/openshift/library-go/pkg/operator/v1helpers" - "go.etcd.io/etcd/clientv3" - "go.etcd.io/etcd/pkg/transport" - "google.golang.org/grpc" - "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" - "k8s.io/klog" -) - -const ( - etcdCertFile = "/var/run/secrets/etcd-client/tls.crt" - etcdKeyFile = "/var/run/secrets/etcd-client/tls.key" - etcdTrustedCAFile = "/var/run/configmaps/etcd-ca/ca-bundle.crt" - dialTimeout = 20 * time.Second -) - -type HealthyEtcdMembersGetter interface { - GetHealthyEtcdMembers() ([]string, error) -} - -type healthyEtcdMemberGetter struct { - operatorConfigClient v1helpers.OperatorClient -} - -func NewHealthyEtcdMemberGetter(operatorConfigClient v1helpers.OperatorClient) HealthyEtcdMembersGetter { - return &healthyEtcdMemberGetter{operatorConfigClient} -} - -func (h *healthyEtcdMemberGetter) GetHealthyEtcdMembers() ([]string, error) { - member, err := h.EtcdList("members") - if err != nil { - return nil, err - } - hostnames := make([]string, 0) - for _, m := range member { - hostname, err := getEtcdName(m.PeerURLS[0]) - if err != nil { - return nil, err - } - hostnames = append(hostnames, hostname) - } - return hostnames, nil -} - -// getEtcdName returns the name of the peer from a valid peerURL -func getEtcdName(peerURL string) (string, error) { - if peerURL == "" { - return "", fmt.Errorf("getEtcdName: peerURL is empty") - } - if strings.Contains(peerURL, "etcd-") { - return strings.TrimPrefix(strings.Split(peerURL, ".")[0], "https://"), nil - } - u, err := url.Parse(peerURL) - if err != nil { - return "", err - } - host, port, _ := net.SplitHostPort(u.Host) - //TODO peer port should be a global constant - if IsIP(host) && port == "2380" { - return "etcd-bootstrap", nil - } - return "", fmt.Errorf("getEtcdName: peerURL %q is not properly formatted", peerURL) -} - -func (h *healthyEtcdMemberGetter) EtcdList(bucket string) ([]ceoapi.Member, error) { - configPath := []string{"cluster", bucket} - operatorSpec, _, _, err := h.operatorConfigClient.GetOperatorState() - if err != nil { - return nil, err - } - config := map[string]interface{}{} - if err := json.NewDecoder(bytes.NewBuffer(operatorSpec.ObservedConfig.Raw)).Decode(&config); err != nil { - klog.V(4).Infof("decode of existing config failed with error: %v", err) - } - data, exists, err := unstructured.NestedSlice(config, configPath...) 
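An aside on the unstructured accessors EtcdList leans on here (the hunk's error handling continues below): the Nested* helpers return a (value, exists, err) triple, and a missing path is reported through exists, not err, so both must be checked. A condensed sketch of the access pattern, with a hypothetical helper name memberNames:

package sketch

import (
	"fmt"

	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
)

// memberNames pulls the observed member names out of a decoded config map,
// mirroring the checks EtcdList performs on each nested field.
func memberNames(config map[string]interface{}) ([]string, error) {
	data, exists, err := unstructured.NestedSlice(config, "cluster", "members")
	if err != nil {
		return nil, err
	}
	if !exists {
		return nil, nil // nothing observed yet is not an error
	}
	names := []string{}
	for _, member := range data {
		memberMap, _ := member.(map[string]interface{})
		name, exists, err := unstructured.NestedString(memberMap, "name")
		if err != nil {
			return nil, err
		}
		if !exists {
			return nil, fmt.Errorf("member name does not exist")
		}
		names = append(names, name)
	}
	return names, nil
}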
- if err != nil { - return nil, err - } - members := []ceoapi.Member{} - if !exists { - return members, nil - } - - // populate current etcd members as observed. - for _, member := range data { - memberMap, _ := member.(map[string]interface{}) - name, exists, err := unstructured.NestedString(memberMap, "name") - if err != nil { - return nil, err - } - if !exists { - return nil, fmt.Errorf("member name does not exist") - } - peerURLs, exists, err := unstructured.NestedString(memberMap, "peerURLs") - if err != nil { - return nil, err - } - if !exists { - return nil, fmt.Errorf("member peerURLs do not exist") - } - // why have different terms i.e. status and condition? can we choose one and mirror? - status, exists, err := unstructured.NestedString(memberMap, "status") - if err != nil { - return nil, err - } - if !exists { - return nil, fmt.Errorf("member status does not exist") - } - - condition := ceoapi.GetMemberCondition(status) - if condition == ceoapi.MemberReady { - m := ceoapi.Member{ - Name: name, - PeerURLS: []string{peerURLs}, - Conditions: []ceoapi.MemberCondition{ - { - Type: condition, - }, - }, - } - members = append(members, m) - } - } - return members, nil -} - -func (h *healthyEtcdMemberGetter) getEtcdClient() (*clientv3.Client, error) { - endpoints, err := h.Endpoints() - if err != nil { - return nil, err - } - tlsInfo := transport.TLSInfo{ - CertFile: etcdCertFile, - KeyFile: etcdKeyFile, - TrustedCAFile: etcdTrustedCAFile, - } - tlsConfig, err := tlsInfo.ClientConfig() - - dialOptions := []grpc.DialOption{ - grpc.WithBlock(), // block until the underlying connection is up - } - cfg := &clientv3.Config{ - DialOptions: dialOptions, - Endpoints: endpoints, - DialTimeout: dialTimeout, - TLS: tlsConfig, - } - - cli, err := clientv3.New(*cfg) - if err != nil { - return nil, err - } - return cli, err -} - -func (h *healthyEtcdMemberGetter) Endpoints() ([]string, error) { - storageConfigURLsPath := []string{"storageConfig", "urls"} - operatorSpec, _, _, err := h.operatorConfigClient.GetOperatorState() - if err != nil { - return nil, err - } - config := map[string]interface{}{} - if err := json.NewDecoder(bytes.NewBuffer(operatorSpec.ObservedConfig.Raw)).Decode(&config); err != nil { - klog.V(4).Infof("decode of existing config failed with error: %v", err) - } - endpoints, exists, err := unstructured.NestedStringSlice(config, storageConfigURLsPath...) 
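Looking back at getEtcdClient above: the error returned by tlsInfo.ClientConfig() is never inspected before the variable is reused, so a bad certificate path would only surface later as an opaque dial failure. A minimal corrected sketch using the same cert paths the file declares (the function name is ours):

package sketch

import (
	"time"

	"go.etcd.io/etcd/clientv3"
	"go.etcd.io/etcd/pkg/transport"
	"google.golang.org/grpc"
)

// newEtcdClient builds a TLS-secured etcd v3 client, checking the TLS
// config error the deleted getEtcdClient dropped.
func newEtcdClient(endpoints []string) (*clientv3.Client, error) {
	tlsInfo := transport.TLSInfo{
		CertFile:      "/var/run/secrets/etcd-client/tls.crt",
		KeyFile:       "/var/run/secrets/etcd-client/tls.key",
		TrustedCAFile: "/var/run/configmaps/etcd-ca/ca-bundle.crt",
	}
	tlsConfig, err := tlsInfo.ClientConfig()
	if err != nil {
		return nil, err
	}
	return clientv3.New(clientv3.Config{
		Endpoints:   endpoints,
		DialTimeout: 20 * time.Second,
		TLS:         tlsConfig,
		// Block until the connection is up so callers fail fast on
		// unreachable endpoints instead of on their first request.
		DialOptions: []grpc.DialOption{grpc.WithBlock()},
	})
}

grpc.WithBlock() makes clientv3.New wait for a live connection, trading slower construction for fail-fast behavior within DialTimeout.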
- if err != nil { - return nil, err - } - if !exists { - return nil, fmt.Errorf("etcd storageConfig urls not observed") - } - - return endpoints, nil -} - -//TODO add to util -func IsIP(addr string) bool { - if ip := net.ParseIP(addr); ip != nil { - return true - } - return false -} diff --git a/pkg/operator/hostetcdendpointcontroller/members_test.go b/pkg/operator/hostetcdendpointcontroller/members_test.go deleted file mode 100644 index 59503d6649..0000000000 --- a/pkg/operator/hostetcdendpointcontroller/members_test.go +++ /dev/null @@ -1,177 +0,0 @@ -package hostetcdendpointcontroller - -import ( - "bytes" - "encoding/json" - "reflect" - "testing" - - v1 "github.com/openshift/api/operator/v1" - ceoapi "github.com/openshift/cluster-etcd-operator/pkg/operator/api" - "github.com/openshift/library-go/pkg/operator/v1helpers" - "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" - "k8s.io/apimachinery/pkg/runtime" -) - -func Test_getHostname(t *testing.T) { - type args struct { - peerURLs []string - } - tests := []struct { - name string - args args - want string - wantErr bool - }{ - { - name: "valid test case for etcd member", - args: args{peerURLs: []string{"https://etcd-0.foouser.tests.com:2380"}}, - want: "etcd-0", - }, - { - name: "valid test case for etcd bootstrap node", - args: args{peerURLs: []string{"https://10.0.139.142:2380"}}, - want: "etcd-bootstrap", - }, - { - name: "error case malformed IP address", - args: args{peerURLs: []string{"https://10.0.139:2380"}}, - want: "getEtcdName: peerURL \"https://10.0.139:2380\" is not properly formatted", - wantErr: true, - }, - { - name: "error case empty peerURLs", - args: args{peerURLs: []string{""}}, - want: "getEtcdName: peerURL is empty", - wantErr: true, - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - got, err := getEtcdName(tt.args.peerURLs[0]) - if tt.wantErr { - got = err.Error() - } - if got != tt.want { - t.Errorf("getEtcdName() = %q, want %q", got, tt.want) - } - }) - } -} - -func Test_healthyEtcdMemberGetter_EtcdList(t *testing.T) { - node := "node1" - peerURL := "https://etcd-0.foouser.test.com:2380" - bootstrapNode := "etcd-bootstrap" - bootstrapPeerUrl := "https://10.0.139.142:2380" - //podIP := "10.0.139.142" - clusterMemberPath := []string{"cluster", "members"} - - var etcdURLs []interface{} - observedConfig := map[string]interface{}{} - etcdURL := map[string]interface{}{} - if err := unstructured.SetNestedField(etcdURL, node, "name"); err != nil { - t.Fatalf("error occured in writing nested fields %#v", err) - } - - if err := unstructured.SetNestedField(etcdURL, peerURL, "peerURLs"); err != nil { - t.Fatalf("error occured in writing nested fields %#v", err) - } - - if err := unstructured.SetNestedField(etcdURL, string(ceoapi.MemberReady), "status"); err != nil { - t.Fatalf("error occured in writing nested fields observedConfig: %#v", err) - } - - etcdURLs = append(etcdURLs, etcdURL) - etcdBootstrapURL := map[string]interface{}{} - if err := unstructured.SetNestedField(etcdBootstrapURL, bootstrapNode, "name"); err != nil { - t.Fatalf("error occured in writing nested fields %#v", err) - } - - if err := unstructured.SetNestedField(etcdBootstrapURL, bootstrapPeerUrl, "peerURLs"); err != nil { - t.Fatalf("error occured in writing nested fields %#v", err) - } - - if err := unstructured.SetNestedField(etcdBootstrapURL, string(ceoapi.MemberReady), "status"); err != nil { - t.Fatalf("error occured in writing nested fields observedConfig: %#v", err) - } - - etcdURLs = append(etcdURLs, etcdBootstrapURL) 
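The surrounding test fabricates its ObservedConfig by hand; condensed, the recipe is: assemble a plain map with SetNestedField, JSON-encode it, and wrap the bytes in a runtime.RawExtension for v1helpers.NewFakeOperatorClient. A sketch with a hypothetical helper (each member is a map carrying name, peerURLs, and status):

package sketch

import (
	"bytes"
	"encoding/json"
	"testing"

	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
	"k8s.io/apimachinery/pkg/runtime"
)

// observedConfigWithMembers packs a fake cluster/members list into the raw
// JSON form the operator client serves as ObservedConfig.
func observedConfigWithMembers(t *testing.T, members []interface{}) runtime.RawExtension {
	cfg := map[string]interface{}{}
	if err := unstructured.SetNestedField(cfg, members, "cluster", "members"); err != nil {
		t.Fatalf("error writing nested fields: %#v", err)
	}
	b := &bytes.Buffer{}
	if err := json.NewEncoder(b).Encode(cfg); err != nil {
		t.Fatalf("error encoding observedConfig: %#v", err)
	}
	return runtime.RawExtension{Raw: b.Bytes()}
}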
- - if err := unstructured.SetNestedField(observedConfig, etcdURLs, clusterMemberPath...); err != nil { - t.Fatalf("error occured in writing nested fields observedConfig: %#v", err) - } - - b := &bytes.Buffer{} - e := json.NewEncoder(b) - err := e.Encode(observedConfig) - - if err != nil { - t.Fatalf("err encoding observedConfig %#v", err) - } - - etcdSpec := v1.StaticPodOperatorSpec{ - OperatorSpec: v1.OperatorSpec{ - ObservedConfig: runtime.RawExtension{ - Raw: b.Bytes(), - }, - }, - } - - fakeOperatorClient := v1helpers.NewFakeOperatorClient(&etcdSpec.OperatorSpec, nil, nil) - - type fields struct { - operatorConfigClient v1helpers.OperatorClient - } - type args struct { - bucket string - } - tests := []struct { - name string - fields fields - args args - want []ceoapi.Member - wantErr bool - }{ - { - name: "valid test case", - fields: fields{operatorConfigClient: fakeOperatorClient}, - args: args{bucket: "members"}, - want: []ceoapi.Member{ - { - Name: node, - PeerURLS: []string{peerURL}, - Conditions: []ceoapi.MemberCondition{ - { - Type: ceoapi.MemberReady, - }, - }, - }, - { - Name: bootstrapNode, - PeerURLS: []string{bootstrapPeerUrl}, - Conditions: []ceoapi.MemberCondition{ - { - Type: ceoapi.MemberReady, - }, - }, - }, - }, - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - h := &healthyEtcdMemberGetter{ - operatorConfigClient: tt.fields.operatorConfigClient, - } - got, err := h.EtcdList(tt.args.bucket) - if (err != nil) != tt.wantErr { - t.Errorf("EtcdList() error = %v, wantErr %v", err, tt.wantErr) - return - } - if !reflect.DeepEqual(got, tt.want) { - t.Errorf("EtcdList() got = %v, want %v", got, tt.want) - } - }) - } -} diff --git a/pkg/operator/starter.go b/pkg/operator/starter.go index cfa177c207..80ebb345fd 100644 --- a/pkg/operator/starter.go +++ b/pkg/operator/starter.go @@ -6,22 +6,14 @@ import ( "os" "time" + "github.com/openshift/cluster-etcd-operator/pkg/operator/clustermembercontroller2" + configv1 "github.com/openshift/api/config/v1" operatorv1 "github.com/openshift/api/operator/v1" configv1client "github.com/openshift/client-go/config/clientset/versioned" configv1informers "github.com/openshift/client-go/config/informers/externalversions" operatorversionedclient "github.com/openshift/client-go/operator/clientset/versioned" operatorv1informers "github.com/openshift/client-go/operator/informers/externalversions" - "github.com/openshift/cluster-etcd-operator/pkg/operator/bootstrapteardown" - "github.com/openshift/cluster-etcd-operator/pkg/operator/clustermembercontroller" - "github.com/openshift/cluster-etcd-operator/pkg/operator/configobservation/configobservercontroller" - "github.com/openshift/cluster-etcd-operator/pkg/operator/etcd_assets" - "github.com/openshift/cluster-etcd-operator/pkg/operator/etcdcertsigner" - "github.com/openshift/cluster-etcd-operator/pkg/operator/etcdcertsigner2" - "github.com/openshift/cluster-etcd-operator/pkg/operator/hostetcdendpointcontroller" - "github.com/openshift/cluster-etcd-operator/pkg/operator/operatorclient" - "github.com/openshift/cluster-etcd-operator/pkg/operator/resourcesynccontroller" - "github.com/openshift/cluster-etcd-operator/pkg/operator/targetconfigcontroller" "github.com/openshift/library-go/pkg/controller/controllercmd" "github.com/openshift/library-go/pkg/operator/genericoperatorclient" "github.com/openshift/library-go/pkg/operator/resource/resourceapply" @@ -34,6 +26,15 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/client-go/dynamic" "k8s.io/client-go/kubernetes" 
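The starter.go hunks that follow rewire the operator: hostetcdendpointcontroller and clustermembercontroller drop out in favor of hostendpointscontroller and clustermembercontroller2, and each controller is then launched on its own goroutine. A hedged sketch of that launch pattern (the PR mixes Run(ctx, workers) and Run(stopCh) signatures; the sketch assumes the context form):

package sketch

import "context"

// runAll starts each controller with a single worker and lets the shared
// context tear all of them down together, as RunOperator does below.
func runAll(ctx context.Context, controllers ...func(ctx context.Context, workers int)) {
	for _, run := range controllers {
		go run(ctx, 1)
	}
	<-ctx.Done()
}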
+ + "github.com/openshift/cluster-etcd-operator/pkg/operator/bootstrapteardown" + "github.com/openshift/cluster-etcd-operator/pkg/operator/configobservation/configobservercontroller" + "github.com/openshift/cluster-etcd-operator/pkg/operator/etcd_assets" + "github.com/openshift/cluster-etcd-operator/pkg/operator/etcdcertsigner2" + "github.com/openshift/cluster-etcd-operator/pkg/operator/hostendpointscontroller" + "github.com/openshift/cluster-etcd-operator/pkg/operator/operatorclient" + "github.com/openshift/cluster-etcd-operator/pkg/operator/resourcesynccontroller" + "github.com/openshift/cluster-etcd-operator/pkg/operator/targetconfigcontroller" ) func RunOperator(ctx context.Context, controllerContext *controllercmd.ControllerContext) error { @@ -66,7 +67,7 @@ func RunOperator(ctx context.Context, controllerContext *controllercmd.Controlle "", operatorclient.GlobalUserSpecifiedConfigNamespace, operatorclient.GlobalMachineSpecifiedConfigNamespace, - operatorclient.TargetNamespace, + "openshift-etcd", operatorclient.OperatorNamespace, "openshift-kube-apiserver", "openshift-etcd", @@ -112,7 +113,7 @@ func RunOperator(ctx context.Context, controllerContext *controllercmd.Controlle os.Getenv("IMAGE"), os.Getenv("OPERATOR_IMAGE"), operatorClient, - kubeInformersForNamespaces.InformersFor(operatorclient.TargetNamespace), + kubeInformersForNamespaces.InformersFor("openshift-etcd"), kubeInformersForNamespaces, dynamicClient, kubeClient, @@ -134,7 +135,7 @@ func RunOperator(ctx context.Context, controllerContext *controllercmd.Controlle WithEvents(controllerContext.EventRecorder). WithInstaller([]string{"cluster-etcd-operator", "installer"}). WithPruning([]string{"cluster-etcd-operator", "prune"}, "etcd-pod"). - WithResources(operatorclient.TargetNamespace, "etcd", RevisionConfigMaps, RevisionSecrets). + WithResources("openshift-etcd", "etcd", RevisionConfigMaps, RevisionSecrets). WithCerts("etcd-certs", CertConfigMaps, CertSecrets). WithVersioning(operatorclient.OperatorNamespace, "etcd", versionRecorder). 
ToControllers() @@ -149,7 +150,7 @@ func RunOperator(ctx context.Context, controllerContext *controllercmd.Controlle {Resource: "namespaces", Name: operatorclient.GlobalUserSpecifiedConfigNamespace}, {Resource: "namespaces", Name: operatorclient.GlobalMachineSpecifiedConfigNamespace}, {Resource: "namespaces", Name: operatorclient.OperatorNamespace}, - {Resource: "namespaces", Name: operatorclient.TargetNamespace}, + {Resource: "namespaces", Name: "openshift-etcd"}, }, configClient.ConfigV1(), configInformers.Config().V1().ClusterOperators(), @@ -157,19 +158,8 @@ func RunOperator(ctx context.Context, controllerContext *controllercmd.Controlle versionRecorder, controllerContext.EventRecorder, ) - clusterInfrastructure, err := configClient.ConfigV1().Infrastructures().Get("cluster", metav1.GetOptions{}) - if err != nil && !errors.IsNotFound(err) { - return err - } - etcdDiscoveryDomain := clusterInfrastructure.Status.EtcdDiscoveryDomain coreClient := clientset - etcdCertSignerController := etcdcertsigner.NewEtcdCertSignerController( - coreClient, - operatorClient, - kubeInformersForNamespaces.InformersFor("openshift-etcd"), - controllerContext.EventRecorder, - ) etcdCertSignerController2 := etcdcertsigner2.NewEtcdCertSignerController( dynamicClient, coreClient, @@ -177,24 +167,24 @@ func RunOperator(ctx context.Context, controllerContext *controllercmd.Controlle kubeInformersForNamespaces, controllerContext.EventRecorder, ) - hostEtcdEndpointController := hostetcdendpointcontroller.NewHostEtcdEndpointcontroller( - coreClient, + hostEtcdEndpointController := hostendpointscontroller.NewHostEndpointsController( operatorClient, - kubeInformersForNamespaces.InformersFor("openshift-etcd"), controllerContext.EventRecorder, + coreClient, + kubeInformersForNamespaces, + dynamicInformers, ) - clusterMemberController := clustermembercontroller.NewClusterMemberController( - coreClient, + clusterMemberController2 := clustermembercontroller2.NewClusterMemberController( + dynamicClient, operatorClient, kubeInformersForNamespaces.InformersFor("openshift-etcd"), controllerContext.EventRecorder, - etcdDiscoveryDomain, ) bootstrapTeardownController := bootstrapteardown.NewBootstrapTeardownController( operatorClient, + kubeClient, kubeInformersForNamespaces, - clusterMemberController, operatorConfigInformers, controllerContext.EventRecorder, ) @@ -206,13 +196,12 @@ func RunOperator(ctx context.Context, controllerContext *controllercmd.Controlle go staticResourceController.Run(ctx, 1) go targetConfigReconciler.Run(1, ctx.Done()) - go etcdCertSignerController.Run(1, ctx.Done()) go etcdCertSignerController2.Run(1, ctx.Done()) - go hostEtcdEndpointController.Run(1, ctx.Done()) + go hostEtcdEndpointController.Run(ctx, 1) go resourceSyncController.Run(ctx, 1) go statusController.Run(ctx, 1) go configObserver.Run(ctx, 1) - go clusterMemberController.Run(ctx.Done()) + go clusterMemberController2.Run(ctx.Done()) go bootstrapTeardownController.Run(ctx.Done()) go staticPodControllers.Run(ctx, 1) diff --git a/pkg/operator/targetconfigcontroller/etcd_env.go b/pkg/operator/targetconfigcontroller/etcd_env.go index 46be812e57..a174fa7e94 100644 --- a/pkg/operator/targetconfigcontroller/etcd_env.go +++ b/pkg/operator/targetconfigcontroller/etcd_env.go @@ -5,6 +5,8 @@ import ( "net" "strings" + "github.com/openshift/cluster-etcd-operator/pkg/operator/operatorclient" + operatorv1 "github.com/openshift/api/operator/v1" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -19,8 +21,9 @@ type envVarContext 
struct { spec operatorv1.StaticPodOperatorSpec status operatorv1.StaticPodOperatorStatus - nodeLister corev1listers.NodeLister - dynamicClient dynamic.Interface + endpointLister corev1listers.EndpointsLister + nodeLister corev1listers.NodeLister + dynamicClient dynamic.Interface } type envVarFunc func(envVarContext envVarContext) (map[string]string, error) @@ -30,9 +33,11 @@ var envVarFns = []envVarFunc{ getDNSName, getFixedEtcdEnvVars, getEtcdName, + getAllClusterMembers, } // getEtcdEnvVars returns the env vars that need to be set on the etcd static pods that will be rendered. +// ALL_ETCD_ENDPOINTS - this is used to drive the ETCD_INITIAL_CLUSTER // ETCD_DATA_DIR // ETCDCTL_API // ETCD_QUOTA_BACKEND_BYTES @@ -40,10 +45,6 @@ var envVarFns = []envVarFunc{ // NODE_%s_IP // NODE_%s_ETCD_DNS_NAME // NODE_%s_ETCD_NAME -// TODO -// ALL_ETCD_INITIAL_CLUSTER -// ETCD_INITIAL_CLUSTER_STATE -// ETCD_ENDPOINTS func getEtcdEnvVars(envVarContext envVarContext) (map[string]string, error) { ret := map[string]string{} @@ -72,6 +73,33 @@ func getFixedEtcdEnvVars(envVarContext envVarContext) (map[string]string, error) }, nil } +func getAllClusterMembers(envVarContext envVarContext) (map[string]string, error) { + ret := map[string]string{} + + endpoints := []string{} + for _, nodeInfo := range envVarContext.status.NodeStatuses { + endpoint, err := getInternalIPAddressForNodeName(envVarContext, nodeInfo.NodeName) + if err != nil { + return nil, err + } + endpoints = append(endpoints, fmt.Sprintf("https://%s:2379", endpoint)) + } + + hostEtcdEndpoints, err := envVarContext.endpointLister.Endpoints(operatorclient.TargetNamespace).Get("host-etcd") + if err != nil { + return nil, err + } + for _, endpointAddress := range hostEtcdEndpoints.Subsets[0].Addresses { + if endpointAddress.Hostname == "etcd-bootstrap" { + endpoints = append(endpoints, fmt.Sprintf("https://%s:2379", endpointAddress.IP)) + break + } + } + ret["ALL_ETCD_ENDPOINTS"] = strings.Join(endpoints, ",") + + return ret, nil +} + func getEtcdName(envVarContext envVarContext) (map[string]string, error) { ret := map[string]string{} diff --git a/pkg/operator/targetconfigcontroller/targetconfigcontroller.go b/pkg/operator/targetconfigcontroller/targetconfigcontroller.go index 8ddbaa902b..2a07baf54d 100644 --- a/pkg/operator/targetconfigcontroller/targetconfigcontroller.go +++ b/pkg/operator/targetconfigcontroller/targetconfigcontroller.go @@ -5,14 +5,13 @@ import ( "strings" "time" + "github.com/openshift/cluster-etcd-operator/pkg/operator/operatorclient" + "k8s.io/apimachinery/pkg/util/sets" "k8s.io/client-go/dynamic" operatorv1 "github.com/openshift/api/operator/v1" - "github.com/openshift/cluster-etcd-operator/pkg/operator/etcd_assets" - "github.com/openshift/cluster-etcd-operator/pkg/operator/operatorclient" - "github.com/openshift/cluster-etcd-operator/pkg/version" "github.com/openshift/library-go/pkg/operator/events" "github.com/openshift/library-go/pkg/operator/resource/resourceapply" "github.com/openshift/library-go/pkg/operator/resource/resourcemerge" @@ -28,6 +27,9 @@ import ( "k8s.io/client-go/tools/cache" "k8s.io/client-go/util/workqueue" "k8s.io/klog" + + "github.com/openshift/cluster-etcd-operator/pkg/operator/etcd_assets" + "github.com/openshift/cluster-etcd-operator/pkg/version" ) const workQueueKey = "key" @@ -41,6 +43,7 @@ type TargetConfigController struct { dyanmicClient dynamic.Interface kubeClient kubernetes.Interface configMapLister corev1listers.ConfigMapLister + endpointLister corev1listers.EndpointsLister nodeLister 
corev1listers.NodeLister eventRecorder events.Recorder @@ -66,12 +69,14 @@ func NewTargetConfigController( dyanmicClient: dyanmicClient, kubeClient: kubeClient, configMapLister: kubeInformersForNamespaces.ConfigMapLister(), + endpointLister: kubeInformersForNamespaces.InformersFor(operatorclient.TargetNamespace).Core().V1().Endpoints().Lister(), nodeLister: kubeInformersForNamespaces.InformersFor("").Core().V1().Nodes().Lister(), eventRecorder: eventRecorder.WithComponentSuffix("target-config-controller"), queue: workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "TargetConfigController"), cachesToSync: []cache.InformerSynced{ operatorClient.Informer().HasSynced, + kubeInformersForNamespaces.InformersFor(operatorclient.TargetNamespace).Core().V1().Endpoints().Informer().HasSynced, kubeInformersForOpenshiftEtcdNamespace.Core().V1().ConfigMaps().Informer().HasSynced, kubeInformersForOpenshiftEtcdNamespace.Core().V1().Secrets().Informer().HasSynced, kubeInformersForNamespaces.InformersFor("").Core().V1().Nodes().Informer().HasSynced, @@ -81,6 +86,7 @@ func NewTargetConfigController( operatorClient.Informer().AddEventHandler(c.eventHandler()) kubeInformersForOpenshiftEtcdNamespace.Core().V1().ConfigMaps().Informer().AddEventHandler(c.eventHandler()) kubeInformersForOpenshiftEtcdNamespace.Core().V1().Secrets().Informer().AddEventHandler(c.eventHandler()) + kubeInformersForNamespaces.InformersFor(operatorclient.TargetNamespace).Core().V1().Endpoints().Informer().AddEventHandler(c.eventHandler()) // TODO only trigger on master nodes kubeInformersForNamespaces.InformersFor("").Core().V1().Nodes().Informer().AddEventHandler(c.eventHandler()) @@ -177,10 +183,11 @@ func loglevelToKlog(logLevel operatorv1.LogLevel) string { func (c *TargetConfigController) managePod(client coreclientv1.ConfigMapsGetter, recorder events.Recorder, operatorSpec *operatorv1.StaticPodOperatorSpec, operatorStatus *operatorv1.StaticPodOperatorStatus, imagePullSpec, operatorImagePullSpec string) (*corev1.ConfigMap, bool, error) { envVarMap, err := getEtcdEnvVars(envVarContext{ - spec: *operatorSpec, - status: *operatorStatus, - nodeLister: c.nodeLister, - dynamicClient: c.dyanmicClient, + spec: *operatorSpec, + status: *operatorStatus, + nodeLister: c.nodeLister, + dynamicClient: c.dyanmicClient, + endpointLister: c.endpointLister, }) if err != nil { return nil, false, err @@ -273,7 +280,7 @@ func (c *TargetConfigController) namespaceEventHandler() cache.ResourceEventHand if !ok { c.queue.Add(workQueueKey) } - if ns.Name == operatorclient.TargetNamespace { + if ns.Name == ("openshift-etcd") { c.queue.Add(workQueueKey) } }, @@ -282,7 +289,7 @@ func (c *TargetConfigController) namespaceEventHandler() cache.ResourceEventHand if !ok { c.queue.Add(workQueueKey) } - if ns.Name == operatorclient.TargetNamespace { + if ns.Name == ("openshift-etcd") { c.queue.Add(workQueueKey) } }, @@ -300,7 +307,7 @@ func (c *TargetConfigController) namespaceEventHandler() cache.ResourceEventHand return } } - if ns.Name == operatorclient.TargetNamespace { + if ns.Name == ("openshift-etcd") { c.queue.Add(workQueueKey) } }, diff --git a/vendor/github.com/openshift/build-machinery-go/OWNERS b/vendor/github.com/openshift/build-machinery-go/OWNERS index ff2b6a24c8..0db47be860 100644 --- a/vendor/github.com/openshift/build-machinery-go/OWNERS +++ b/vendor/github.com/openshift/build-machinery-go/OWNERS @@ -1,4 +1,10 @@ reviewers: - - tnozicka + - tnozicka + - sttts + - mfojtik + - soltysh approvers: - - tnozicka + - tnozicka + - 
sttts + - mfojtik + - soltysh diff --git a/vendor/github.com/openshift/build-machinery-go/make/lib/golang.mk b/vendor/github.com/openshift/build-machinery-go/make/lib/golang.mk index dd8863d64e..857ddc9b57 100644 --- a/vendor/github.com/openshift/build-machinery-go/make/lib/golang.mk +++ b/vendor/github.com/openshift/build-machinery-go/make/lib/golang.mk @@ -17,7 +17,7 @@ GOFMT_FLAGS ?=-s -l GOLINT ?=golint go_version :=$(shell $(GO) version | sed -E -e 's/.*go([0-9]+.[0-9]+.[0-9]+).*/\1/') -GO_REQUIRED_MIN_VERSION ?=1.13.5 +GO_REQUIRED_MIN_VERSION ?=1.13.4 ifneq "$(GO_REQUIRED_MIN_VERSION)" "" $(call require_minimal_version,$(GO),GO_REQUIRED_MIN_VERSION,$(go_version)) endif diff --git a/vendor/github.com/openshift/build-machinery-go/make/lib/version.mk b/vendor/github.com/openshift/build-machinery-go/make/lib/version.mk index 4a0a826c25..66d03ead24 100644 --- a/vendor/github.com/openshift/build-machinery-go/make/lib/version.mk +++ b/vendor/github.com/openshift/build-machinery-go/make/lib/version.mk @@ -1,7 +1,7 @@ # $1 - required version # $2 - current version define is_equal_or_higher_version -$(strip $(filter $(2),$(firstword $(shell set -euo pipefail && echo -e '$(1)\n$(2)' | sort -V -r -b)))) +$(strip $(filter $(2),$(firstword $(shell set -euo pipefail && printf '%s\n%s' '$(1)' '$(2)' | sort -V -r -b)))) endef # $1 - program name diff --git a/vendor/modules.txt b/vendor/modules.txt index 22b5200dab..6ccd8966a3 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -161,7 +161,7 @@ github.com/openshift/api/template github.com/openshift/api/template/v1 github.com/openshift/api/user github.com/openshift/api/user/v1 -# github.com/openshift/build-machinery-go v0.0.0-20200205161356-ef115f5adc73 +# github.com/openshift/build-machinery-go v0.0.0-20200210090402-3b072832771e github.com/openshift/build-machinery-go github.com/openshift/build-machinery-go/make github.com/openshift/build-machinery-go/make/lib
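One footnote on the build-machinery hunks above: the version.mk change swaps `echo -e` for printf because `echo -e` is not portable across /bin/sh implementations, while printf's behavior is specified by POSIX. The macro's sort -V test answers "is current >= required"; the same predicate in Go, assuming purely numeric dotted versions (the helper name is ours):

package sketch

import (
	"strconv"
	"strings"
)

// versionAtLeast compares dotted numeric versions component by component,
// reproducing what the make macro asks of sort -V for inputs like "1.13.4".
func versionAtLeast(current, required string) bool {
	cur, req := strings.Split(current, "."), strings.Split(required, ".")
	for i := 0; i < len(cur) || i < len(req); i++ {
		c, r := 0, 0
		if i < len(cur) {
			c, _ = strconv.Atoi(cur[i])
		}
		if i < len(req) {
			r, _ = strconv.Atoi(req[i])
		}
		if c != r {
			return c > r
		}
	}
	return true // equal versions satisfy the minimum
}

Under this predicate versionAtLeast("1.13.4", "1.13.4") is true, which is what relaxing GO_REQUIRED_MIN_VERSION to 1.13.4 in the golang.mk hunk relies on.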