diff --git a/.changelog/2571.txt b/.changelog/2571.txt new file mode 100644 index 0000000000..91b3f2943b --- /dev/null +++ b/.changelog/2571.txt @@ -0,0 +1,3 @@ +```release-note:bug +control-plane: fix bug in endpoints controller when deregistering services from consul when a node is deleted. +``` diff --git a/control-plane/connect-inject/controllers/endpoints/endpoints_controller.go b/control-plane/connect-inject/controllers/endpoints/endpoints_controller.go index eeaeeab485..fb44a2a5ba 100644 --- a/control-plane/connect-inject/controllers/endpoints/endpoints_controller.go +++ b/control-plane/connect-inject/controllers/endpoints/endpoints_controller.go @@ -159,7 +159,6 @@ func (r *Controller) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Resu } err = r.Client.Get(ctx, req.NamespacedName, &serviceEndpoints) - // endpointPods holds a set of all pods this endpoints object is currently pointing to. // We use this later when we reconcile ACL tokens to decide whether an ACL token in Consul // is for a pod that no longer exists. @@ -183,7 +182,7 @@ func (r *Controller) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Resu // It is possible that the endpoints object has never been registered, in which case deregistration is a no-op. if isLabeledIgnore(serviceEndpoints.Labels) { // We always deregister the service to handle the case where a user has registered the service, then added the label later. - r.Log.Info("Ignoring endpoint labeled with `consul.hashicorp.com/service-ignore: \"true\"`", "name", req.Name, "namespace", req.Namespace) + r.Log.Info("ignoring endpoint labeled with `consul.hashicorp.com/service-ignore: \"true\"`", "name", req.Name, "namespace", req.Namespace) err = r.deregisterService(apiClient, req.Name, req.Namespace, nil) return ctrl.Result{}, err } @@ -895,14 +894,14 @@ func getHealthCheckStatusReason(healthCheckStatus, podName, podNamespace string) // them only if they are not in endpointsAddressesMap. If the map is nil, it will deregister all instances. If the map // has addresses, it will only deregister instances not in the map. func (r *Controller) deregisterService(apiClient *api.Client, k8sSvcName, k8sSvcNamespace string, endpointsAddressesMap map[string]bool) error { - // Get services matching metadata. - nodesWithSvcs, err := r.serviceInstancesForK8sNodes(apiClient, k8sSvcName, k8sSvcNamespace) + // Get services matching metadata from Consul + nodesWithSvcs, err := r.serviceInstancesForNodes(apiClient, k8sSvcName, k8sSvcNamespace) if err != nil { r.Log.Error(err, "failed to get service instances", "name", k8sSvcName) return err } - // Deregister each service instance that matches the metadata. + var errs error for _, nodeSvcs := range nodesWithSvcs { for _, svc := range nodeSvcs.Services { // We need to get services matching "k8s-service-name" and "k8s-namespace" metadata. @@ -913,42 +912,48 @@ func (r *Controller) deregisterService(apiClient *api.Client, k8sSvcName, k8sSvc if _, ok := endpointsAddressesMap[svc.Address]; !ok { // If the service address is not in the Endpoints addresses, deregister it. r.Log.Info("deregistering service from consul", "svc", svc.ID) - _, err = apiClient.Catalog().Deregister(&api.CatalogDeregistration{ + _, err := apiClient.Catalog().Deregister(&api.CatalogDeregistration{ Node: nodeSvcs.Node.Node, ServiceID: svc.ID, Namespace: svc.Namespace, }, nil) if err != nil { + // Do not exit right away as there might be other services that need to be deregistered. r.Log.Error(err, "failed to deregister service instance", "id", svc.ID) - return err + errs = multierror.Append(errs, err) + } else { + serviceDeregistered = true } - serviceDeregistered = true } } else { r.Log.Info("deregistering service from consul", "svc", svc.ID) - if _, err = apiClient.Catalog().Deregister(&api.CatalogDeregistration{ + _, err := apiClient.Catalog().Deregister(&api.CatalogDeregistration{ Node: nodeSvcs.Node.Node, ServiceID: svc.ID, Namespace: svc.Namespace, - }, nil); err != nil { + }, nil) + if err != nil { + // Do not exit right away as there might be other services that need to be deregistered. r.Log.Error(err, "failed to deregister service instance", "id", svc.ID) - return err + errs = multierror.Append(errs, err) + } else { + serviceDeregistered = true } - serviceDeregistered = true } if r.AuthMethod != "" && serviceDeregistered { r.Log.Info("reconciling ACL tokens for service", "svc", svc.Service) - err = r.deleteACLTokensForServiceInstance(apiClient, svc, k8sSvcNamespace, svc.Meta[constants.MetaKeyPodName]) + err := r.deleteACLTokensForServiceInstance(apiClient, svc, k8sSvcNamespace, svc.Meta[constants.MetaKeyPodName]) if err != nil { r.Log.Error(err, "failed to reconcile ACL tokens for service", "svc", svc.Service) - return err + errs = multierror.Append(errs, err) } } } } - return nil + return errs + } // deleteACLTokensForServiceInstance finds the ACL tokens that belongs to the service instance and deletes it from Consul. @@ -1068,21 +1073,32 @@ func getTokenMetaFromDescription(description string) (map[string]string, error) return tokenMeta, nil } -func (r *Controller) serviceInstancesForK8sNodes(apiClient *api.Client, k8sServiceName, k8sServiceNamespace string) ([]*api.CatalogNodeServiceList, error) { +func (r *Controller) serviceInstancesForNodes(apiClient *api.Client, k8sServiceName, k8sServiceNamespace string) ([]*api.CatalogNodeServiceList, error) { var serviceList []*api.CatalogNodeServiceList - // Get a list of k8s nodes. - var nodeList corev1.NodeList - err := r.Client.List(r.Context, &nodeList) + + // The nodelist may have changed between this point and when the event was raised + // For example, if a pod is evicted because a node has been deleted, there is no guarantee that that node will show up here + // query consul catalog for a list of nodes supporting this service + // quite a lot of results as synthetic nodes are never deregistered. + var nodes []*api.Node + filter := fmt.Sprintf(`Meta[%q] == %q `, "synthetic-node", "true") + nodes, _, err := apiClient.Catalog().Nodes(&api.QueryOptions{Filter: filter, Namespace: namespaces.WildcardNamespace}) if err != nil { return nil, err } - for _, node := range nodeList.Items { + + var errs error + for _, node := range nodes { var nodeServices *api.CatalogNodeServiceList - nodeServices, err = r.serviceInstancesForK8SServiceNameAndNamespace(apiClient, k8sServiceName, k8sServiceNamespace, common.ConsulNodeNameFromK8sNode(node.Name)) - serviceList = append(serviceList, nodeServices) + nodeServices, err := r.serviceInstancesForK8SServiceNameAndNamespace(apiClient, k8sServiceName, k8sServiceNamespace, node.Node) + if err != nil { + errs = multierror.Append(errs, err) + } else { + serviceList = append(serviceList, nodeServices) + } } - return serviceList, err + return serviceList, errs } // serviceInstancesForK8SServiceNameAndNamespace calls Consul's ServicesWithFilter to get the list diff --git a/control-plane/connect-inject/controllers/endpoints/endpoints_controller_test.go b/control-plane/connect-inject/controllers/endpoints/endpoints_controller_test.go index acf62b2b0e..add93cf8e1 100644 --- a/control-plane/connect-inject/controllers/endpoints/endpoints_controller_test.go +++ b/control-plane/connect-inject/controllers/endpoints/endpoints_controller_test.go @@ -893,6 +893,9 @@ func TestReconcileCreateEndpoint_MultiportService(t *testing.T) { catalogRegistration := &api.CatalogRegistration{ Node: consulNodeName, Address: consulNodeAddress, + NodeMeta: map[string]string{ + metaKeySyntheticNode: "true", + }, Service: svc, } _, err := consulClient.Catalog().Register(catalogRegistration, nil) @@ -2293,6 +2296,9 @@ func TestReconcileUpdateEndpoint(t *testing.T) { { Node: consulNodeName, Address: consulNodeAddress, + NodeMeta: map[string]string{ + metaKeySyntheticNode: "true", + }, Service: &api.AgentService{ ID: "pod1-service-updated", Service: "service-updated", @@ -2312,6 +2318,9 @@ func TestReconcileUpdateEndpoint(t *testing.T) { { Node: consulNodeName, Address: consulNodeAddress, + NodeMeta: map[string]string{ + metaKeySyntheticNode: "true", + }, Service: &api.AgentService{ Kind: api.ServiceKindConnectProxy, ID: "pod1-service-updated-sidecar-proxy", @@ -2398,6 +2407,9 @@ func TestReconcileUpdateEndpoint(t *testing.T) { { Node: consulNodeName, Address: consulNodeAddress, + NodeMeta: map[string]string{ + metaKeySyntheticNode: "true", + }, Service: &api.AgentService{ ID: "pod1-service-updated", Service: "service-updated", @@ -2417,6 +2429,9 @@ func TestReconcileUpdateEndpoint(t *testing.T) { { Node: consulNodeName, Address: "127.0.0.1", + NodeMeta: map[string]string{ + metaKeySyntheticNode: "true", + }, Service: &api.AgentService{ Kind: api.ServiceKindConnectProxy, ID: "pod1-service-updated-sidecar-proxy", @@ -2503,6 +2518,9 @@ func TestReconcileUpdateEndpoint(t *testing.T) { { Node: consulNodeName, Address: consulNodeAddress, + NodeMeta: map[string]string{ + metaKeySyntheticNode: "true", + }, Service: &api.AgentService{ ID: "pod1-service-updated", Service: "service-updated", @@ -2520,6 +2538,9 @@ func TestReconcileUpdateEndpoint(t *testing.T) { { Node: consulNodeName, Address: consulNodeAddress, + NodeMeta: map[string]string{ + metaKeySyntheticNode: "true", + }, Service: &api.AgentService{ Kind: api.ServiceKindConnectProxy, ID: "pod1-service-updated-sidecar-proxy", @@ -2585,6 +2606,9 @@ func TestReconcileUpdateEndpoint(t *testing.T) { { Node: consulNodeName, Address: consulNodeAddress, + NodeMeta: map[string]string{ + metaKeySyntheticNode: "true", + }, Service: &api.AgentService{ ID: "pod1-different-consul-svc-name", Service: "different-consul-svc-name", @@ -2602,6 +2626,9 @@ func TestReconcileUpdateEndpoint(t *testing.T) { { Node: consulNodeName, Address: consulNodeAddress, + NodeMeta: map[string]string{ + metaKeySyntheticNode: "true", + }, Service: &api.AgentService{ Kind: api.ServiceKindConnectProxy, ID: "pod1-different-consul-svc-name-sidecar-proxy", @@ -2675,6 +2702,9 @@ func TestReconcileUpdateEndpoint(t *testing.T) { { Node: consulNodeName, Address: consulNodeAddress, + NodeMeta: map[string]string{ + metaKeySyntheticNode: "true", + }, Service: &api.AgentService{ ID: "pod1-service-updated", Service: "service-updated", @@ -2686,6 +2716,9 @@ func TestReconcileUpdateEndpoint(t *testing.T) { { Node: consulNodeName, Address: consulNodeAddress, + NodeMeta: map[string]string{ + metaKeySyntheticNode: "true", + }, Service: &api.AgentService{ Kind: api.ServiceKindConnectProxy, ID: "pod1-service-updated-sidecar-proxy", @@ -2789,6 +2822,9 @@ func TestReconcileUpdateEndpoint(t *testing.T) { { Node: consulNodeName, Address: consulNodeAddress, + NodeMeta: map[string]string{ + metaKeySyntheticNode: "true", + }, Service: &api.AgentService{ ID: "pod1-service-updated", Service: "service-updated", @@ -2800,6 +2836,9 @@ func TestReconcileUpdateEndpoint(t *testing.T) { { Node: consulNodeName, Address: consulNodeAddress, + NodeMeta: map[string]string{ + metaKeySyntheticNode: "true", + }, Service: &api.AgentService{ Kind: api.ServiceKindConnectProxy, ID: "pod1-service-updated-sidecar-proxy", @@ -2816,6 +2855,9 @@ func TestReconcileUpdateEndpoint(t *testing.T) { { Node: consulNodeName, Address: consulNodeAddress, + NodeMeta: map[string]string{ + metaKeySyntheticNode: "true", + }, Service: &api.AgentService{ ID: "pod2-service-updated", Service: "service-updated", @@ -2827,6 +2869,9 @@ func TestReconcileUpdateEndpoint(t *testing.T) { { Node: consulNodeName, Address: consulNodeAddress, + NodeMeta: map[string]string{ + metaKeySyntheticNode: "true", + }, Service: &api.AgentService{ Kind: api.ServiceKindConnectProxy, ID: "pod2-service-updated-sidecar-proxy", @@ -2886,6 +2931,9 @@ func TestReconcileUpdateEndpoint(t *testing.T) { { Node: consulNodeName, Address: consulNodeAddress, + NodeMeta: map[string]string{ + metaKeySyntheticNode: "true", + }, Service: &api.AgentService{ ID: "pod1-different-consul-svc-name", Service: "different-consul-svc-name", @@ -2897,6 +2945,9 @@ func TestReconcileUpdateEndpoint(t *testing.T) { { Node: consulNodeName, Address: consulNodeAddress, + NodeMeta: map[string]string{ + metaKeySyntheticNode: "true", + }, Service: &api.AgentService{ Kind: api.ServiceKindConnectProxy, ID: "pod1-different-consul-svc-name-sidecar-proxy", @@ -2913,6 +2964,9 @@ func TestReconcileUpdateEndpoint(t *testing.T) { { Node: consulNodeName, Address: consulNodeAddress, + NodeMeta: map[string]string{ + metaKeySyntheticNode: "true", + }, Service: &api.AgentService{ ID: "pod2-different-consul-svc-name", Service: "different-consul-svc-name", @@ -2924,6 +2978,9 @@ func TestReconcileUpdateEndpoint(t *testing.T) { { Node: consulNodeName, Address: consulNodeAddress, + NodeMeta: map[string]string{ + metaKeySyntheticNode: "true", + }, Service: &api.AgentService{ Kind: api.ServiceKindConnectProxy, ID: "pod2-different-consul-svc-name-sidecar-proxy", @@ -2969,6 +3026,9 @@ func TestReconcileUpdateEndpoint(t *testing.T) { { Node: consulNodeName, Address: consulNodeAddress, + NodeMeta: map[string]string{ + metaKeySyntheticNode: "true", + }, Service: &api.AgentService{ ID: "pod1-service-updated", Service: "service-updated", @@ -2980,6 +3040,9 @@ func TestReconcileUpdateEndpoint(t *testing.T) { { Node: consulNodeName, Address: consulNodeAddress, + NodeMeta: map[string]string{ + metaKeySyntheticNode: "true", + }, Service: &api.AgentService{ Kind: api.ServiceKindConnectProxy, ID: "pod1-service-updated-sidecar-proxy", @@ -2996,6 +3059,9 @@ func TestReconcileUpdateEndpoint(t *testing.T) { { Node: consulNodeName, Address: consulNodeAddress, + NodeMeta: map[string]string{ + metaKeySyntheticNode: "true", + }, Service: &api.AgentService{ ID: "pod2-service-updated", Service: "service-updated", @@ -3007,6 +3073,9 @@ func TestReconcileUpdateEndpoint(t *testing.T) { { Node: consulNodeName, Address: consulNodeAddress, + NodeMeta: map[string]string{ + metaKeySyntheticNode: "true", + }, Service: &api.AgentService{ Kind: api.ServiceKindConnectProxy, ID: "pod2-service-updated-sidecar-proxy", @@ -3042,6 +3111,9 @@ func TestReconcileUpdateEndpoint(t *testing.T) { { Node: consulNodeName, Address: consulNodeAddress, + NodeMeta: map[string]string{ + metaKeySyntheticNode: "true", + }, Service: &api.AgentService{ ID: "pod1-different-consul-svc-name", Service: "different-consul-svc-name", @@ -3053,6 +3125,9 @@ func TestReconcileUpdateEndpoint(t *testing.T) { { Node: consulNodeName, Address: consulNodeAddress, + NodeMeta: map[string]string{ + metaKeySyntheticNode: "true", + }, Service: &api.AgentService{ Kind: api.ServiceKindConnectProxy, ID: "pod1-different-consul-svc-name-sidecar-proxy", @@ -3069,6 +3144,9 @@ func TestReconcileUpdateEndpoint(t *testing.T) { { Node: consulNodeName, Address: consulNodeAddress, + NodeMeta: map[string]string{ + metaKeySyntheticNode: "true", + }, Service: &api.AgentService{ ID: "pod2-different-consul-svc-name", Service: "different-consul-svc-name", @@ -3080,6 +3158,9 @@ func TestReconcileUpdateEndpoint(t *testing.T) { { Node: consulNodeName, Address: consulNodeAddress, + NodeMeta: map[string]string{ + metaKeySyntheticNode: "true", + }, Service: &api.AgentService{ Kind: api.ServiceKindConnectProxy, ID: "pod2-different-consul-svc-name-sidecar-proxy", @@ -3128,6 +3209,9 @@ func TestReconcileUpdateEndpoint(t *testing.T) { { Node: consulNodeName, Address: consulNodeAddress, + NodeMeta: map[string]string{ + metaKeySyntheticNode: "true", + }, Service: &api.AgentService{ ID: "pod1-service-updated", Service: "service-updated", @@ -3145,6 +3229,9 @@ func TestReconcileUpdateEndpoint(t *testing.T) { { Node: consulNodeName, Address: consulNodeAddress, + NodeMeta: map[string]string{ + metaKeySyntheticNode: "true", + }, Service: &api.AgentService{ Kind: api.ServiceKindConnectProxy, ID: "pod1-service-updated-sidecar-proxy", @@ -3224,6 +3311,9 @@ func TestReconcileUpdateEndpoint(t *testing.T) { { Node: consulNodeName, Address: consulNodeAddress, + NodeMeta: map[string]string{ + metaKeySyntheticNode: "true", + }, Service: &api.AgentService{ ID: "pod1-service-updated", Service: "service-updated", @@ -3241,6 +3331,9 @@ func TestReconcileUpdateEndpoint(t *testing.T) { { Node: consulNodeName, Address: consulNodeAddress, + NodeMeta: map[string]string{ + metaKeySyntheticNode: "true", + }, Service: &api.AgentService{ Kind: api.ServiceKindConnectProxy, ID: "pod1-service-updated-sidecar-proxy", @@ -3263,6 +3356,9 @@ func TestReconcileUpdateEndpoint(t *testing.T) { { Node: consulNodeName, Address: consulNodeAddress, + NodeMeta: map[string]string{ + metaKeySyntheticNode: "true", + }, Service: &api.AgentService{ ID: "pod2-service-updated", Service: "service-updated", @@ -3280,6 +3376,9 @@ func TestReconcileUpdateEndpoint(t *testing.T) { { Node: consulNodeName, Address: consulNodeAddress, + NodeMeta: map[string]string{ + metaKeySyntheticNode: "true", + }, Service: &api.AgentService{ Kind: api.ServiceKindConnectProxy, ID: "pod2-service-updated-sidecar-proxy", @@ -3363,6 +3462,9 @@ func TestReconcileUpdateEndpoint(t *testing.T) { { Node: consulNodeName, Address: consulNodeAddress, + NodeMeta: map[string]string{ + metaKeySyntheticNode: "true", + }, Service: &api.AgentService{ ID: "pod1-service-updated", Service: "service-updated", @@ -3380,6 +3482,9 @@ func TestReconcileUpdateEndpoint(t *testing.T) { { Node: consulNodeName, Address: consulNodeAddress, + NodeMeta: map[string]string{ + metaKeySyntheticNode: "true", + }, Service: &api.AgentService{ Kind: api.ServiceKindConnectProxy, ID: "pod1-service-updated-sidecar-proxy", @@ -4070,6 +4175,9 @@ func TestReconcileDeleteEndpoint(t *testing.T) { serviceRegistration := &api.CatalogRegistration{ Node: consulNodeName, Address: consulNodeAddress, + NodeMeta: map[string]string{ + metaKeySyntheticNode: "true", + }, Service: svc, } _, err := consulClient.Catalog().Register(serviceRegistration, nil) @@ -4216,6 +4324,9 @@ func TestReconcileIgnoresServiceIgnoreLabel(t *testing.T) { serviceRegistration := &api.CatalogRegistration{ Node: consulNodeName, Address: consulNodeAddress, + NodeMeta: map[string]string{ + metaKeySyntheticNode: "true", + }, Service: &api.AgentService{ ID: "pod1-" + svcName, Service: svcName, @@ -4339,6 +4450,9 @@ func TestReconcile_podSpecifiesExplicitService(t *testing.T) { _, err := consulClient.Catalog().Register(&api.CatalogRegistration{ Node: consulNodeName, Address: consulNodeAddress, + NodeMeta: map[string]string{ + metaKeySyntheticNode: "true", + }, Service: &api.AgentService{ ID: "pod1-" + svcName, Service: svcName, @@ -4496,6 +4610,9 @@ func TestServiceInstancesForK8SServiceNameAndNamespace(t *testing.T) { catalogRegistration := &api.CatalogRegistration{ Node: consulNodeName, Address: "127.0.0.1", + NodeMeta: map[string]string{ + metaKeySyntheticNode: "true", + }, Service: svc, } _, err = consulClient.Catalog().Register(catalogRegistration, nil)