From a50a4787a0739a9a4506d37425580b0d7110d940 Mon Sep 17 00:00:00 2001 From: Maskym Vavilov Date: Tue, 5 Nov 2024 12:39:03 +0000 Subject: [PATCH] add on-cluster healtcheck test and prevent enforced false on not healthy probes Signed-off-by: Maskym Vavilov --- api/v1alpha1/dnspolicy_types.go | 4 +- controllers/dnspolicy_status_updater.go | 41 +++++++- go.mod | 2 +- go.sum | 2 + .../apimachinery_status_conditions.go | 27 ++++++ .../dnspolicy/dnspolicy_controller_test.go | 96 +++++++++++++++++++ 6 files changed, 168 insertions(+), 4 deletions(-) diff --git a/api/v1alpha1/dnspolicy_types.go b/api/v1alpha1/dnspolicy_types.go index 5aa5ad64f..5f734e1f8 100644 --- a/api/v1alpha1/dnspolicy_types.go +++ b/api/v1alpha1/dnspolicy_types.go @@ -311,9 +311,9 @@ func (p *DNSPolicy) WithTargetGatewayListener(gwName string, lName string) *DNSP func (p *DNSPolicy) WithHealthCheckFor(endpoint string, port int, protocol string, failureThreshold int) *DNSPolicy { return p.WithHealthCheck(dnsv1alpha1.HealthCheckSpec{ Path: endpoint, - Port: &port, + Port: port, Protocol: dnsv1alpha1.Protocol(protocol), - FailureThreshold: &failureThreshold, + FailureThreshold: failureThreshold, }) } diff --git a/controllers/dnspolicy_status_updater.go b/controllers/dnspolicy_status_updater.go index 5501174e9..237f6f424 100644 --- a/controllers/dnspolicy_status_updater.go +++ b/controllers/dnspolicy_status_updater.go @@ -93,6 +93,13 @@ func (r *DNSPolicyStatusUpdater) updateStatus(ctx context.Context, _ []controlle } meta.SetStatusCondition(&newStatus.Conditions, *enforcedCond) + if policy.Spec.HealthCheck != nil { + healthyCond := healthyCondition(policyRecords, policy) + meta.SetStatusCondition(&newStatus.Conditions, *healthyCond) + } else { + meta.RemoveStatusCondition(&newStatus.Conditions, string(kuadrant.PolicyConditionSubResourcesHealthy)) + } + propagateRecordConditions(policyRecords, newStatus) newStatus.TotalRecords = int32(len(policyRecords)) @@ -133,7 +140,9 @@ func enforcedCondition(records []*kuadrantdnsv1alpha1.DNSRecord, dnsPolicy *kuad // filter not ready records notReadyRecords := utils.Filter(records, func(record *kuadrantdnsv1alpha1.DNSRecord) bool { - return meta.IsStatusConditionFalse(record.Status.Conditions, string(kuadrantdnsv1alpha1.ConditionTypeReady)) + cond := meta.FindStatusCondition(record.Status.Conditions, string(kuadrantdnsv1alpha1.ConditionTypeReady)) + // if the record is not ready (but let through records that are not ready because of the health failure) + return cond == nil || (cond.Status == metav1.ConditionFalse && cond.Reason != string(kuadrantdnsv1alpha1.ConditionReasonUnhealthy)) }) // if there are records and none of the records are ready @@ -155,6 +164,36 @@ func enforcedCondition(records []*kuadrantdnsv1alpha1.DNSRecord, dnsPolicy *kuad return kuadrant.EnforcedCondition(dnsPolicy, nil, true) } +func healthyCondition(records []*kuadrantdnsv1alpha1.DNSRecord, dnsPolicy *kuadrantv1alpha1.DNSPolicy) *metav1.Condition { + // if we don't have records - consider healthy + if len(records) == 0 { + cond := kuadrant.HealthyCondition(dnsPolicy, nil) + cond.Message = "No sub-resources present" + return cond + } + + // filter not healthy records + notHealthyRecords := utils.Filter(records, func(record *kuadrantdnsv1alpha1.DNSRecord) bool { + return meta.IsStatusConditionFalse(record.Status.Conditions, string(kuadrantdnsv1alpha1.ConditionTypeHealthy)) + }) + + // all records are healthy + if len(notHealthyRecords) == 0 { + cond := kuadrant.HealthyCondition(dnsPolicy, nil) + cond.Message = "All sub-resources are healthy" + return cond + } + + cond := kuadrant.HealthyCondition(dnsPolicy, kuadrant.NewErrUnknown(dnsPolicy.Kind(), errors.New("not all sub-resources of policy are healthy"))) + additionalMessage := ". Not healthy DNSRecords are: " + for _, record := range notHealthyRecords { + additionalMessage += fmt.Sprintf("%s ", record.Name) + } + cond.Message += additionalMessage + + return cond +} + var NegativePolarityConditions []string func propagateRecordConditions(records []*kuadrantdnsv1alpha1.DNSRecord, policyStatus *kuadrantv1alpha1.DNSPolicyStatus) { diff --git a/go.mod b/go.mod index 699b43ce5..0c7e419b5 100644 --- a/go.mod +++ b/go.mod @@ -12,7 +12,7 @@ require ( github.com/google/uuid v1.6.0 github.com/kuadrant/authorino v0.18.0 github.com/kuadrant/authorino-operator v0.11.1 - github.com/kuadrant/dns-operator v0.0.0-20241018131559-f2ce8b6aaaef + github.com/kuadrant/dns-operator v0.0.0-20241031152631-a8d441110b5c github.com/kuadrant/limitador-operator v0.9.0 github.com/kuadrant/policy-machinery v0.6.1 github.com/martinlindhe/base36 v1.1.1 diff --git a/go.sum b/go.sum index 895ed1ec2..bb05590b6 100644 --- a/go.sum +++ b/go.sum @@ -260,6 +260,8 @@ github.com/kuadrant/authorino-operator v0.11.1 h1:jndTZhiHMU+2Dk0NU+KP2+MUSfvclr github.com/kuadrant/authorino-operator v0.11.1/go.mod h1:TeFFdX477vUTMushCojaHpvwPLga4DpErGI2oQbqFIs= github.com/kuadrant/dns-operator v0.0.0-20241018131559-f2ce8b6aaaef h1:6P2pC1kOPcrT/22N23Mr3xr3CTHmQQkj3jmOlUEJvO0= github.com/kuadrant/dns-operator v0.0.0-20241018131559-f2ce8b6aaaef/go.mod h1:LGG4R3KEz93Ep0CV1/tziCmRk+VtojWUHR9mXkOHZks= +github.com/kuadrant/dns-operator v0.0.0-20241031152631-a8d441110b5c h1:zU0d+LaX/043ImWM1Mc5+8SV+WgKwCzyI55fQPX7bqw= +github.com/kuadrant/dns-operator v0.0.0-20241031152631-a8d441110b5c/go.mod h1:LGG4R3KEz93Ep0CV1/tziCmRk+VtojWUHR9mXkOHZks= github.com/kuadrant/limitador-operator v0.9.0 h1:hTQ6CFPayf/sL7cIzwWjCoU8uTn6fzWdsJgKbDlnFts= github.com/kuadrant/limitador-operator v0.9.0/go.mod h1:DQOlg9qFOcnWPrwO529JRCMLLOEXJQxkmOes952S/Hw= github.com/kuadrant/policy-machinery v0.6.1 h1:w43DyD/yljzz0T6PNYXmuuhLxrF+IhaFB2rUqrwvGGk= diff --git a/pkg/library/kuadrant/apimachinery_status_conditions.go b/pkg/library/kuadrant/apimachinery_status_conditions.go index 4f605b14f..d9e26cfe7 100644 --- a/pkg/library/kuadrant/apimachinery_status_conditions.go +++ b/pkg/library/kuadrant/apimachinery_status_conditions.go @@ -21,6 +21,9 @@ const ( PolicyReasonOverridden gatewayapiv1alpha2.PolicyConditionReason = "Overridden" PolicyReasonUnknown gatewayapiv1alpha2.PolicyConditionReason = "Unknown" PolicyReasonMissingDependency gatewayapiv1alpha2.PolicyConditionReason = "MissingDependency" + + PolicyConditionSubResourcesHealthy gatewayapiv1alpha2.PolicyConditionType = "SubResourcesHealthy" + PolicyReasonSubResourcesHealthy gatewayapiv1alpha2.PolicyConditionReason = "SubResourcesHealthy" ) func NewAffectedPolicyMap() *AffectedPolicyMap { @@ -138,3 +141,27 @@ func EnforcedCondition(policy Policy, err PolicyError, fully bool) *metav1.Condi return cond } + +func HealthyCondition(policy Policy, err PolicyError) *metav1.Condition { + cond := &metav1.Condition{ + Type: string(PolicyConditionSubResourcesHealthy), + Status: metav1.ConditionTrue, + Reason: string(PolicyReasonSubResourcesHealthy), + Message: fmt.Sprintf("all subresources of %s are healthy", policy.Kind()), + } + + if err == nil { + return cond + } + + // Wrap error into a PolicyError if it is not this type + var policyErr PolicyError + if !errors.As(err, &policyErr) { + policyErr = NewErrUnknown(policy.Kind(), err) + } + cond.Status = metav1.ConditionFalse + cond.Message = policyErr.Error() + cond.Reason = string(policyErr.Reason()) + + return cond +} diff --git a/tests/common/dnspolicy/dnspolicy_controller_test.go b/tests/common/dnspolicy/dnspolicy_controller_test.go index 8662dd1bf..372dba3c2 100644 --- a/tests/common/dnspolicy/dnspolicy_controller_test.go +++ b/tests/common/dnspolicy/dnspolicy_controller_test.go @@ -1244,6 +1244,102 @@ var _ = Describe("DNSPolicy controller", func() { }) + // there is no need to replicate cases form the "valid target and valid gateway status" context + // from the policy pov healthchecks can only affect the "SubResourcesHealthy" condition + Context("valid target and valid gateway status with healthchecks", func() { + BeforeEach(func(ctx SpecContext) { + gateway = tests.NewGatewayBuilder(tests.GatewayName, gatewayClass.Name, testNamespace). + WithHTTPListener(tests.ListenerNameOne, tests.HostOne(domain)). + WithHTTPListener(tests.ListenerNameWildcard, tests.HostWildcard(domain)). + Gateway + dnsPolicy = tests.NewDNSPolicy("test-dns-policy", testNamespace). + WithProviderSecret(*dnsProviderSecret). + WithTargetGateway(tests.GatewayName). + WithLoadBalancingFor(100, "foo", true). + WithHealthCheckFor("/health", 80, string(kuadrantdnsv1alpha1.HttpProtocol), 1) + + Expect(k8sClient.Create(ctx, gateway)).To(Succeed()) + Expect(k8sClient.Create(ctx, dnsPolicy)).To(Succeed()) + + Eventually(func(g Gomega) { + g.Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(gateway), gateway)).To(Succeed()) + gateway.Status.Addresses = []gatewayapiv1.GatewayStatusAddress{ + { + Type: ptr.To(gatewayapiv1.IPAddressType), + Value: tests.IPAddressOne, + }, + { + Type: ptr.To(gatewayapiv1.IPAddressType), + Value: tests.IPAddressTwo, + }, + } + gateway.Status.Listeners = []gatewayapiv1.ListenerStatus{ + { + Name: tests.ListenerNameOne, + SupportedKinds: []gatewayapiv1.RouteGroupKind{}, + AttachedRoutes: 1, + Conditions: []metav1.Condition{}, + }, + { + Name: tests.ListenerNameWildcard, + SupportedKinds: []gatewayapiv1.RouteGroupKind{}, + AttachedRoutes: 1, + Conditions: []metav1.Condition{}, + }, + } + g.Expect(k8sClient.Status().Update(ctx, gateway)).To(Succeed()) + }, tests.TimeoutMedium, tests.RetryIntervalMedium).Should(Succeed()) + + recordName = fmt.Sprintf("%s-%s", tests.GatewayName, tests.ListenerNameOne) + wildcardRecordName = fmt.Sprintf("%s-%s", tests.GatewayName, tests.ListenerNameWildcard) + }) + + It("should create records with enforced and not healthy status", func(ctx SpecContext) { + Eventually(func(g Gomega) { + //Check records + recordList := &kuadrantdnsv1alpha1.DNSRecordList{} + err := k8sClient.List(ctx, recordList, &client.ListOptions{Namespace: testNamespace}) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(recordList.Items).To(HaveLen(2)) + + dnsRecord := &kuadrantdnsv1alpha1.DNSRecord{} + g.Expect(k8sClient.Get(ctx, client.ObjectKey{Name: recordName, Namespace: testNamespace}, dnsRecord)).To(Succeed()) + + wildcardDnsRecord := &kuadrantdnsv1alpha1.DNSRecord{} + g.Expect(k8sClient.Get(ctx, client.ObjectKey{Name: wildcardRecordName, Namespace: testNamespace}, wildcardDnsRecord)).To(Succeed()) + + //Check policy status + err = k8sClient.Get(ctx, client.ObjectKeyFromObject(dnsPolicy), dnsPolicy) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(dnsPolicy.Status.Conditions).To( + ContainElements( + MatchFields(IgnoreExtras, Fields{ + "Type": Equal(string(gatewayapiv1alpha2.PolicyConditionAccepted)), + "Status": Equal(metav1.ConditionTrue), + "Reason": Equal(string(gatewayapiv1alpha2.PolicyConditionAccepted)), + "Message": Equal("DNSPolicy has been accepted"), + }), + MatchFields(IgnoreExtras, Fields{ + "Type": Equal(string(kuadrant.PolicyConditionEnforced)), + "Status": Equal(metav1.ConditionTrue), + "Reason": Equal(string(kuadrant.PolicyReasonEnforced)), + "Message": Equal("DNSPolicy has been successfully enforced"), + }), + MatchFields(IgnoreExtras, Fields{ + "Type": Equal(string(kuadrant.PolicyConditionSubResourcesHealthy)), + "Status": Equal(metav1.ConditionFalse), + "Reason": Equal(string(kuadrant.PolicyReasonUnknown)), + "Message": And( + ContainSubstring("DNSPolicy has encountered some issues: not all sub-resources of policy are healthy. Not healthy DNSRecords are:"), + ContainSubstring(recordName), + Not(ContainSubstring(wildcardRecordName))), // explicitly make sure that we have no probes for the wildcard record + })), + ) + g.Expect(dnsPolicy.Status.TotalRecords).To(Equal(int32(2))) + }, tests.TimeoutLong, tests.RetryIntervalMedium, ctx).Should(Succeed()) + }, testTimeOut) + }) + Context("cel validation", func() { It("should error targeting invalid group", func(ctx SpecContext) { p := tests.NewDNSPolicy("test-dns-policy", testNamespace).