From d3850302c008f847bfc729037de3f15f19399c14 Mon Sep 17 00:00:00 2001
From: Ihar Hrachyshka
Date: Tue, 30 Sep 2025 18:03:17 +0000
Subject: [PATCH 01/20] chore: Remove --pod-ip option

It has not done anything since 2020.

Signed-off-by: Ihar Hrachyshka
---
 go-controller/pkg/config/config.go | 1 -
 1 file changed, 1 deletion(-)

diff --git a/go-controller/pkg/config/config.go b/go-controller/pkg/config/config.go
index 72a22defbd..2e4260f2b9 100644
--- a/go-controller/pkg/config/config.go
+++ b/go-controller/pkg/config/config.go
@@ -377,7 +377,6 @@ type KubernetesConfig struct {
 	ServiceCIDRs []*net.IPNet
 	OVNConfigNamespace string `gcfg:"ovn-config-namespace"`
 	OVNEmptyLbEvents bool `gcfg:"ovn-empty-lb-events"`
-	PodIP string `gcfg:"pod-ip"` // UNUSED
 	RawNoHostSubnetNodes string `gcfg:"no-hostsubnet-nodes"`
 	NoHostSubnetNodes labels.Selector
 	HostNetworkNamespace string `gcfg:"host-network-namespace"`

From a9d76d66c90baeb07f406e1ed8ad3010c0a42bbd Mon Sep 17 00:00:00 2001
From: Ihar Hrachyshka
Date: Tue, 30 Sep 2025 20:45:00 +0000
Subject: [PATCH 02/20] fix: --logfile-maxsize is in megabytes, not bytes

Signed-off-by: Ihar Hrachyshka
---
 go-controller/pkg/config/config.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/go-controller/pkg/config/config.go b/go-controller/pkg/config/config.go
index 72a22defbd..9866aeeafc 100644
--- a/go-controller/pkg/config/config.go
+++ b/go-controller/pkg/config/config.go
@@ -945,7 +945,7 @@ var CommonFlags = []cli.Flag{
 	// Logfile rotation parameters
 	&cli.IntFlag{
 		Name: "logfile-maxsize",
-		Usage: "Maximum size in bytes of the log file before it gets rolled",
+		Usage: "Maximum size in megabytes of the log file before it gets rolled",
 		Destination: &cliConfig.Logging.LogFileMaxSize,
 		Value: Logging.LogFileMaxSize,
 	},

From d4136ccfb8e2762547a6cdd193f1722b0840596b Mon Sep 17 00:00:00 2001
From: Ihar Hrachyshka
Date: Tue, 30 Sep 2025 22:48:27 +0000
Subject: [PATCH 03/20] fix: list allowed values for --platform-type option

Signed-off-by: Ihar Hrachyshka
Assisted-By: Claude Code; claude-sonnet-4-20250514
---
 go-controller/pkg/config/config.go | 25 ++++++++++++++++++++++++-
 1 file changed, 24 insertions(+), 1 deletion(-)

diff --git a/go-controller/pkg/config/config.go b/go-controller/pkg/config/config.go
index 72a22defbd..ff2e20d3b3 100644
--- a/go-controller/pkg/config/config.go
+++ b/go-controller/pkg/config/config.go
@@ -13,6 +13,7 @@ import (
 	"strings"
 	"time"

+	"github.com/openshift/api/config/v1"
 	"github.com/urfave/cli/v2"
 	gcfg "gopkg.in/gcfg.v1"
 	lumberjack "gopkg.in/natefinch/lumberjack.v2"
@@ -28,6 +29,28 @@ import (
 	"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types"
 )

+// getSupportedPlatformTypes returns a list of all supported platform types
+func getSupportedPlatformTypes() []string {
+	return []string{
+		string(v1.AWSPlatformType), // "AWS"
+		string(v1.AzurePlatformType), // "Azure"
+		string(v1.BareMetalPlatformType), // "BareMetal"
+		string(v1.GCPPlatformType), // "GCP"
+		string(v1.LibvirtPlatformType), // "Libvirt"
+		string(v1.OpenStackPlatformType), // "OpenStack"
+		string(v1.NonePlatformType), // "None"
+		string(v1.VSpherePlatformType), // "VSphere"
+		string(v1.OvirtPlatformType), // "oVirt"
+		string(v1.IBMCloudPlatformType), // "IBMCloud"
+		string(v1.KubevirtPlatformType), // "KubeVirt"
+		string(v1.EquinixMetalPlatformType), // "EquinixMetal"
+		string(v1.PowerVSPlatformType), // "PowerVS"
+		string(v1.AlibabaCloudPlatformType), // "AlibabaCloud"
+		string(v1.NutanixPlatformType), // "Nutanix"
+		string(v1.ExternalPlatformType), // 
"External" + } +} + // DefaultEncapPort number used if not supplied const DefaultEncapPort = 6081 @@ -1277,7 +1300,7 @@ var K8sFlags = []cli.Flag{ &cli.StringFlag{ Name: "platform-type", Usage: "The cloud provider platform type ovn-kubernetes is deployed on. " + - "Valid values can be found in: https://github.com/ovn-org/ovn-kubernetes/blob/master/go-controller/vendor/github.com/openshift/api/config/v1/types_infrastructure.go#L130-L172", + "Valid values: " + strings.Join(getSupportedPlatformTypes(), ", "), Destination: &cliConfig.Kubernetes.PlatformType, Value: Kubernetes.PlatformType, }, From 317bdd67a08919f8a85a63465b1debc4102e1c07 Mon Sep 17 00:00:00 2001 From: Nadia Pinaeva Date: Thu, 9 Oct 2025 09:09:51 +0200 Subject: [PATCH 04/20] Enable ovn-ci workflow on release branches Replace master with base branch to make it work on release branches. Signed-off-by: Nadia Pinaeva --- .github/workflows/test.yml | 92 +++++++++++++++++++------------------- 1 file changed, 46 insertions(+), 46 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 00cfdbd30b..c3b01607de 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -3,7 +3,7 @@ name: ovn-ci on: merge_group: pull_request: - branches: [ master ] + branches: [ master,release-* ] # Only run jobs if at least one non-doc file is changed paths-ignore: - '**/*.md' @@ -30,7 +30,7 @@ env: # This must be a directory CI_IMAGE_CACHE: tmp/image_cache/ - CI_IMAGE_MASTER_TAR: image-master.tar + CI_IMAGE_BASE_TAR: image-base.tar CI_IMAGE_PR_TAR: image-pr.tar CI_DIST_IMAGES_OUTPUT: dist/images/_output/ @@ -65,49 +65,49 @@ jobs: working-directory: go-controller args: --modules-download-mode=vendor --timeout=15m0s --verbose - build-master: - name: Build-master + build-base: + name: Build-base runs-on: ubuntu-24.04 steps: - # Create a cache for the built master image - - name: Restore master image cache - id: image_cache_master + # Create a cache for the built base image + - name: Restore base image cache + id: image_cache_base uses: actions/cache@v4 with: path: | ${{ env.CI_IMAGE_CACHE }} - key: ${{ github.run_id }}-image-cache-master - # if CI_IMAGE_MASTER_TAR isn't in cache, try pulling it and saving to the cache rather + key: ${{ github.run_id }}-image-cache-base + # if CI_IMAGE_BASE_TAR isn't in cache, try pulling it and saving to the cache rather # than building, resort back to building if the cache isn't populated and # pulling the image fails. 
- - name: Check if master image build is needed - id: is_master_image_build_needed + - name: Check if base image build is needed + id: is_base_image_build_needed continue-on-error: false run: | set -x - if [ -f ${CI_IMAGE_CACHE}${CI_IMAGE_MASTER_TAR}.gz ]; then - cp ${CI_IMAGE_CACHE}/${CI_IMAGE_MASTER_TAR}.gz ${CI_IMAGE_MASTER_TAR}.gz - gunzip ${CI_IMAGE_MASTER_TAR}.gz - echo "MASTER_IMAGE_RESTORED_FROM_CACHE=true" >> "$GITHUB_OUTPUT" - echo "MASTER_IMAGE_RESTORED=true" >> "$GITHUB_OUTPUT" + if [ -f ${CI_IMAGE_CACHE}${CI_IMAGE_BASE_TAR}.gz ]; then + cp ${CI_IMAGE_CACHE}/${CI_IMAGE_BASE_TAR}.gz ${CI_IMAGE_BASE_TAR}.gz + gunzip ${CI_IMAGE_BASE_TAR}.gz + echo "BASE_IMAGE_RESTORED_FROM_CACHE=true" >> "$GITHUB_OUTPUT" + echo "BASE_IMAGE_RESTORED=true" >> "$GITHUB_OUTPUT" exit 0 fi - if docker pull ghcr.io/ovn-kubernetes/ovn-kubernetes/ovn-kube-fedora:master; then - docker tag ghcr.io/ovn-kubernetes/ovn-kubernetes/ovn-kube-fedora:master ovn-daemonset-fedora:dev + if docker pull ghcr.io/ovn-kubernetes/ovn-kubernetes/ovn-kube-fedora:${{ github.base_ref || github.ref_name }}; then + docker tag ghcr.io/ovn-kubernetes/ovn-kubernetes/ovn-kube-fedora:${{ github.base_ref || github.ref_name }} ovn-daemonset-fedora:dev - echo "MASTER_IMAGE_RESTORED=true" >> "$GITHUB_OUTPUT" + echo "BASE_IMAGE_RESTORED=true" >> "$GITHUB_OUTPUT" exit 0 fi - # only run the following steps if the master image was not found in the cache - - name: Check out code into the Go module directory - from master branch - if: steps.is_master_image_build_needed.outputs.MASTER_IMAGE_RESTORED != 'true' && success() + # only run the following steps if the base image was not found in the cache + - name: Check out code into the Go module directory - from base branch + if: steps.is_base_image_build_needed.outputs.BASE_IMAGE_RESTORED != 'true' && success() uses: actions/checkout@v4 with: - ref: master + ref: ${{ github.base_ref || github.ref_name }} - name: Set up Go - if: steps.is_master_image_build_needed.outputs.MASTER_IMAGE_RESTORED != 'true' && success() + if: steps.is_base_image_build_needed.outputs.BASE_IMAGE_RESTORED != 'true' && success() uses: actions/setup-go@v5 with: go-version-file: 'go-controller/go.mod' @@ -118,8 +118,8 @@ jobs: cache: false id: go - - name: Build - from master branch - if: steps.is_master_image_build_needed.outputs.MASTER_IMAGE_RESTORED != 'true' && success() + - name: Build - from base branch + if: steps.is_base_image_build_needed.outputs.BASE_IMAGE_RESTORED != 'true' && success() run: | set -x pushd go-controller @@ -127,8 +127,8 @@ jobs: make windows popd - - name: Build docker image - from master branch - if: steps.is_master_image_build_needed.outputs.MASTER_IMAGE_RESTORED != 'true' && success() + - name: Build docker image - from base branch + if: steps.is_base_image_build_needed.outputs.BASE_IMAGE_RESTORED != 'true' && success() run: | make -C dist/images \ IMAGE=ovn-daemonset-fedora:dev \ @@ -136,27 +136,27 @@ jobs: OVN_GITREF=${{ env.OVN_GITREF }} \ fedora-image - - name: Cache master image - if: steps.is_master_image_build_needed.outputs.MASTER_IMAGE_RESTORED_FROM_CACHE != 'true' && success() + - name: Cache base image + if: steps.is_base_image_build_needed.outputs.BASE_IMAGE_RESTORED_FROM_CACHE != 'true' && success() continue-on-error: false run: | set -x - if [ -f ${CI_IMAGE_CACHE}${CI_IMAGE_MASTER_TAR} ]; then - rm -f ${CI_IMAGE_CACHE}${CI_IMAGE_MASTER_TAR} + if [ -f ${CI_IMAGE_CACHE}${CI_IMAGE_BASE_TAR} ]; then + rm -f ${CI_IMAGE_CACHE}${CI_IMAGE_BASE_TAR} fi - if [ -f 
${CI_IMAGE_CACHE}${CI_IMAGE_MASTER_TAR}.gz ]; then - rm -f ${CI_IMAGE_CACHE}${CI_IMAGE_MASTER_TAR}.gz + if [ -f ${CI_IMAGE_CACHE}${CI_IMAGE_BASE_TAR}.gz ]; then + rm -f ${CI_IMAGE_CACHE}${CI_IMAGE_BASE_TAR}.gz fi - docker save ovn-daemonset-fedora:dev -o ${CI_IMAGE_MASTER_TAR} + docker save ovn-daemonset-fedora:dev -o ${CI_IMAGE_BASE_TAR} mkdir -p ${CI_IMAGE_CACHE} - cp ${CI_IMAGE_MASTER_TAR} ${CI_IMAGE_CACHE}${CI_IMAGE_MASTER_TAR} - gzip ${CI_IMAGE_CACHE}${CI_IMAGE_MASTER_TAR} + cp ${CI_IMAGE_BASE_TAR} ${CI_IMAGE_CACHE}${CI_IMAGE_BASE_TAR} + gzip ${CI_IMAGE_CACHE}${CI_IMAGE_BASE_TAR} # run the following always if none of the steps before failed - uses: actions/upload-artifact@v4 with: - name: test-image-master - path: ${{ env.CI_IMAGE_MASTER_TAR }} + name: test-image-base + path: ${{ env.CI_IMAGE_BASE_TAR }} build-pr: name: Build-PR @@ -273,12 +273,12 @@ jobs: path: ${{ env.CI_DIST_IMAGES_OUTPUT }}/${{ env.CI_IMAGE_PR_TAR }} ovn-upgrade-e2e: - name: Upgrade OVN from Master to PR branch based image + name: Upgrade OVN from Base to PR branch based image if: github.event_name != 'schedule' runs-on: ubuntu-24.04 timeout-minutes: 120 needs: - - build-master + - build-base - build-pr strategy: fail-fast: false @@ -296,10 +296,10 @@ jobs: OVN_MULTICAST_ENABLE: "false" steps: - - name: Check out code into the Go module directory - from Master branch + - name: Check out code into the Go module directory - from Base branch uses: actions/checkout@v4 with: - ref: master + ref: ${{ github.base_ref || github.ref_name }} - name: Set up Go uses: actions/setup-go@v5 @@ -324,10 +324,10 @@ jobs: - name: Free up disk space uses: ./.github/actions/free-disk-space - - name: Download test-image-master + - name: Download test-image-base uses: actions/download-artifact@v4 with: - name: test-image-master + name: test-image-base - name: Disable ufw # For IPv6 and Dualstack, ufw (Uncomplicated Firewall) should be disabled. 
@@ -337,7 +337,7 @@
     - name: Load docker image
       run: |
-        docker load --input ${CI_IMAGE_MASTER_TAR} && rm -rf ${CI_IMAGE_MASTER_TAR}
+        docker load --input ${CI_IMAGE_BASE_TAR} && rm -rf ${CI_IMAGE_BASE_TAR}
     - name: kind setup
       run: |

From 65e44a735b7278fccec82eeaaa53c487b0382518 Mon Sep 17 00:00:00 2001
From: zhaozhanqi
Date: Sat, 11 Oct 2025 09:24:48 +0000
Subject: [PATCH 05/20] unskip cases as bug is verified

Signed-off-by: zhaozhanqi
---
 test/scripts/e2e-cp.sh | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/test/scripts/e2e-cp.sh b/test/scripts/e2e-cp.sh
index 1ab06622f6..4d096c6ad6 100755
--- a/test/scripts/e2e-cp.sh
+++ b/test/scripts/e2e-cp.sh
@@ -68,7 +68,7 @@ if [ "$OVN_HA" == false ]; then
 # TODO streamline the db delete tests
 skip "recovering from deleting db files while maintaining connectivity"
 skip "Should validate connectivity before and after deleting all the db-pods at once in HA mode"
-else 
+else
 skip "Should validate connectivity before and after deleting all the db-pods at once in Non-HA mode"
 skip "e2e br-int NetFlow export validation"
 fi
@@ -150,7 +150,7 @@ else
 if [ "$ENABLE_NETWORK_SEGMENTATION" = true ]; then
 skip_label "Feature:RouteAdvertisements && EXTENDED"
 fi
- 
+
 # Some test don't work when the default network is advertised, either because
 # the configuration that the test excercises does not make sense for an advertised network, or
 # there is some bug or functional gap
@@ -185,9 +185,6 @@ else
 # https://issues.redhat.com/browse/OCPBUGS-55028
 skip "e2e egress IP validation Cluster Default Network \[secondary-host-eip\]"

- # https://issues.redhat.com/browse/OCPBUGS-50636
- skip "Services of type NodePort should listen on each host addresses"
- skip "Services of type NodePort should work on secondary node interfaces for ETP=local and ETP=cluster when backend pods are also served by EgressIP"
 # https://github.com/ovn-kubernetes/ovn-kubernetes/issues/5240
 skip "e2e control plane test node readiness according to its defaults interface MTU size should get node not ready with a too small MTU"

From cde88f273e741a39a252fe6c330417f2dfc459cf Mon Sep 17 00:00:00 2001
From: zhaozhanqi
Date: Mon, 13 Oct 2025 02:18:39 +0000
Subject: [PATCH 06/20] expectedEndpointsNum should be 2 times the number of endpoints for a dualstack cluster

Signed-off-by: zhaozhanqi
---
 test/e2e/service.go | 21 +++++++++++++++++----
 1 file changed, 17 insertions(+), 4 deletions(-)

diff --git a/test/e2e/service.go b/test/e2e/service.go
index 6acfc77c8d..1cd8f560f1 100644
--- a/test/e2e/service.go
+++ b/test/e2e/service.go
@@ -917,14 +917,20 @@ var _ = ginkgo.Describe("Services", feature.Service, func() {
 		framework.ExpectNoError(err)

 		ginkgo.By("Waiting for the endpoints to pop up")
+		var expectedEndpointsNum int
+		if isDualStackCluster(nodes) {
+			expectedEndpointsNum = len(endPoints) * 2
+		} else {
+			expectedEndpointsNum = len(endPoints)
+		}
 		err = framework.WaitForServiceEndpointsNum(context.TODO(), f.ClientSet, f.Namespace.Name,
-			etpLocalServiceName, len(endPoints), time.Second, wait.ForeverTestTimeout)
+			etpLocalServiceName, expectedEndpointsNum, time.Second, wait.ForeverTestTimeout)
 		framework.ExpectNoError(err, "failed to validate endpoints for service %s in namespace: %s",
 			etpLocalServiceName, f.Namespace.Name)

 		err = framework.WaitForServiceEndpointsNum(context.TODO(), f.ClientSet, f.Namespace.Name,
-			etpClusterServiceName, len(endPoints), time.Second, wait.ForeverTestTimeout)
+			etpClusterServiceName, expectedEndpointsNum, time.Second, wait.ForeverTestTimeout)
framework.ExpectNoError(err, "failed to validate endpoints for service %s in namespace: %s",
 			etpClusterServiceName, f.Namespace.Name)

@@ -1171,13 +1177,20 @@ spec:

 		ginkgo.By("Waiting for the endpoints to pop up")
+
+		var expectedEndpointsNum int
+		if isDualStackCluster(nodes) {
+			expectedEndpointsNum = len(endPoints) * 2
+		} else {
+			expectedEndpointsNum = len(endPoints)
+		}
+
 		err = framework.WaitForServiceEndpointsNum(context.TODO(), f.ClientSet, f.Namespace.Name,
-			etpLocalServiceName, len(endPoints), time.Second, wait.ForeverTestTimeout)
+			etpLocalServiceName, expectedEndpointsNum, time.Second, wait.ForeverTestTimeout)
 		framework.ExpectNoError(err, "failed to validate endpoints for service %s in namespace: %s",
 			etpLocalServiceName, f.Namespace.Name)

 		err = framework.WaitForServiceEndpointsNum(context.TODO(), f.ClientSet, f.Namespace.Name,
-			etpClusterServiceName, len(endPoints), time.Second, wait.ForeverTestTimeout)
+			etpClusterServiceName, expectedEndpointsNum, time.Second, wait.ForeverTestTimeout)
 		framework.ExpectNoError(err, "failed to validate endpoints for service %s in namespace: %s",
 			etpClusterServiceName, f.Namespace.Name)

From 06515935075a54a58191d32f95cc8911d7ab86fb Mon Sep 17 00:00:00 2001
From: Andreas Karis
Date: Tue, 23 Sep 2025 20:26:50 +0200
Subject: [PATCH 07/20] Fix named port handling in externalTrafficPolicy Local services

Addresses incorrect DNAT rules with :0 target port when using services
with externalTrafficPolicy: Local and named ports.

The issue occurred when allocateLoadBalancerNodePorts was false and
services referenced pod named ports. The previous implementation used
svcPort.TargetPort.IntValue(), which returns 0 for named ports, causing
invalid DNAT rules.

This refactoring introduces and uses structured endpoint types that
properly handle port mapping from endpoint slices, ensuring the actual
pod port numbers are used instead of attempting to convert named ports
to integers.

This change unifies endpoint processing logic by having both the
services controller and nodePortWatcher use the same
GetEndpointsForService function. This ensures consistent endpoint
resolution and port mapping behavior across all service-related
components, preventing divergence in logic and similar unnoticed port
handling issues in the future.
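To make the failure mode concrete, here is a minimal, self-contained Go
sketch (the service and EndpointSlice values are hypothetical, and the
patch itself resolves ports through GetEndpointsForService rather than
this inline matching): IntValue() on a named TargetPort falls back to 0,
so a DNAT target built from it degenerates to "<ip>:0", while the
EndpointSlice port carries the real numeric pod port:

	package main

	import (
		"fmt"

		corev1 "k8s.io/api/core/v1"
		discovery "k8s.io/api/discovery/v1"
		"k8s.io/apimachinery/pkg/util/intstr"
	)

	func main() {
		svcPort := corev1.ServicePort{
			Name:       "https",
			Protocol:   corev1.ProtocolTCP,
			Port:       8080,
			TargetPort: intstr.FromString("https"), // named target port
		}

		// Broken derivation: a named port has no integer value.
		fmt.Println(svcPort.TargetPort.IntValue()) // 0 -> DNAT to "10.244.0.3:0"

		// Correct derivation: match the slice port by protocol and name,
		// then use its numeric pod port.
		name, port, proto := "https", int32(443), corev1.ProtocolTCP
		epPort := discovery.EndpointPort{Name: &name, Port: &port, Protocol: &proto}
		if *epPort.Name == svcPort.Name && *epPort.Protocol == svcPort.Protocol {
			fmt.Println(*epPort.Port) // 443 -> DNAT to "10.244.0.3:443"
		}
	}
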
Signed-off-by: Andreas Karis --- go-controller/pkg/node/gateway_iptables.go | 29 +- .../pkg/node/gateway_localnet_linux_test.go | 47 +- go-controller/pkg/node/gateway_nftables.go | 31 +- go-controller/pkg/node/gateway_shared_intf.go | 60 ++- .../pkg/ovn/controller/services/lb_config.go | 144 +----- .../ovn/controller/services/lb_config_test.go | 416 +++++++++--------- .../services/services_controller.go | 2 +- go-controller/pkg/util/kube.go | 10 +- go-controller/pkg/util/kube_test.go | 13 +- go-controller/pkg/util/util.go | 390 ++++++++++++++++ 10 files changed, 738 insertions(+), 404 deletions(-) diff --git a/go-controller/pkg/node/gateway_iptables.go b/go-controller/pkg/node/gateway_iptables.go index 90bffbe91f..d68f770121 100644 --- a/go-controller/pkg/node/gateway_iptables.go +++ b/go-controller/pkg/node/gateway_iptables.go @@ -183,15 +183,30 @@ func computeProbability(n, i int) string { return fmt.Sprintf("%0.10f", 1.0/float64(n-i+1)) } -func generateIPTRulesForLoadBalancersWithoutNodePorts(svcPort corev1.ServicePort, externalIP string, localEndpoints []string) []nodeipt.Rule { - iptRules := make([]nodeipt.Rule, 0, len(localEndpoints)) +// generateIPTRulesForLoadBalancersWithoutNodePorts generates iptables DNAT rules for load balancer services +// without NodePort allocation. It performs statistical load balancing between endpoints via iptables. +func generateIPTRulesForLoadBalancersWithoutNodePorts(svcPort corev1.ServicePort, externalIP string, localEndpoints util.PortToLBEndpoints) []nodeipt.Rule { if len(localEndpoints) == 0 { // either its smart nic mode; etp&itp not implemented, OR // fetching endpointSlices error-ed out prior to reaching here so nothing to do - return iptRules + return []nodeipt.Rule{} } - numLocalEndpoints := len(localEndpoints) - for i, ip := range localEndpoints { + + // Get the endpoints for the port key. + // svcPortKey is of format e.g. "TCP/my-port-name" or "TCP/" if name is empty + // (is the case when only a single ServicePort is defined on this service). + svcPortKey := util.GetServicePortKey(svcPort.Protocol, svcPort.Name) + lbEndpoints := localEndpoints[svcPortKey] + + // Get IPv4 or IPv6 IPs, depending on the type of the service's external IP. + destinations := lbEndpoints.GetV4Destinations() + if utilnet.IsIPv6String(externalIP) { + destinations = lbEndpoints.GetV6Destinations() + } + + numLocalEndpoints := len(destinations) + iptRules := make([]nodeipt.Rule, 0, numLocalEndpoints) + for i, destination := range destinations { iptRules = append([]nodeipt.Rule{ { Table: "nat", @@ -201,7 +216,7 @@ func generateIPTRulesForLoadBalancersWithoutNodePorts(svcPort corev1.ServicePort "-d", externalIP, "--dport", fmt.Sprintf("%v", svcPort.Port), "-j", "DNAT", - "--to-destination", util.JoinHostPortInt32(ip, int32(svcPort.TargetPort.IntValue())), + "--to-destination", util.JoinHostPortInt32(destination.IP, destination.Port), "-m", "statistic", "--mode", "random", "--probability", computeProbability(numLocalEndpoints, i+1), @@ -476,7 +491,7 @@ func recreateIPTRules(table, chain string, keepIPTRules []nodeipt.Rule) error { // case3: if svcHasLocalHostNetEndPnt and svcTypeIsITPLocal, rule that redirects clusterIP traffic to host targetPort is added. // // if !svcHasLocalHostNetEndPnt and svcTypeIsITPLocal, rule that marks clusterIP traffic to steer it to ovn-k8s-mp0 is added. 
-func getGatewayIPTRules(service *corev1.Service, localEndpoints []string, svcHasLocalHostNetEndPnt bool) []nodeipt.Rule { +func getGatewayIPTRules(service *corev1.Service, localEndpoints util.PortToLBEndpoints, svcHasLocalHostNetEndPnt bool) []nodeipt.Rule { rules := make([]nodeipt.Rule, 0) clusterIPs := util.GetClusterIPs(service) svcTypeIsETPLocal := util.ServiceExternalTrafficPolicyLocal(service) diff --git a/go-controller/pkg/node/gateway_localnet_linux_test.go b/go-controller/pkg/node/gateway_localnet_linux_test.go index 49e4d1ee13..c67958f383 100644 --- a/go-controller/pkg/node/gateway_localnet_linux_test.go +++ b/go-controller/pkg/node/gateway_localnet_linux_test.go @@ -562,6 +562,7 @@ var _ = Describe("Node Operations", func() { config.Gateway.Mode = config.GatewayModeLocal epPortName := "https" epPortValue := int32(443) + epPortProtocol := corev1.ProtocolTCP service := *newService("service1", "namespace1", "10.129.0.2", []corev1.ServicePort{ { @@ -579,8 +580,9 @@ var _ = Describe("Node Operations", func() { Addresses: []string{"10.244.0.3"}, } epPort1 := discovery.EndpointPort{ - Name: &epPortName, - Port: &epPortValue, + Name: &epPortName, + Port: &epPortValue, + Protocol: &epPortProtocol, } // endpointSlice.Endpoints is ovn-networked so this will // come under !hasLocalHostNetEp case @@ -878,9 +880,11 @@ var _ = Describe("Node Operations", func() { Cmd: "ovs-ofctl show ", Err: fmt.Errorf("deliberate error to fall back to output:LOCAL"), }) + svcPortName := "http" service := *newServiceWithoutNodePortAllocation("service1", "namespace1", "10.129.0.2", []corev1.ServicePort{ { + Name: svcPortName, Protocol: corev1.ProtocolTCP, Port: int32(80), TargetPort: intstr.FromInt(int(int32(8080))), @@ -910,11 +914,12 @@ var _ = Describe("Node Operations", func() { Addresses: []string{"10.244.0.4"}, NodeName: &fakeNodeName, } - epPortName := "http" epPortValue := int32(8080) + epPortProtocol := corev1.ProtocolTCP epPort1 := discovery.EndpointPort{ - Name: &epPortName, - Port: &epPortValue, + Name: &svcPortName, + Port: &epPortValue, + Protocol: &epPortProtocol, } // endpointSlice.Endpoints is ovn-networked so this will // come under !hasLocalHostNetEp case @@ -2092,6 +2097,7 @@ var _ = Describe("Node Operations", func() { config.Gateway.Mode = config.GatewayModeLocal epPortName := "https" epPortValue := int32(443) + epPortProtocol := corev1.ProtocolTCP service := *newService("service1", "namespace1", "10.129.0.2", []corev1.ServicePort{ { @@ -2109,8 +2115,9 @@ var _ = Describe("Node Operations", func() { Addresses: []string{"10.244.0.3"}, } epPort1 := discovery.EndpointPort{ - Name: &epPortName, - Port: &epPortValue, + Name: &epPortName, + Port: &epPortValue, + Protocol: &epPortProtocol, } // endpointSlice.Endpoints is ovn-networked so this will come // under !hasLocalHostNetEp case @@ -2231,6 +2238,7 @@ var _ = Describe("Node Operations", func() { config.Gateway.Mode = config.GatewayModeShared epPortName := "https" epPortValue := int32(443) + epPortProtocol := corev1.ProtocolTCP service := *newService("service1", "namespace1", "10.129.0.2", []corev1.ServicePort{ { @@ -2249,8 +2257,9 @@ var _ = Describe("Node Operations", func() { Addresses: []string{"10.244.0.3"}, } epPort1 := discovery.EndpointPort{ - Name: &epPortName, - Port: &epPortValue, + Name: &epPortName, + Port: &epPortValue, + Protocol: &epPortProtocol, } // endpointSlice.Endpoints is ovn-networked so this will come // under !hasLocalHostNetEp case @@ -2376,9 +2385,11 @@ var _ = Describe("Node Operations", func() { outport := 
int32(443) epPortName := "https" epPortValue := int32(443) + epPortProtocol := corev1.ProtocolTCP service := *newService("service1", "namespace1", "10.129.0.2", []corev1.ServicePort{ { + Name: epPortName, NodePort: int32(31111), Protocol: corev1.ProtocolTCP, Port: int32(8080), @@ -2396,8 +2407,9 @@ var _ = Describe("Node Operations", func() { NodeName: &fakeNodeName, } epPort1 := discovery.EndpointPort{ - Name: &epPortName, - Port: &epPortValue, + Name: &epPortName, + Port: &epPortValue, + Protocol: &epPortProtocol, } // endpointSlice.Endpoints is ovn-networked so this will // come under !hasLocalHostNetEp case @@ -2522,6 +2534,7 @@ var _ = Describe("Node Operations", func() { config.Gateway.Mode = config.GatewayModeShared epPortName := "https" epPortValue := int32(443) + epPortProtocol := corev1.ProtocolTCP service := *newService("service1", "namespace1", "10.129.0.2", []corev1.ServicePort{ { @@ -2539,8 +2552,9 @@ var _ = Describe("Node Operations", func() { Addresses: []string{"10.244.0.3"}, } epPort1 := discovery.EndpointPort{ - Name: &epPortName, - Port: &epPortValue, + Name: &epPortName, + Port: &epPortValue, + Protocol: &epPortProtocol, } // endpointSlice.Endpoints is ovn-networked so this will // come under !hasLocalHostNetEp case @@ -2665,9 +2679,11 @@ var _ = Describe("Node Operations", func() { config.Gateway.Mode = config.GatewayModeLocal epPortName := "https" outport := int32(443) + epPortProtocol := corev1.ProtocolTCP service := *newService("service1", "namespace1", "10.129.0.2", []corev1.ServicePort{ { + Name: epPortName, NodePort: int32(31111), Protocol: corev1.ProtocolTCP, Port: int32(8080), @@ -2684,8 +2700,9 @@ var _ = Describe("Node Operations", func() { NodeName: &fakeNodeName, } epPort1 := discovery.EndpointPort{ - Name: &epPortName, - Port: &outport, + Name: &epPortName, + Port: &outport, + Protocol: &epPortProtocol, } // endpointSlice.Endpoints is host-networked so this will // come under hasLocalHostNetEp case diff --git a/go-controller/pkg/node/gateway_nftables.go b/go-controller/pkg/node/gateway_nftables.go index b38f2baebb..eb776c1f5f 100644 --- a/go-controller/pkg/node/gateway_nftables.go +++ b/go-controller/pkg/node/gateway_nftables.go @@ -55,23 +55,34 @@ func getNoSNATNodePortRules(svcPort corev1.ServicePort) []*knftables.Element { // "mgmtport-no-snat-services-v4" and "mgmtport-no-snat-services-v6" sets to prevent SNAT // of sourceIP when passing through the management port, for an `externalTrafficPolicy: // Local` service *without* NodePorts. -func getNoSNATLoadBalancerIPRules(svcPort corev1.ServicePort, localEndpoints []string) []*knftables.Element { +func getNoSNATLoadBalancerIPRules(svcPort corev1.ServicePort, localEndpoints util.PortToLBEndpoints) []*knftables.Element { var nftRules []*knftables.Element protocol := strings.ToLower(string(svcPort.Protocol)) - port := fmt.Sprintf("%v", svcPort.TargetPort.IntValue()) - for _, ip := range localEndpoints { - setName := types.NFTMgmtPortNoSNATServicesV4 - if utilnet.IsIPv6String(ip) { - setName = types.NFTMgmtPortNoSNATServicesV6 - } + // Get the endpoints for the port key. + // svcPortKey is of format e.g. "TCP/my-port-name" or "TCP/" if name is empty + // (is the case when only a single ServicePort is defined on this service). 
+ svcPortKey := util.GetServicePortKey(svcPort.Protocol, svcPort.Name) + lbEndpoints := localEndpoints[svcPortKey] + + for _, destination := range lbEndpoints.GetV4Destinations() { nftRules = append(nftRules, &knftables.Element{ - Set: setName, - Key: []string{ip, protocol, port}, + Set: types.NFTMgmtPortNoSNATServicesV4, + Key: []string{destination.IP, protocol, fmt.Sprintf("%d", destination.Port)}, }, ) } + + for _, destination := range lbEndpoints.GetV6Destinations() { + nftRules = append(nftRules, + &knftables.Element{ + Set: types.NFTMgmtPortNoSNATServicesV6, + Key: []string{destination.IP, protocol, fmt.Sprintf("%d", destination.Port)}, + }, + ) + } + return nftRules } @@ -164,7 +175,7 @@ func recreateNFTMap(mapName string, keepNFTElems []*knftables.Element) error { // getGatewayNFTRules returns nftables rules for service. This must be used in conjunction // with getGatewayIPTRules. -func getGatewayNFTRules(service *corev1.Service, localEndpoints []string, svcHasLocalHostNetEndPnt bool) []*knftables.Element { +func getGatewayNFTRules(service *corev1.Service, localEndpoints util.PortToLBEndpoints, svcHasLocalHostNetEndPnt bool) []*knftables.Element { rules := make([]*knftables.Element, 0) svcTypeIsETPLocal := util.ServiceExternalTrafficPolicyLocal(service) for _, svcPort := range service.Spec.Ports { diff --git a/go-controller/pkg/node/gateway_shared_intf.go b/go-controller/pkg/node/gateway_shared_intf.go index 34f84dfc8c..8c70a081c3 100644 --- a/go-controller/pkg/node/gateway_shared_intf.go +++ b/go-controller/pkg/node/gateway_shared_intf.go @@ -208,7 +208,7 @@ type serviceConfig struct { // hasLocalHostNetworkEp will be true for a service if it has at least one endpoint which is "hostnetworked&local-to-this-node". hasLocalHostNetworkEp bool // localEndpoints stores all the local non-host-networked endpoints for this service - localEndpoints sets.Set[string] + localEndpoints util.PortToLBEndpoints } type cidrAndFlags struct { @@ -616,7 +616,7 @@ func (npw *nodePortWatcher) getServiceInfo(index ktypes.NamespacedName) (out *se } // getAndSetServiceInfo creates and sets the serviceConfig, returns if it existed and whatever was there -func (npw *nodePortWatcher) getAndSetServiceInfo(index ktypes.NamespacedName, service *corev1.Service, hasLocalHostNetworkEp bool, localEndpoints sets.Set[string]) (old *serviceConfig, exists bool) { +func (npw *nodePortWatcher) getAndSetServiceInfo(index ktypes.NamespacedName, service *corev1.Service, hasLocalHostNetworkEp bool, localEndpoints util.PortToLBEndpoints) (old *serviceConfig, exists bool) { npw.serviceInfoLock.Lock() defer npw.serviceInfoLock.Unlock() @@ -630,7 +630,7 @@ func (npw *nodePortWatcher) getAndSetServiceInfo(index ktypes.NamespacedName, se } // addOrSetServiceInfo creates and sets the serviceConfig if it doesn't exist -func (npw *nodePortWatcher) addOrSetServiceInfo(index ktypes.NamespacedName, service *corev1.Service, hasLocalHostNetworkEp bool, localEndpoints sets.Set[string]) (exists bool) { +func (npw *nodePortWatcher) addOrSetServiceInfo(index ktypes.NamespacedName, service *corev1.Service, hasLocalHostNetworkEp bool, localEndpoints util.PortToLBEndpoints) (exists bool) { npw.serviceInfoLock.Lock() defer npw.serviceInfoLock.Unlock() @@ -645,7 +645,7 @@ func (npw *nodePortWatcher) addOrSetServiceInfo(index ktypes.NamespacedName, ser // updateServiceInfo sets the serviceConfig for a service and returns the existing serviceConfig, if inputs are nil // do not update those fields, if it does not exist return nil. 
-func (npw *nodePortWatcher) updateServiceInfo(index ktypes.NamespacedName, service *corev1.Service, hasLocalHostNetworkEp *bool, localEndpoints sets.Set[string]) (old *serviceConfig, exists bool) { +func (npw *nodePortWatcher) updateServiceInfo(index ktypes.NamespacedName, service *corev1.Service, hasLocalHostNetworkEp *bool, localEndpoints util.PortToLBEndpoints) (old *serviceConfig, exists bool) { npw.serviceInfoLock.Lock() defer npw.serviceInfoLock.Unlock() @@ -672,7 +672,7 @@ func (npw *nodePortWatcher) updateServiceInfo(index ktypes.NamespacedName, servi // addServiceRules ensures the correct iptables rules and OpenFlow physical // flows are programmed for a given service and endpoint configuration -func addServiceRules(service *corev1.Service, netInfo util.NetInfo, localEndpoints []string, svcHasLocalHostNetEndPnt bool, npw *nodePortWatcher) error { +func addServiceRules(service *corev1.Service, netInfo util.NetInfo, localEndpoints util.PortToLBEndpoints, svcHasLocalHostNetEndPnt bool, npw *nodePortWatcher) error { // For dpu or Full mode var err error var errors []error @@ -716,7 +716,7 @@ func addServiceRules(service *corev1.Service, netInfo util.NetInfo, localEndpoin // delServiceRules deletes all possible iptables rules and OpenFlow physical // flows for a service -func delServiceRules(service *corev1.Service, localEndpoints []string, npw *nodePortWatcher) error { +func delServiceRules(service *corev1.Service, localEndpoints util.PortToLBEndpoints, npw *nodePortWatcher) error { var err error var errors []error // full mode || dpu mode @@ -820,7 +820,7 @@ func serviceUpdateNotNeeded(old, new *corev1.Service) bool { // AddService handles configuring shared gateway bridge flows to steer External IP, Node Port, Ingress LB traffic into OVN func (npw *nodePortWatcher) AddService(service *corev1.Service) error { - var localEndpoints sets.Set[string] + var localEndpoints util.PortToLBEndpoints var hasLocalHostNetworkEp bool if !util.ServiceTypeHasClusterIP(service) || !util.IsClusterIPSet(service) { return nil @@ -853,9 +853,9 @@ func (npw *nodePortWatcher) AddService(service *corev1.Service) error { if exists := npw.addOrSetServiceInfo(name, service, hasLocalHostNetworkEp, localEndpoints); !exists { klog.V(5).Infof("Service Add %s event in namespace %s came before endpoint event setting svcConfig", service.Name, service.Namespace) - if err := addServiceRules(service, netInfo, sets.List(localEndpoints), hasLocalHostNetworkEp, npw); err != nil { + if err := addServiceRules(service, netInfo, localEndpoints, hasLocalHostNetworkEp, npw); err != nil { npw.getAndDeleteServiceInfo(name) - return fmt.Errorf("AddService failed for nodePortWatcher: %w, trying delete: %v", err, delServiceRules(service, sets.List(localEndpoints), npw)) + return fmt.Errorf("AddService failed for nodePortWatcher: %w, trying delete: %v", err, delServiceRules(service, localEndpoints, npw)) } } else { // Need to update flows here in case an attribute of the gateway has changed, such as MAC address @@ -892,7 +892,7 @@ func (npw *nodePortWatcher) UpdateService(old, new *corev1.Service) error { // so that we don't miss any endpoint update events here klog.V(5).Infof("Deleting old service rules for: %v", old) - if err = delServiceRules(old, sets.List(svcConfig.localEndpoints), npw); err != nil { + if err = delServiceRules(old, svcConfig.localEndpoints, npw); err != nil { errors = append(errors, err) } } @@ -905,7 +905,7 @@ func (npw *nodePortWatcher) UpdateService(old, new *corev1.Service) error { return fmt.Errorf("error 
getting active network for service %s in namespace %s: %w", new.Name, new.Namespace, err) } - if err = addServiceRules(new, netInfo, sets.List(svcConfig.localEndpoints), svcConfig.hasLocalHostNetworkEp, npw); err != nil { + if err = addServiceRules(new, netInfo, svcConfig.localEndpoints, svcConfig.hasLocalHostNetworkEp, npw); err != nil { errors = append(errors, err) } } @@ -968,7 +968,7 @@ func (npw *nodePortWatcher) DeleteService(service *corev1.Service) error { klog.V(5).Infof("Deleting service %s in namespace %s", service.Name, service.Namespace) name := ktypes.NamespacedName{Namespace: service.Namespace, Name: service.Name} if svcConfig, exists := npw.getAndDeleteServiceInfo(name); exists { - if err = delServiceRules(svcConfig.service, sets.List(svcConfig.localEndpoints), npw); err != nil { + if err = delServiceRules(svcConfig.service, svcConfig.localEndpoints, npw); err != nil { errors = append(errors, err) } } else { @@ -1040,9 +1040,8 @@ func (npw *nodePortWatcher) SyncServices(services []interface{}) error { } // Add correct netfilter rules only for Full mode if !npw.dpuMode { - localEndpointsArray := sets.List(localEndpoints) - keepIPTRules = append(keepIPTRules, getGatewayIPTRules(service, localEndpointsArray, hasLocalHostNetworkEp)...) - keepNFTSetElems = append(keepNFTSetElems, getGatewayNFTRules(service, localEndpointsArray, hasLocalHostNetworkEp)...) + keepIPTRules = append(keepIPTRules, getGatewayIPTRules(service, localEndpoints, hasLocalHostNetworkEp)...) + keepNFTSetElems = append(keepNFTSetElems, getGatewayNFTRules(service, localEndpoints, hasLocalHostNetworkEp)...) if util.IsNetworkSegmentationSupportEnabled() && netInfo.IsPrimaryNetwork() { netConfig := npw.ofm.getActiveNetwork(netInfo) if netConfig == nil { @@ -1140,7 +1139,7 @@ func (npw *nodePortWatcher) AddEndpointSlice(epSlice *discovery.EndpointSlice) e out, exists := npw.getServiceInfo(*svcNamespacedName) if !exists { klog.V(5).Infof("Endpointslice %s ADD event in namespace %s is creating rules", epSlice.Name, epSlice.Namespace) - if err = addServiceRules(svc, netInfo, sets.List(localEndpoints), hasLocalHostNetworkEp, npw); err != nil { + if err = addServiceRules(svc, netInfo, localEndpoints, hasLocalHostNetworkEp, npw); err != nil { return err } npw.addOrSetServiceInfo(*svcNamespacedName, svc, hasLocalHostNetworkEp, localEndpoints) @@ -1150,10 +1149,10 @@ func (npw *nodePortWatcher) AddEndpointSlice(epSlice *discovery.EndpointSlice) e if out.hasLocalHostNetworkEp != hasLocalHostNetworkEp || (!util.LoadBalancerServiceHasNodePortAllocation(svc) && !reflect.DeepEqual(out.localEndpoints, localEndpoints)) { klog.V(5).Infof("Endpointslice %s ADD event in namespace %s is updating rules", epSlice.Name, epSlice.Namespace) - if err = delServiceRules(svc, sets.List(out.localEndpoints), npw); err != nil { + if err = delServiceRules(svc, out.localEndpoints, npw); err != nil { errors = append(errors, err) } - if err = addServiceRules(svc, netInfo, sets.List(localEndpoints), hasLocalHostNetworkEp, npw); err != nil { + if err = addServiceRules(svc, netInfo, localEndpoints, hasLocalHostNetworkEp, npw); err != nil { errors = append(errors, err) } else { npw.updateServiceInfo(*svcNamespacedName, svc, &hasLocalHostNetworkEp, localEndpoints) @@ -1211,10 +1210,10 @@ func (npw *nodePortWatcher) DeleteEndpointSlice(epSlice *discovery.EndpointSlice npw.serviceInfoLock.Lock() defer npw.serviceInfoLock.Unlock() - if err = delServiceRules(svcConfig.service, sets.List(svcConfig.localEndpoints), npw); err != nil { + if err = 
delServiceRules(svcConfig.service, svcConfig.localEndpoints, npw); err != nil { errors = append(errors, err) } - if err = addServiceRules(svcConfig.service, netInfo, sets.List(localEndpoints), hasLocalHostNetworkEp, npw); err != nil { + if err = addServiceRules(svcConfig.service, netInfo, localEndpoints, hasLocalHostNetworkEp, npw); err != nil { errors = append(errors, err) } return utilerrors.Join(errors...) @@ -1222,9 +1221,24 @@ func (npw *nodePortWatcher) DeleteEndpointSlice(epSlice *discovery.EndpointSlice return nil } -// GetLocalEndpointAddresses returns a list of eligible endpoints that are local to the node -func (npw *nodePortWatcher) GetLocalEligibleEndpointAddresses(endpointSlices []*discovery.EndpointSlice, service *corev1.Service) sets.Set[string] { - return util.GetLocalEligibleEndpointAddressesFromSlices(endpointSlices, service, npw.nodeIPManager.nodeName) +// GetLocalEligibleEndpointAddresses returns eligible endpoints that are local to the node. +// This method uses util.GetEndpointsForService, the same as the services Controller via buildServiceLBConfigs, +// meaning that the nodePortWatcher and the services Controller now use common logic to build their service endpoints. +func (npw *nodePortWatcher) GetLocalEligibleEndpointAddresses(endpointSlices []*discovery.EndpointSlice, + service *corev1.Service) util.PortToLBEndpoints { + s := sets.Set[string]{} + s.Insert(npw.nodeIPManager.nodeName) + _, portToNodeToLBEndpoints, err := util.GetEndpointsForService(endpointSlices, service, s, false, true) + if err != nil { + if service != nil { + klog.Warningf("Failed to get local endpoints for service %s/%s on node %s: %v", + service.Namespace, service.Name, npw.nodeIPManager.nodeName, err) + } else { + klog.Warningf("Failed to get local endpoints on node %s: %v", npw.nodeIPManager.nodeName, err) + } + } + + return portToNodeToLBEndpoints.GetNode(npw.nodeIPManager.nodeName) } func (npw *nodePortWatcher) UpdateEndpointSlice(oldEpSlice, newEpSlice *discovery.EndpointSlice) error { diff --git a/go-controller/pkg/ovn/controller/services/lb_config.go b/go-controller/pkg/ovn/controller/services/lb_config.go index 461827041c..c58b278e46 100644 --- a/go-controller/pkg/ovn/controller/services/lb_config.go +++ b/go-controller/pkg/ovn/controller/services/lb_config.go @@ -29,8 +29,8 @@ type lbConfig struct { protocol corev1.Protocol // TCP, UDP, or SCTP inport int32 // the incoming (virtual) port number - clusterEndpoints lbEndpoints // addresses of cluster-wide endpoints - nodeEndpoints map[string]lbEndpoints // node -> addresses of local endpoints + clusterEndpoints util.LBEndpoints // addresses of cluster-wide endpoints + nodeEndpoints map[string]util.LBEndpoints // node -> addresses of local endpoints // if true, then vips added on the router are in "local" mode // that means, skipSNAT, and remove any non-local endpoints. @@ -43,12 +43,6 @@ type lbConfig struct { hasNodePort bool } -type lbEndpoints struct { - Port int32 - V4IPs []string - V6IPs []string -} - func makeNodeSwitchTargetIPs(node string, c *lbConfig) (targetIPsV4, targetIPsV6 []string, v4Changed, v6Changed bool) { targetIPsV4 = c.clusterEndpoints.V4IPs targetIPsV6 = c.clusterEndpoints.V6IPs @@ -137,7 +131,7 @@ var protos = []corev1.Protocol{ // - services with NodePort set but *without* ExternalTrafficPolicy=Local or // affinity timeout set. 
func buildServiceLBConfigs(service *corev1.Service, endpointSlices []*discovery.EndpointSlice, nodeInfos []nodeInfo, - useLBGroup, useTemplates bool, networkName string) (perNodeConfigs, templateConfigs, clusterConfigs []lbConfig) { + useLBGroup, useTemplates bool) (perNodeConfigs, templateConfigs, clusterConfigs []lbConfig) { needsAffinityTimeout := hasSessionAffinityTimeOut(service) @@ -146,13 +140,21 @@ func buildServiceLBConfigs(service *corev1.Service, endpointSlices []*discovery. nodes.Insert(n.name) } // get all the endpoints classified by port and by port,node - portToClusterEndpoints, portToNodeToEndpoints := getEndpointsForService(endpointSlices, service, nodes, networkName) + needsLocalEndpoints := util.ServiceExternalTrafficPolicyLocal(service) || util.ServiceInternalTrafficPolicyLocal(service) + portToClusterEndpoints, portToNodeToEndpoints, err := util.GetEndpointsForService(endpointSlices, service, nodes, true, needsLocalEndpoints) + if err != nil { + if service != nil { + klog.Warningf("Failed to get endpoints for service %s/%s during LB config build: %v", service.Namespace, service.Name, err) + } else { + klog.Warningf("Failed to get endpoints for service during LB config build: %v", err) + } + } for _, svcPort := range service.Spec.Ports { - svcPortKey := getServicePortKey(svcPort.Protocol, svcPort.Name) + svcPortKey := util.GetServicePortKey(svcPort.Protocol, svcPort.Name) clusterEndpoints := portToClusterEndpoints[svcPortKey] nodeEndpoints := portToNodeToEndpoints[svcPortKey] if nodeEndpoints == nil { - nodeEndpoints = make(map[string]lbEndpoints) + nodeEndpoints = make(map[string]util.LBEndpoints) } // if ExternalTrafficPolicy or InternalTrafficPolicy is local, then we need to do things a bit differently externalTrafficLocal := util.ServiceExternalTrafficPolicyLocal(service) @@ -888,121 +890,3 @@ func joinHostsPort(ips []string, port int32) []Addr { } return out } - -func getServicePortKey(protocol corev1.Protocol, name string) string { - return fmt.Sprintf("%s/%s", protocol, name) -} - -// GetEndpointsForService takes a service, all its slices and the list of nodes in the OVN zone -// and returns two maps that hold all the endpoint addresses for the service: -// one classified by port, one classified by port,node. This second map is only filled in -// when the service needs local (per-node) endpoints, that is when ETP=local or ITP=local. -// The node list helps to keep the resulting map small, since we're only interested in local endpoints. 
-func getEndpointsForService(slices []*discovery.EndpointSlice, service *corev1.Service, nodes sets.Set[string], - networkName string) (map[string]lbEndpoints, map[string]map[string]lbEndpoints) { - - // classify endpoints - ports := map[string]int32{} - portToEndpoints := map[string][]discovery.Endpoint{} - portToNodeToEndpoints := map[string]map[string][]discovery.Endpoint{} - requiresLocalEndpoints := util.ServiceExternalTrafficPolicyLocal(service) || util.ServiceInternalTrafficPolicyLocal(service) - - for _, port := range service.Spec.Ports { - name := getServicePortKey(port.Protocol, port.Name) - ports[name] = 0 - } - - for _, slice := range slices { - - if slice.AddressType == discovery.AddressTypeFQDN { - continue // consider only v4 and v6, discard FQDN - } - - slicePorts := make([]string, 0, len(slice.Ports)) - - for _, port := range slice.Ports { - // check if there's a service port matching the slice protocol/name - slicePortName := "" - if port.Name != nil { - slicePortName = *port.Name - } - name := getServicePortKey(*port.Protocol, slicePortName) - if _, hasPort := ports[name]; hasPort { - slicePorts = append(slicePorts, name) - ports[name] = *port.Port - continue - } - // service port name might be empty: check against slice protocol/"" - noName := getServicePortKey(*port.Protocol, "") - if _, hasPort := ports[noName]; hasPort { - slicePorts = append(slicePorts, name) - ports[noName] = *port.Port - } - } - for _, endpoint := range slice.Endpoints { - for _, port := range slicePorts { - - portToEndpoints[port] = append(portToEndpoints[port], endpoint) - - // won't add items to portToNodeToEndpoints if the service doesn't need it, - // the endpoint is not assigned to a node yet or the endpoint is not local to the OVN zone - if !requiresLocalEndpoints || endpoint.NodeName == nil || !nodes.Has(*endpoint.NodeName) { - continue - } - if portToNodeToEndpoints[port] == nil { - portToNodeToEndpoints[port] = make(map[string][]discovery.Endpoint, len(nodes)) - } - - if portToNodeToEndpoints[port][*endpoint.NodeName] == nil { - portToNodeToEndpoints[port][*endpoint.NodeName] = []discovery.Endpoint{} - } - portToNodeToEndpoints[port][*endpoint.NodeName] = append(portToNodeToEndpoints[port][*endpoint.NodeName], endpoint) - } - } - } - - // get eligible endpoint addresses - portToLBEndpoints := make(map[string]lbEndpoints, len(portToEndpoints)) - portToNodeToLBEndpoints := make(map[string]map[string]lbEndpoints, len(portToEndpoints)) - - for port, endpoints := range portToEndpoints { - addresses := util.GetEligibleEndpointAddresses(endpoints, service) - v4IPs, _ := util.MatchAllIPStringFamily(false, addresses) - v6IPs, _ := util.MatchAllIPStringFamily(true, addresses) - if len(v4IPs) > 0 || len(v6IPs) > 0 { - portToLBEndpoints[port] = lbEndpoints{ - V4IPs: v4IPs, - V6IPs: v6IPs, - Port: ports[port], - } - } - } - klog.V(5).Infof("Cluster endpoints for %s/%s for network=%s are: %v", - service.Namespace, service.Name, networkName, portToLBEndpoints) - - for port, nodeToEndpoints := range portToNodeToEndpoints { - for node, endpoints := range nodeToEndpoints { - addresses := util.GetEligibleEndpointAddresses(endpoints, service) - v4IPs, _ := util.MatchAllIPStringFamily(false, addresses) - v6IPs, _ := util.MatchAllIPStringFamily(true, addresses) - if len(v4IPs) > 0 || len(v6IPs) > 0 { - if portToNodeToLBEndpoints[port] == nil { - portToNodeToLBEndpoints[port] = make(map[string]lbEndpoints, len(nodes)) - } - - portToNodeToLBEndpoints[port][node] = lbEndpoints{ - V4IPs: v4IPs, - V6IPs: v6IPs, - 
Port: ports[port], - } - } - } - } - - if requiresLocalEndpoints { - klog.V(5).Infof("Local endpoints for %s/%s for network=%s are: %v", - service.Namespace, service.Name, networkName, portToNodeToLBEndpoints) - } - - return portToLBEndpoints, portToNodeToLBEndpoints -} diff --git a/go-controller/pkg/ovn/controller/services/lb_config_test.go b/go-controller/pkg/ovn/controller/services/lb_config_test.go index 177ab487e1..1e30c7c302 100644 --- a/go-controller/pkg/ovn/controller/services/lb_config_test.go +++ b/go-controller/pkg/ovn/controller/services/lb_config_test.go @@ -247,8 +247,8 @@ func Test_buildServiceLBConfigs(t *testing.T) { vips: []string{"192.168.1.1"}, protocol: corev1.ProtocolTCP, inport: 80, - clusterEndpoints: lbEndpoints{}, - nodeEndpoints: map[string]lbEndpoints{}, + clusterEndpoints: util.LBEndpoints{}, + nodeEndpoints: util.PortToLBEndpoints{}, }}, resultsSame: true, }, @@ -275,11 +275,11 @@ func Test_buildServiceLBConfigs(t *testing.T) { vips: []string{"192.168.1.1"}, protocol: corev1.ProtocolTCP, inport: inport, - clusterEndpoints: lbEndpoints{ + clusterEndpoints: util.LBEndpoints{ V4IPs: []string{"10.128.0.2"}, Port: outport, }, - nodeEndpoints: map[string]lbEndpoints{}, // service is not ETP=local or ITP=local, so nodeEndpoints is not filled out + nodeEndpoints: util.PortToLBEndpoints{}, // service is not ETP=local or ITP=local, so nodeEndpoints is not filled out }}, resultsSame: true, }, @@ -307,11 +307,11 @@ func Test_buildServiceLBConfigs(t *testing.T) { vips: []string{"192.168.1.1"}, protocol: corev1.ProtocolTCP, inport: inport, - clusterEndpoints: lbEndpoints{ + clusterEndpoints: util.LBEndpoints{ V4IPs: []string{"10.128.0.2"}, Port: outport, }, - nodeEndpoints: map[string]lbEndpoints{ // service is ETP=local, so nodeEndpoints is filled out + nodeEndpoints: util.PortToLBEndpoints{ // service is ETP=local, so nodeEndpoints is filled out nodeA: { V4IPs: []string{"10.128.0.2"}, Port: outport, @@ -374,21 +374,21 @@ func Test_buildServiceLBConfigs(t *testing.T) { vips: []string{"192.168.1.1"}, protocol: corev1.ProtocolTCP, inport: inport, - clusterEndpoints: lbEndpoints{ + clusterEndpoints: util.LBEndpoints{ V4IPs: []string{"10.128.0.2", "10.128.1.2"}, Port: outport, }, - nodeEndpoints: map[string]lbEndpoints{}, + nodeEndpoints: util.PortToLBEndpoints{}, }, { vips: []string{"192.168.1.1"}, protocol: corev1.ProtocolTCP, inport: inport1, - clusterEndpoints: lbEndpoints{ + clusterEndpoints: util.LBEndpoints{ V4IPs: []string{"10.128.0.2", "10.128.1.2"}, Port: outport1, }, - nodeEndpoints: map[string]lbEndpoints{}, + nodeEndpoints: util.PortToLBEndpoints{}, }, }, }, @@ -446,21 +446,21 @@ func Test_buildServiceLBConfigs(t *testing.T) { vips: []string{"192.168.1.1"}, protocol: corev1.ProtocolTCP, inport: inport, - clusterEndpoints: lbEndpoints{ + clusterEndpoints: util.LBEndpoints{ V4IPs: []string{"10.128.0.2", "10.128.1.2"}, Port: outport, }, - nodeEndpoints: map[string]lbEndpoints{}, + nodeEndpoints: util.PortToLBEndpoints{}, }, { vips: []string{"192.168.1.1"}, protocol: corev1.ProtocolUDP, inport: inport, - clusterEndpoints: lbEndpoints{ + clusterEndpoints: util.LBEndpoints{ V4IPs: []string{"10.128.0.2", "10.128.1.2"}, Port: outport, }, - nodeEndpoints: map[string]lbEndpoints{}, + nodeEndpoints: util.PortToLBEndpoints{}, }, }, }, @@ -488,12 +488,12 @@ func Test_buildServiceLBConfigs(t *testing.T) { vips: []string{"192.168.1.1", "2002::1"}, protocol: corev1.ProtocolTCP, inport: inport, - clusterEndpoints: lbEndpoints{ + clusterEndpoints: util.LBEndpoints{ V4IPs: 
[]string{"10.128.0.2"}, V6IPs: []string{"fe00::1:1"}, Port: outport, }, - nodeEndpoints: map[string]lbEndpoints{}, + nodeEndpoints: util.PortToLBEndpoints{}, }}, }, { @@ -528,12 +528,12 @@ func Test_buildServiceLBConfigs(t *testing.T) { vips: []string{"192.168.1.1", "2002::1", "4.2.2.2", "42::42", "5.5.5.5"}, protocol: corev1.ProtocolTCP, inport: inport, - clusterEndpoints: lbEndpoints{ + clusterEndpoints: util.LBEndpoints{ V4IPs: []string{"10.128.0.2"}, V6IPs: []string{"fe00::1:1"}, Port: outport, }, - nodeEndpoints: map[string]lbEndpoints{}, // ETP=cluster (default), so nodeEndpoints is not filled out + nodeEndpoints: util.PortToLBEndpoints{}, // ETP=cluster (default), so nodeEndpoints is not filled out }}, }, { @@ -570,12 +570,12 @@ func Test_buildServiceLBConfigs(t *testing.T) { vips: []string{"192.168.1.1", "2002::1"}, protocol: corev1.ProtocolTCP, inport: inport, - clusterEndpoints: lbEndpoints{ + clusterEndpoints: util.LBEndpoints{ V4IPs: []string{"10.128.0.2"}, V6IPs: []string{"fe00::1:1"}, Port: outport, }, - nodeEndpoints: map[string]lbEndpoints{ + nodeEndpoints: util.PortToLBEndpoints{ nodeA: { V4IPs: []string{"10.128.0.2"}, V6IPs: []string{"fe00::1:1"}, @@ -590,12 +590,12 @@ func Test_buildServiceLBConfigs(t *testing.T) { protocol: corev1.ProtocolTCP, inport: inport, externalTrafficLocal: true, - clusterEndpoints: lbEndpoints{ + clusterEndpoints: util.LBEndpoints{ V4IPs: []string{"10.128.0.2"}, V6IPs: []string{"fe00::1:1"}, Port: outport, }, - nodeEndpoints: map[string]lbEndpoints{ + nodeEndpoints: util.PortToLBEndpoints{ nodeA: { V4IPs: []string{"10.128.0.2"}, V6IPs: []string{"fe00::1:1"}, @@ -630,23 +630,23 @@ func Test_buildServiceLBConfigs(t *testing.T) { vips: []string{"192.168.1.1", "2002::1"}, protocol: corev1.ProtocolTCP, inport: inport, - clusterEndpoints: lbEndpoints{ + clusterEndpoints: util.LBEndpoints{ V4IPs: []string{"10.128.0.2"}, V6IPs: []string{"fe00::1:1"}, Port: outport, }, - nodeEndpoints: map[string]lbEndpoints{}, + nodeEndpoints: util.PortToLBEndpoints{}, }}, resultSharedGatewayTemplate: []lbConfig{{ vips: []string{"node"}, protocol: corev1.ProtocolTCP, inport: 5, - clusterEndpoints: lbEndpoints{ + clusterEndpoints: util.LBEndpoints{ V4IPs: []string{"10.128.0.2"}, V6IPs: []string{"fe00::1:1"}, Port: outport, }, - nodeEndpoints: map[string]lbEndpoints{}, + nodeEndpoints: util.PortToLBEndpoints{}, hasNodePort: true, }}, }, @@ -677,12 +677,12 @@ func Test_buildServiceLBConfigs(t *testing.T) { vips: []string{"192.168.1.1", "2002::1"}, protocol: corev1.ProtocolTCP, inport: inport, - clusterEndpoints: lbEndpoints{ + clusterEndpoints: util.LBEndpoints{ V4IPs: []string{"192.168.0.1"}, V6IPs: []string{"2001::1"}, Port: outport, }, - nodeEndpoints: map[string]lbEndpoints{}, + nodeEndpoints: util.PortToLBEndpoints{}, }, }, resultSharedGatewayTemplate: []lbConfig{ @@ -690,12 +690,12 @@ func Test_buildServiceLBConfigs(t *testing.T) { vips: []string{"node"}, protocol: corev1.ProtocolTCP, inport: 5, - clusterEndpoints: lbEndpoints{ + clusterEndpoints: util.LBEndpoints{ V4IPs: []string{"192.168.0.1"}, V6IPs: []string{"2001::1"}, Port: outport, }, - nodeEndpoints: map[string]lbEndpoints{}, + nodeEndpoints: util.PortToLBEndpoints{}, hasNodePort: true, }, }, @@ -705,12 +705,12 @@ func Test_buildServiceLBConfigs(t *testing.T) { vips: []string{"192.168.1.1", "2002::1"}, protocol: corev1.ProtocolTCP, inport: inport, - clusterEndpoints: lbEndpoints{ + clusterEndpoints: util.LBEndpoints{ V4IPs: []string{"192.168.0.1"}, V6IPs: []string{"2001::1"}, Port: outport, }, - 
nodeEndpoints: map[string]lbEndpoints{}, + nodeEndpoints: util.PortToLBEndpoints{}, }, }, resultLocalGatewayTemplate: []lbConfig{ @@ -718,12 +718,12 @@ func Test_buildServiceLBConfigs(t *testing.T) { vips: []string{"node"}, protocol: corev1.ProtocolTCP, inport: 5, - clusterEndpoints: lbEndpoints{ + clusterEndpoints: util.LBEndpoints{ V4IPs: []string{"192.168.0.1"}, V6IPs: []string{"2001::1"}, Port: outport, }, - nodeEndpoints: map[string]lbEndpoints{}, + nodeEndpoints: util.PortToLBEndpoints{}, hasNodePort: true, }, }, @@ -756,12 +756,12 @@ func Test_buildServiceLBConfigs(t *testing.T) { vips: []string{"node"}, protocol: corev1.ProtocolTCP, inport: 5, - clusterEndpoints: lbEndpoints{ + clusterEndpoints: util.LBEndpoints{ V4IPs: []string{"192.168.0.1"}, V6IPs: []string{"2001::1"}, Port: outport, }, - nodeEndpoints: map[string]lbEndpoints{ + nodeEndpoints: util.PortToLBEndpoints{ nodeA: { V4IPs: []string{"192.168.0.1"}, V6IPs: []string{"2001::1"}, @@ -775,12 +775,12 @@ func Test_buildServiceLBConfigs(t *testing.T) { vips: []string{"192.168.1.1", "2002::1"}, protocol: corev1.ProtocolTCP, inport: inport, - clusterEndpoints: lbEndpoints{ + clusterEndpoints: util.LBEndpoints{ V4IPs: []string{"192.168.0.1"}, V6IPs: []string{"2001::1"}, Port: outport, }, - nodeEndpoints: map[string]lbEndpoints{ + nodeEndpoints: util.PortToLBEndpoints{ nodeA: { V4IPs: []string{"192.168.0.1"}, V6IPs: []string{"2001::1"}, @@ -794,12 +794,12 @@ func Test_buildServiceLBConfigs(t *testing.T) { vips: []string{"node"}, protocol: corev1.ProtocolTCP, inport: 5, - clusterEndpoints: lbEndpoints{ + clusterEndpoints: util.LBEndpoints{ V4IPs: []string{"192.168.0.1"}, V6IPs: []string{"2001::1"}, Port: outport, }, - nodeEndpoints: map[string]lbEndpoints{ + nodeEndpoints: util.PortToLBEndpoints{ nodeA: { V4IPs: []string{"192.168.0.1"}, V6IPs: []string{"2001::1"}, @@ -813,12 +813,12 @@ func Test_buildServiceLBConfigs(t *testing.T) { vips: []string{"192.168.1.1", "2002::1"}, protocol: corev1.ProtocolTCP, inport: inport, - clusterEndpoints: lbEndpoints{ + clusterEndpoints: util.LBEndpoints{ V4IPs: []string{"192.168.0.1"}, V6IPs: []string{"2001::1"}, Port: outport, }, - nodeEndpoints: map[string]lbEndpoints{ + nodeEndpoints: util.PortToLBEndpoints{ nodeA: { V4IPs: []string{"192.168.0.1"}, V6IPs: []string{"2001::1"}, @@ -854,12 +854,12 @@ func Test_buildServiceLBConfigs(t *testing.T) { vips: []string{"192.168.1.1", "2002::1"}, protocol: corev1.ProtocolTCP, inport: inport, - clusterEndpoints: lbEndpoints{ + clusterEndpoints: util.LBEndpoints{ V4IPs: []string{"192.168.0.1"}, V6IPs: []string{"2001::1"}, Port: outport, }, - nodeEndpoints: map[string]lbEndpoints{}, + nodeEndpoints: util.PortToLBEndpoints{}, }, }, resultLocalGatewayNode: []lbConfig{ @@ -867,12 +867,12 @@ func Test_buildServiceLBConfigs(t *testing.T) { vips: []string{"192.168.1.1", "2002::1"}, protocol: corev1.ProtocolTCP, inport: inport, - clusterEndpoints: lbEndpoints{ + clusterEndpoints: util.LBEndpoints{ V4IPs: []string{"192.168.0.1"}, V6IPs: []string{"2001::1"}, Port: outport, }, - nodeEndpoints: map[string]lbEndpoints{}, + nodeEndpoints: util.PortToLBEndpoints{}, }, }, }, @@ -915,11 +915,11 @@ func Test_buildServiceLBConfigs(t *testing.T) { vips: []string{"192.168.1.1"}, protocol: corev1.ProtocolTCP, inport: inport, - clusterEndpoints: lbEndpoints{ + clusterEndpoints: util.LBEndpoints{ V4IPs: []string{"10.128.0.2", "10.128.1.2"}, Port: outport, }, - nodeEndpoints: map[string]lbEndpoints{ + nodeEndpoints: util.PortToLBEndpoints{ nodeA: { V4IPs: []string{"10.128.0.2"}, 
Port: outport, @@ -938,11 +938,11 @@ func Test_buildServiceLBConfigs(t *testing.T) { inport: 5, hasNodePort: true, externalTrafficLocal: true, - clusterEndpoints: lbEndpoints{ + clusterEndpoints: util.LBEndpoints{ V4IPs: []string{"10.128.0.2", "10.128.1.2"}, Port: outport, }, - nodeEndpoints: map[string]lbEndpoints{ + nodeEndpoints: util.PortToLBEndpoints{ nodeA: { V4IPs: []string{"10.128.0.2"}, Port: outport, @@ -959,11 +959,11 @@ func Test_buildServiceLBConfigs(t *testing.T) { inport: inport, hasNodePort: false, externalTrafficLocal: true, - clusterEndpoints: lbEndpoints{ + clusterEndpoints: util.LBEndpoints{ V4IPs: []string{"10.128.0.2", "10.128.1.2"}, Port: outport, }, - nodeEndpoints: map[string]lbEndpoints{ + nodeEndpoints: util.PortToLBEndpoints{ nodeA: { V4IPs: []string{"10.128.0.2"}, Port: outport, @@ -1016,11 +1016,11 @@ func Test_buildServiceLBConfigs(t *testing.T) { vips: []string{"192.168.1.1"}, protocol: corev1.ProtocolTCP, inport: inport, - clusterEndpoints: lbEndpoints{ + clusterEndpoints: util.LBEndpoints{ V4IPs: []string{"10.128.0.2"}, Port: outport, }, - nodeEndpoints: map[string]lbEndpoints{ + nodeEndpoints: util.PortToLBEndpoints{ nodeA: { V4IPs: []string{"10.128.0.2"}, Port: outport, @@ -1039,11 +1039,11 @@ func Test_buildServiceLBConfigs(t *testing.T) { inport: 5, hasNodePort: true, externalTrafficLocal: true, - clusterEndpoints: lbEndpoints{ + clusterEndpoints: util.LBEndpoints{ V4IPs: []string{"10.128.0.2"}, Port: outport, }, - nodeEndpoints: map[string]lbEndpoints{ + nodeEndpoints: util.PortToLBEndpoints{ nodeA: { V4IPs: []string{"10.128.0.2"}, Port: outport, @@ -1060,11 +1060,11 @@ func Test_buildServiceLBConfigs(t *testing.T) { inport: inport, hasNodePort: false, externalTrafficLocal: true, - clusterEndpoints: lbEndpoints{ + clusterEndpoints: util.LBEndpoints{ V4IPs: []string{"10.128.0.2"}, Port: outport, }, - nodeEndpoints: map[string]lbEndpoints{ + nodeEndpoints: util.PortToLBEndpoints{ nodeA: { V4IPs: []string{"10.128.0.2"}, Port: outport, @@ -1115,11 +1115,11 @@ func Test_buildServiceLBConfigs(t *testing.T) { vips: []string{"192.168.1.1"}, protocol: corev1.ProtocolTCP, inport: inport, - clusterEndpoints: lbEndpoints{ + clusterEndpoints: util.LBEndpoints{ V4IPs: []string{"10.128.0.2"}, Port: outport, }, - nodeEndpoints: map[string]lbEndpoints{ + nodeEndpoints: util.PortToLBEndpoints{ nodeA: { V4IPs: []string{"10.128.0.2"}, Port: outport, @@ -1134,11 +1134,11 @@ func Test_buildServiceLBConfigs(t *testing.T) { inport: 5, hasNodePort: true, externalTrafficLocal: true, - clusterEndpoints: lbEndpoints{ + clusterEndpoints: util.LBEndpoints{ V4IPs: []string{"10.128.0.2"}, Port: outport, }, - nodeEndpoints: map[string]lbEndpoints{ + nodeEndpoints: util.PortToLBEndpoints{ nodeA: { V4IPs: []string{"10.128.0.2"}, Port: outport, @@ -1151,11 +1151,11 @@ func Test_buildServiceLBConfigs(t *testing.T) { inport: inport, hasNodePort: false, externalTrafficLocal: true, - clusterEndpoints: lbEndpoints{ + clusterEndpoints: util.LBEndpoints{ V4IPs: []string{"10.128.0.2"}, Port: outport, }, - nodeEndpoints: map[string]lbEndpoints{ + nodeEndpoints: util.PortToLBEndpoints{ nodeA: { V4IPs: []string{"10.128.0.2"}, Port: outport, @@ -1170,7 +1170,7 @@ func Test_buildServiceLBConfigs(t *testing.T) { t.Run(fmt.Sprintf("%d_%s", i, tt.name), func(t *testing.T) { // shared gateway mode globalconfig.Gateway.Mode = globalconfig.GatewayModeShared - perNode, template, clusterWide := buildServiceLBConfigs(tt.args.service, tt.args.slices, defaultNodes, true, true, types.DefaultNetworkName) + 
perNode, template, clusterWide := buildServiceLBConfigs(tt.args.service, tt.args.slices, defaultNodes, true, true) assert.EqualValues(t, tt.resultSharedGatewayNode, perNode, "SGW per-node configs should be equal") assert.EqualValues(t, tt.resultSharedGatewayTemplate, template, "SGW template configs should be equal") @@ -1179,7 +1179,7 @@ func Test_buildServiceLBConfigs(t *testing.T) { // local gateway mode globalconfig.Gateway.Mode = globalconfig.GatewayModeLocal - perNode, template, clusterWide = buildServiceLBConfigs(tt.args.service, tt.args.slices, defaultNodes, true, true, types.DefaultNetworkName) + perNode, template, clusterWide = buildServiceLBConfigs(tt.args.service, tt.args.slices, defaultNodes, true, true) if tt.resultsSame { assert.EqualValues(t, tt.resultSharedGatewayNode, perNode, "LGW per-node configs should be equal") assert.EqualValues(t, tt.resultSharedGatewayTemplate, template, "LGW template configs should be equal") @@ -1239,11 +1239,11 @@ func Test_buildClusterLBs(t *testing.T) { vips: []string{"1.2.3.4"}, protocol: corev1.ProtocolTCP, inport: 80, - clusterEndpoints: lbEndpoints{ + clusterEndpoints: util.LBEndpoints{ V4IPs: []string{"192.168.0.1", "192.168.0.2"}, Port: 8080, }, - nodeEndpoints: map[string]lbEndpoints{ + nodeEndpoints: util.PortToLBEndpoints{ nodeA: { V4IPs: []string{"192.168.0.1", "192.168.0.2"}, Port: 8080, @@ -1254,11 +1254,11 @@ func Test_buildClusterLBs(t *testing.T) { vips: []string{"1.2.3.4"}, protocol: corev1.ProtocolTCP, inport: 443, - clusterEndpoints: lbEndpoints{ + clusterEndpoints: util.LBEndpoints{ V4IPs: []string{"192.168.0.1"}, Port: 8043, }, - nodeEndpoints: map[string]lbEndpoints{ + nodeEndpoints: util.PortToLBEndpoints{ nodeA: { V4IPs: []string{"192.168.0.1"}, Port: 8043, @@ -1298,11 +1298,11 @@ func Test_buildClusterLBs(t *testing.T) { vips: []string{"1.2.3.4"}, protocol: corev1.ProtocolTCP, inport: 80, - clusterEndpoints: lbEndpoints{ + clusterEndpoints: util.LBEndpoints{ V4IPs: []string{"192.168.0.1", "192.168.0.2"}, Port: 8080, }, - nodeEndpoints: map[string]lbEndpoints{ + nodeEndpoints: util.PortToLBEndpoints{ nodeA: { V4IPs: []string{"192.168.0.1", "192.168.0.2"}, Port: 8080, @@ -1313,11 +1313,11 @@ func Test_buildClusterLBs(t *testing.T) { vips: []string{"1.2.3.4"}, protocol: corev1.ProtocolUDP, inport: 443, - clusterEndpoints: lbEndpoints{ + clusterEndpoints: util.LBEndpoints{ V4IPs: []string{"192.168.0.1"}, Port: 8043, }, - nodeEndpoints: map[string]lbEndpoints{ + nodeEndpoints: util.PortToLBEndpoints{ nodeA: { V4IPs: []string{"192.168.0.1"}, Port: 8043, @@ -1369,13 +1369,13 @@ func Test_buildClusterLBs(t *testing.T) { vips: []string{"1.2.3.4", "fe80::1"}, protocol: corev1.ProtocolTCP, inport: 80, - clusterEndpoints: lbEndpoints{ + clusterEndpoints: util.LBEndpoints{ V4IPs: []string{"192.168.0.1", "192.168.0.2"}, V6IPs: []string{"fe90::1", "fe91::1"}, Port: 8080, }, - nodeEndpoints: map[string]lbEndpoints{ + nodeEndpoints: util.PortToLBEndpoints{ nodeA: { V4IPs: []string{"192.168.0.1", "192.168.0.2"}, V6IPs: []string{"fe90::1", "fe91::1"}, @@ -1387,13 +1387,13 @@ func Test_buildClusterLBs(t *testing.T) { vips: []string{"1.2.3.4", "fe80::1"}, protocol: corev1.ProtocolTCP, inport: 443, - clusterEndpoints: lbEndpoints{ + clusterEndpoints: util.LBEndpoints{ V4IPs: []string{"192.168.0.1"}, V6IPs: []string{"fe90::1"}, Port: 8043, }, - nodeEndpoints: map[string]lbEndpoints{ + nodeEndpoints: util.PortToLBEndpoints{ nodeA: { V4IPs: []string{"192.168.0.1"}, V6IPs: []string{"fe90::1"}, @@ -1554,11 +1554,11 @@ func Test_buildPerNodeLBs(t 
*testing.T) { vips: []string{"1.2.3.4"}, protocol: corev1.ProtocolTCP, inport: 80, - clusterEndpoints: lbEndpoints{ + clusterEndpoints: util.LBEndpoints{ V4IPs: []string{"10.0.0.1"}, Port: 8080, }, - nodeEndpoints: map[string]lbEndpoints{ + nodeEndpoints: util.PortToLBEndpoints{ nodeA: { V4IPs: []string{"10.0.0.1"}, Port: 8080, @@ -1604,11 +1604,11 @@ func Test_buildPerNodeLBs(t *testing.T) { vips: []string{"node"}, protocol: corev1.ProtocolTCP, inport: 80, - clusterEndpoints: lbEndpoints{ + clusterEndpoints: util.LBEndpoints{ V4IPs: []string{"10.128.0.2"}, Port: 8080, }, - nodeEndpoints: map[string]lbEndpoints{ + nodeEndpoints: util.PortToLBEndpoints{ nodeA: {V4IPs: []string{"10.128.0.2"}, Port: 8080}, }, }, @@ -1690,11 +1690,11 @@ func Test_buildPerNodeLBs(t *testing.T) { vips: []string{"192.168.0.1"}, protocol: corev1.ProtocolTCP, inport: 80, - clusterEndpoints: lbEndpoints{ + clusterEndpoints: util.LBEndpoints{ V4IPs: []string{"10.0.0.1"}, Port: 8080, }, - nodeEndpoints: map[string]lbEndpoints{ + nodeEndpoints: util.PortToLBEndpoints{ nodeA: { V4IPs: []string{"10.0.0.1"}, Port: 8080, @@ -1705,11 +1705,11 @@ func Test_buildPerNodeLBs(t *testing.T) { vips: []string{"node"}, protocol: corev1.ProtocolTCP, inport: 80, - clusterEndpoints: lbEndpoints{ + clusterEndpoints: util.LBEndpoints{ V4IPs: []string{"10.0.0.1"}, Port: 8080, }, - nodeEndpoints: map[string]lbEndpoints{ + nodeEndpoints: util.PortToLBEndpoints{ nodeA: { V4IPs: []string{"10.0.0.1"}, Port: 8080, @@ -1851,11 +1851,11 @@ func Test_buildPerNodeLBs(t *testing.T) { vips: []string{"192.168.0.1"}, protocol: corev1.ProtocolTCP, inport: 80, - clusterEndpoints: lbEndpoints{ + clusterEndpoints: util.LBEndpoints{ V4IPs: []string{"10.0.0.1"}, Port: 8080, }, - nodeEndpoints: map[string]lbEndpoints{ + nodeEndpoints: util.PortToLBEndpoints{ nodeA: { V4IPs: []string{"10.0.0.1"}, Port: 8080, @@ -1868,11 +1868,11 @@ func Test_buildPerNodeLBs(t *testing.T) { inport: 80, externalTrafficLocal: true, hasNodePort: true, - clusterEndpoints: lbEndpoints{ + clusterEndpoints: util.LBEndpoints{ V4IPs: []string{"10.0.0.1"}, Port: 8080, }, - nodeEndpoints: map[string]lbEndpoints{ + nodeEndpoints: util.PortToLBEndpoints{ nodeA: { V4IPs: []string{"10.0.0.1"}, Port: 8080, @@ -1998,11 +1998,11 @@ func Test_buildPerNodeLBs(t *testing.T) { protocol: corev1.ProtocolTCP, inport: 80, internalTrafficLocal: true, - clusterEndpoints: lbEndpoints{ + clusterEndpoints: util.LBEndpoints{ V4IPs: []string{"10.128.0.1", "10.128.1.1"}, Port: 8080, }, - nodeEndpoints: map[string]lbEndpoints{ + nodeEndpoints: util.PortToLBEndpoints{ nodeA: { V4IPs: []string{"10.128.0.1"}, Port: 8080, @@ -2017,11 +2017,11 @@ func Test_buildPerNodeLBs(t *testing.T) { vips: []string{"1.2.3.4"}, // externalIP config protocol: corev1.ProtocolTCP, inport: 80, - clusterEndpoints: lbEndpoints{ + clusterEndpoints: util.LBEndpoints{ V4IPs: []string{"10.128.0.1", "10.128.1.1"}, Port: 8080, }, - nodeEndpoints: map[string]lbEndpoints{ + nodeEndpoints: util.PortToLBEndpoints{ nodeA: { V4IPs: []string{"10.128.0.1"}, Port: 8080, @@ -2149,11 +2149,11 @@ func Test_buildPerNodeLBs(t *testing.T) { protocol: corev1.ProtocolTCP, inport: 80, internalTrafficLocal: true, - clusterEndpoints: lbEndpoints{ + clusterEndpoints: util.LBEndpoints{ V4IPs: []string{"10.0.0.1", "10.0.0.2"}, Port: 8080, }, - nodeEndpoints: map[string]lbEndpoints{ + nodeEndpoints: util.PortToLBEndpoints{ nodeA: { V4IPs: []string{"10.0.0.1"}, Port: 8080, @@ -2168,11 +2168,11 @@ func Test_buildPerNodeLBs(t *testing.T) { vips: []string{"1.2.3.4"}, // 
externalIP config protocol: corev1.ProtocolTCP, inport: 80, - clusterEndpoints: lbEndpoints{ + clusterEndpoints: util.LBEndpoints{ V4IPs: []string{"10.0.0.1", "10.0.0.2"}, Port: 8080, }, - nodeEndpoints: map[string]lbEndpoints{ + nodeEndpoints: util.PortToLBEndpoints{ nodeA: { V4IPs: []string{"10.0.0.1"}, Port: 8080, @@ -2336,11 +2336,11 @@ func Test_buildPerNodeLBs(t *testing.T) { inport: 80, internalTrafficLocal: true, externalTrafficLocal: false, // ETP is applicable only to nodePorts and LBs - clusterEndpoints: lbEndpoints{ + clusterEndpoints: util.LBEndpoints{ V4IPs: []string{"10.0.0.1"}, Port: 8080, }, - nodeEndpoints: map[string]lbEndpoints{ + nodeEndpoints: util.PortToLBEndpoints{ nodeA: { V4IPs: []string{"10.0.0.1"}, // only one ep on node-a Port: 8080, @@ -2354,11 +2354,11 @@ func Test_buildPerNodeLBs(t *testing.T) { externalTrafficLocal: true, internalTrafficLocal: false, // ITP is applicable only to clusterIPs hasNodePort: true, - clusterEndpoints: lbEndpoints{ + clusterEndpoints: util.LBEndpoints{ V4IPs: []string{"10.0.0.1"}, Port: 8080, }, - nodeEndpoints: map[string]lbEndpoints{ + nodeEndpoints: util.PortToLBEndpoints{ nodeA: { V4IPs: []string{"10.0.0.1"}, // only one ep on node-a Port: 8080, @@ -2576,11 +2576,11 @@ func Test_buildPerNodeLBs(t *testing.T) { inport: 5, // nodePort hasNodePort: true, externalTrafficLocal: true, - clusterEndpoints: lbEndpoints{ + clusterEndpoints: util.LBEndpoints{ V4IPs: []string{"10.128.0.2", "10.128.1.2"}, Port: 8080, }, - nodeEndpoints: map[string]lbEndpoints{ + nodeEndpoints: util.PortToLBEndpoints{ nodeA: { V4IPs: []string{"10.128.0.2"}, Port: 8080, @@ -2597,11 +2597,11 @@ func Test_buildPerNodeLBs(t *testing.T) { inport: 80, hasNodePort: false, externalTrafficLocal: true, - clusterEndpoints: lbEndpoints{ + clusterEndpoints: util.LBEndpoints{ V4IPs: []string{"10.128.0.2", "10.128.1.2"}, Port: 8080, }, - nodeEndpoints: map[string]lbEndpoints{ + nodeEndpoints: util.PortToLBEndpoints{ nodeA: { V4IPs: []string{"10.128.0.2"}, Port: 8080, @@ -2731,11 +2731,11 @@ func Test_buildPerNodeLBs(t *testing.T) { inport: 5, // nodePort hasNodePort: true, externalTrafficLocal: true, - clusterEndpoints: lbEndpoints{ + clusterEndpoints: util.LBEndpoints{ V4IPs: []string{"10.128.0.2"}, Port: 8080, }, - nodeEndpoints: map[string]lbEndpoints{ + nodeEndpoints: util.PortToLBEndpoints{ nodeA: {V4IPs: []string{"10.128.0.2"}, Port: 8080}, nodeB: {V4IPs: []string{"10.128.1.2"}, Port: 8080}, }, @@ -2746,11 +2746,11 @@ func Test_buildPerNodeLBs(t *testing.T) { inport: 80, hasNodePort: false, externalTrafficLocal: true, - clusterEndpoints: lbEndpoints{ + clusterEndpoints: util.LBEndpoints{ V4IPs: []string{"10.128.0.2"}, Port: 8080, }, - nodeEndpoints: map[string]lbEndpoints{ + nodeEndpoints: util.PortToLBEndpoints{ nodeA: {V4IPs: []string{"10.128.0.2"}, Port: 8080}, nodeB: {V4IPs: []string{"10.128.1.2"}, Port: 8080}, }, @@ -2883,11 +2883,11 @@ func Test_buildPerNodeLBs(t *testing.T) { vips: []string{"node"}, protocol: corev1.ProtocolTCP, inport: 80, - clusterEndpoints: lbEndpoints{ + clusterEndpoints: util.LBEndpoints{ V6IPs: []string{"fe00:0:0:0:1::2"}, Port: 8080, }, - nodeEndpoints: map[string]lbEndpoints{ + nodeEndpoints: util.PortToLBEndpoints{ nodeA: {V6IPs: []string{"fe00:0:0:0:1::2"}, Port: 8080}, }, }, @@ -2972,11 +2972,11 @@ func Test_buildPerNodeLBs(t *testing.T) { inport: 5, // nodePort hasNodePort: true, externalTrafficLocal: true, - clusterEndpoints: lbEndpoints{ + clusterEndpoints: util.LBEndpoints{ V6IPs: []string{"fe00:0:0:0:1::2"}, Port: 8080, }, - 
nodeEndpoints: map[string]lbEndpoints{ + nodeEndpoints: util.PortToLBEndpoints{ nodeA: {V6IPs: []string{"fe00:0:0:0:1::2"}, Port: 8080}, nodeB: {V6IPs: []string{"fe00:0:0:0:2::2"}, Port: 8080}, }, @@ -2987,11 +2987,11 @@ func Test_buildPerNodeLBs(t *testing.T) { inport: 80, hasNodePort: false, externalTrafficLocal: true, - clusterEndpoints: lbEndpoints{ + clusterEndpoints: util.LBEndpoints{ V6IPs: []string{"fe00:0:0:0:1::2"}, Port: 8080, }, - nodeEndpoints: map[string]lbEndpoints{ + nodeEndpoints: util.PortToLBEndpoints{ nodeA: {V6IPs: []string{"fe00:0:0:0:1::2"}, Port: 8080}, nodeB: {V6IPs: []string{"fe00:0:0:0:2::2"}, Port: 8080}, }, @@ -3274,7 +3274,7 @@ func Test_idledServices(t *testing.T) { } } -func Test_getEndpointsForService(t *testing.T) { +func Test_GetEndpointsForService(t *testing.T) { type args struct { slices []*discovery.EndpointSlice svc *corev1.Service @@ -3284,8 +3284,9 @@ func Test_getEndpointsForService(t *testing.T) { tests := []struct { name string args args - wantClusterEndpoints map[string]lbEndpoints - wantNodeEndpoints map[string]map[string]lbEndpoints + wantClusterEndpoints util.PortToLBEndpoints + wantNodeEndpoints util.PortToNodeToLBEndpoints + wantError error }{ { name: "empty slices", @@ -3293,8 +3294,8 @@ func Test_getEndpointsForService(t *testing.T) { slices: []*discovery.EndpointSlice{}, svc: getSampleServiceWithOnePort(httpPortName, httpPortValue, tcp), }, - wantClusterEndpoints: map[string]lbEndpoints{}, // no cluster-wide endpoints - wantNodeEndpoints: map[string]map[string]lbEndpoints{}, // no local endpoints + wantClusterEndpoints: util.PortToLBEndpoints{}, // no cluster-wide endpoints + wantNodeEndpoints: util.PortToNodeToLBEndpoints{}, // no local endpoints }, { name: "slice with one local endpoint", @@ -3320,9 +3321,9 @@ func Test_getEndpointsForService(t *testing.T) { svc: getSampleServiceWithOnePort("tcp-example", 80, tcp), nodes: sets.New(nodeA), // one-node zone }, - wantClusterEndpoints: map[string]lbEndpoints{ - getServicePortKey(tcp, "tcp-example"): {V4IPs: []string{"10.0.0.2"}, Port: 80}}, // one cluster-wide endpoint - wantNodeEndpoints: map[string]map[string]lbEndpoints{}, // no need for local endpoints, service is not ETP or ITP local + wantClusterEndpoints: util.PortToLBEndpoints{ + util.GetServicePortKey(tcp, "tcp-example"): {V4IPs: []string{"10.0.0.2"}, Port: 80}}, // one cluster-wide endpoint + wantNodeEndpoints: util.PortToNodeToLBEndpoints{}, // no need for local endpoints, service is not ETP or ITP local }, { name: "slice with one local endpoint, ETP=local", @@ -3348,10 +3349,10 @@ func Test_getEndpointsForService(t *testing.T) { svc: getSampleServiceWithOnePortAndETPLocal("tcp-example", 80, tcp), nodes: sets.New(nodeA), // one-node zone }, - wantClusterEndpoints: map[string]lbEndpoints{ - getServicePortKey(tcp, "tcp-example"): {V4IPs: []string{"10.0.0.2"}, Port: 80}}, // one cluster-wide endpoint - wantNodeEndpoints: map[string]map[string]lbEndpoints{ - getServicePortKey(tcp, "tcp-example"): {nodeA: lbEndpoints{V4IPs: []string{"10.0.0.2"}, Port: 80}}}, // ETP=local, one local endpoint + wantClusterEndpoints: util.PortToLBEndpoints{ + util.GetServicePortKey(tcp, "tcp-example"): {V4IPs: []string{"10.0.0.2"}, Port: 80}}, // one cluster-wide endpoint + wantNodeEndpoints: util.PortToNodeToLBEndpoints{ + util.GetServicePortKey(tcp, "tcp-example"): {nodeA: util.LBEndpoints{V4IPs: []string{"10.0.0.2"}, Port: 80}}}, // ETP=local, one local endpoint }, { name: "slice with one non-local endpoint, ETP=local", @@ -3377,9 +3378,9 @@ func 
Test_getEndpointsForService(t *testing.T) { svc: getSampleServiceWithOnePortAndETPLocal("tcp-example", 80, tcp), nodes: sets.New(nodeA), // one-node zone }, - wantClusterEndpoints: map[string]lbEndpoints{ - getServicePortKey(tcp, "tcp-example"): {V4IPs: []string{"10.0.0.2"}, Port: 80}}, // one cluster-wide endpoint - wantNodeEndpoints: map[string]map[string]lbEndpoints{}, // ETP=local but no local endpoint + wantClusterEndpoints: util.PortToLBEndpoints{ + util.GetServicePortKey(tcp, "tcp-example"): {V4IPs: []string{"10.0.0.2"}, Port: 80}}, // one cluster-wide endpoint + wantNodeEndpoints: util.PortToNodeToLBEndpoints{}, // ETP=local but no local endpoint }, { name: "slice of address type FQDN", @@ -3405,8 +3406,8 @@ func Test_getEndpointsForService(t *testing.T) { svc: getSampleServiceWithOnePort("tcp-example", 80, tcp), nodes: sets.New(nodeA), // one-node zone }, - wantClusterEndpoints: map[string]lbEndpoints{}, // no endpoints - wantNodeEndpoints: map[string]map[string]lbEndpoints{}, // no local endpoint + wantClusterEndpoints: util.PortToLBEndpoints{}, // no endpoints + wantNodeEndpoints: util.PortToNodeToLBEndpoints{}, // no local endpoint }, { name: "slice with one endpoint, OVN zone with two nodes, ETP=local", @@ -3432,10 +3433,10 @@ func Test_getEndpointsForService(t *testing.T) { svc: getSampleServiceWithOnePortAndETPLocal("tcp-example", 80, tcp), nodes: sets.New(nodeA, nodeB), // one-node zone }, - wantClusterEndpoints: map[string]lbEndpoints{ - getServicePortKey(tcp, "tcp-example"): {V4IPs: []string{"10.0.0.2"}, Port: 80}}, // one cluster-wide endpoint - wantNodeEndpoints: map[string]map[string]lbEndpoints{ - getServicePortKey(tcp, "tcp-example"): {nodeB: lbEndpoints{V4IPs: []string{"10.0.0.2"}, Port: 80}}}, // endpoint on nodeB + wantClusterEndpoints: util.PortToLBEndpoints{ + util.GetServicePortKey(tcp, "tcp-example"): {V4IPs: []string{"10.0.0.2"}, Port: 80}}, // one cluster-wide endpoint + wantNodeEndpoints: util.PortToNodeToLBEndpoints{ + util.GetServicePortKey(tcp, "tcp-example"): {nodeB: util.LBEndpoints{V4IPs: []string{"10.0.0.2"}, Port: 80}}}, // endpoint on nodeB }, { name: "slice with different port name than the service", @@ -3461,8 +3462,8 @@ func Test_getEndpointsForService(t *testing.T) { svc: getSampleServiceWithOnePort("tcp-example", 80, tcp), nodes: sets.New(nodeA), // one-node zone }, - wantClusterEndpoints: map[string]lbEndpoints{}, // no cluster-wide endpoints - wantNodeEndpoints: map[string]map[string]lbEndpoints{}, // no local endpoints + wantClusterEndpoints: util.PortToLBEndpoints{}, // no cluster-wide endpoints + wantNodeEndpoints: util.PortToNodeToLBEndpoints{}, // no local endpoints }, { name: "slice and service without a port name, ETP=local", @@ -3488,10 +3489,10 @@ func Test_getEndpointsForService(t *testing.T) { nodes: sets.New(nodeA), // one-node zone }, - wantClusterEndpoints: map[string]lbEndpoints{ - getServicePortKey(tcp, ""): {V4IPs: []string{"10.0.0.2"}, Port: 8080}}, // one cluster-wide endpoint - wantNodeEndpoints: map[string]map[string]lbEndpoints{ - getServicePortKey(tcp, ""): {nodeA: lbEndpoints{V4IPs: []string{"10.0.0.2"}, Port: 8080}}}, // one local endpoint + wantClusterEndpoints: util.PortToLBEndpoints{ + util.GetServicePortKey(tcp, ""): {V4IPs: []string{"10.0.0.2"}, Port: 8080}}, // one cluster-wide endpoint + wantNodeEndpoints: util.PortToNodeToLBEndpoints{ + util.GetServicePortKey(tcp, ""): {nodeA: util.LBEndpoints{V4IPs: []string{"10.0.0.2"}, Port: 8080}}}, // one local endpoint }, { name: "slice with an IPv6 endpoint", @@ 
-3517,9 +3518,9 @@ func Test_getEndpointsForService(t *testing.T) { svc: getSampleServiceWithOnePort("tcp-example", 80, tcp), nodes: sets.New(nodeA), // one-node zone }, - wantClusterEndpoints: map[string]lbEndpoints{ - getServicePortKey(tcp, "tcp-example"): {V6IPs: []string{"2001:db2::2"}, Port: 80}}, // one cluster-wide endpoint - wantNodeEndpoints: map[string]map[string]lbEndpoints{}, // local endpoints not filled in, since service is not ETP or ITP local + wantClusterEndpoints: util.PortToLBEndpoints{ + util.GetServicePortKey(tcp, "tcp-example"): {V6IPs: []string{"2001:db2::2"}, Port: 80}}, // one cluster-wide endpoint + wantNodeEndpoints: util.PortToNodeToLBEndpoints{}, // local endpoints not filled in, since service is not ETP or ITP local }, { name: "a slice with an IPv4 endpoint and a slice with an IPv6 endpoint (dualstack cluster), ETP=local", @@ -3561,10 +3562,10 @@ func Test_getEndpointsForService(t *testing.T) { svc: getSampleServiceWithOnePortAndETPLocal("tcp-example", 80, tcp), nodes: sets.New(nodeA), // one-node zone }, - wantClusterEndpoints: map[string]lbEndpoints{ - getServicePortKey(tcp, "tcp-example"): {V4IPs: []string{"10.0.0.2"}, V6IPs: []string{"2001:db2::2"}, Port: 80}}, - wantNodeEndpoints: map[string]map[string]lbEndpoints{ - getServicePortKey(tcp, "tcp-example"): {nodeA: lbEndpoints{V4IPs: []string{"10.0.0.2"}, V6IPs: []string{"2001:db2::2"}, Port: 80}}}, + wantClusterEndpoints: util.PortToLBEndpoints{ + util.GetServicePortKey(tcp, "tcp-example"): {V4IPs: []string{"10.0.0.2"}, V6IPs: []string{"2001:db2::2"}, Port: 80}}, + wantNodeEndpoints: util.PortToNodeToLBEndpoints{ + util.GetServicePortKey(tcp, "tcp-example"): {nodeA: util.LBEndpoints{V4IPs: []string{"10.0.0.2"}, V6IPs: []string{"2001:db2::2"}, Port: 80}}}, }, { name: "one slice with a duplicate address in the same endpoint", @@ -3590,9 +3591,9 @@ func Test_getEndpointsForService(t *testing.T) { svc: getSampleServiceWithOnePort("tcp-example", 80, tcp), nodes: sets.New(nodeA), // one-node zone }, - wantClusterEndpoints: map[string]lbEndpoints{ - getServicePortKey(tcp, "tcp-example"): {V4IPs: []string{"10.0.0.2"}, Port: 80}}, - wantNodeEndpoints: map[string]map[string]lbEndpoints{}, // local endpoints not filled in, since service is not ETP or ITP local + wantClusterEndpoints: util.PortToLBEndpoints{ + util.GetServicePortKey(tcp, "tcp-example"): {V4IPs: []string{"10.0.0.2"}, Port: 80}}, + wantNodeEndpoints: util.PortToNodeToLBEndpoints{}, // local endpoints not filled in, since service is not ETP or ITP local }, { name: "one slice with a duplicate address across two endpoints", @@ -3618,9 +3619,9 @@ func Test_getEndpointsForService(t *testing.T) { svc: getSampleServiceWithOnePort("tcp-example", 80, tcp), nodes: sets.New(nodeA), // one-node zone }, - wantClusterEndpoints: map[string]lbEndpoints{ - getServicePortKey(tcp, "tcp-example"): {V4IPs: []string{"10.0.0.2"}, Port: 80}}, - wantNodeEndpoints: map[string]map[string]lbEndpoints{}, // local endpoints not filled in, since service is not ETP or ITP local + wantClusterEndpoints: util.PortToLBEndpoints{ + util.GetServicePortKey(tcp, "tcp-example"): {V4IPs: []string{"10.0.0.2"}, Port: 80}}, + wantNodeEndpoints: util.PortToNodeToLBEndpoints{}, // local endpoints not filled in, since service is not ETP or ITP local }, { name: "multiples slices with a duplicate address, with both address being ready", @@ -3662,9 +3663,9 @@ func Test_getEndpointsForService(t *testing.T) { svc: getSampleServiceWithOnePort("tcp-example", 80, tcp), nodes: sets.New(nodeA), // one-node 
zone }, - wantClusterEndpoints: map[string]lbEndpoints{ - getServicePortKey(tcp, "tcp-example"): {V4IPs: []string{"10.0.0.2", "10.1.1.2", "10.2.2.2"}, Port: 80}}, - wantNodeEndpoints: map[string]map[string]lbEndpoints{}, // local endpoints not filled in, since service is not ETP or ITP local + wantClusterEndpoints: util.PortToLBEndpoints{ + util.GetServicePortKey(tcp, "tcp-example"): {V4IPs: []string{"10.0.0.2", "10.1.1.2", "10.2.2.2"}, Port: 80}}, + wantNodeEndpoints: util.PortToNodeToLBEndpoints{}, // local endpoints not filled in, since service is not ETP or ITP local }, { name: "multiples slices with different ports, ETP=local", @@ -3706,12 +3707,12 @@ func Test_getEndpointsForService(t *testing.T) { svc: getSampleServiceWithTwoPortsAndETPLocal("tcp-example", "other-port", 80, 8080, tcp, tcp), nodes: sets.New(nodeA), // one-node zone }, - wantClusterEndpoints: map[string]lbEndpoints{ - getServicePortKey(tcp, "tcp-example"): {V4IPs: []string{"10.0.0.2", "10.1.1.2"}, Port: 80}, - getServicePortKey(tcp, "other-port"): {V4IPs: []string{"10.0.0.3", "10.2.2.3"}, Port: 8080}}, - wantNodeEndpoints: map[string]map[string]lbEndpoints{ - getServicePortKey(tcp, "tcp-example"): {nodeA: lbEndpoints{V4IPs: []string{"10.0.0.2", "10.1.1.2"}, Port: 80}}, - getServicePortKey(tcp, "other-port"): {nodeA: lbEndpoints{V4IPs: []string{"10.0.0.3", "10.2.2.3"}, Port: 8080}}}, + wantClusterEndpoints: util.PortToLBEndpoints{ + util.GetServicePortKey(tcp, "tcp-example"): {V4IPs: []string{"10.0.0.2", "10.1.1.2"}, Port: 80}, + util.GetServicePortKey(tcp, "other-port"): {V4IPs: []string{"10.0.0.3", "10.2.2.3"}, Port: 8080}}, + wantNodeEndpoints: util.PortToNodeToLBEndpoints{ + util.GetServicePortKey(tcp, "tcp-example"): {nodeA: util.LBEndpoints{V4IPs: []string{"10.0.0.2", "10.1.1.2"}, Port: 80}}, + util.GetServicePortKey(tcp, "other-port"): {nodeA: util.LBEndpoints{V4IPs: []string{"10.0.0.3", "10.2.2.3"}, Port: 8080}}}, }, { name: "multiples slices with different ports, OVN zone with two nodes, ETP=local", @@ -3753,12 +3754,12 @@ func Test_getEndpointsForService(t *testing.T) { svc: getSampleServiceWithTwoPortsAndETPLocal("tcp-example", "other-port", 80, 8080, tcp, tcp), nodes: sets.New(nodeA, nodeB), // zone with two nodes }, - wantClusterEndpoints: map[string]lbEndpoints{ - getServicePortKey(tcp, "tcp-example"): {V4IPs: []string{"10.0.0.2", "10.1.1.2"}, Port: 80}, - getServicePortKey(tcp, "other-port"): {V4IPs: []string{"10.0.0.3", "10.2.2.3"}, Port: 8080}}, - wantNodeEndpoints: map[string]map[string]lbEndpoints{ - getServicePortKey(tcp, "tcp-example"): {nodeA: lbEndpoints{V4IPs: []string{"10.0.0.2", "10.1.1.2"}, Port: 80}}, - getServicePortKey(tcp, "other-port"): {nodeB: lbEndpoints{V4IPs: []string{"10.0.0.3", "10.2.2.3"}, Port: 8080}}}, + wantClusterEndpoints: util.PortToLBEndpoints{ + util.GetServicePortKey(tcp, "tcp-example"): {V4IPs: []string{"10.0.0.2", "10.1.1.2"}, Port: 80}, + util.GetServicePortKey(tcp, "other-port"): {V4IPs: []string{"10.0.0.3", "10.2.2.3"}, Port: 8080}}, + wantNodeEndpoints: util.PortToNodeToLBEndpoints{ + util.GetServicePortKey(tcp, "tcp-example"): {nodeA: util.LBEndpoints{V4IPs: []string{"10.0.0.2", "10.1.1.2"}, Port: 80}}, + util.GetServicePortKey(tcp, "other-port"): {nodeB: util.LBEndpoints{V4IPs: []string{"10.0.0.3", "10.2.2.3"}, Port: 8080}}}, }, { name: "slice with a mix of ready and terminating (serving and non-serving) endpoints", @@ -3791,9 +3792,9 @@ func Test_getEndpointsForService(t *testing.T) { nodes: sets.New(nodeA), // one-node zone }, - wantClusterEndpoints: 
map[string]lbEndpoints{ - getServicePortKey(tcp, "tcp-example"): {V6IPs: []string{"2001:db2::2", "2001:db2::3"}, Port: 80}}, - wantNodeEndpoints: map[string]map[string]lbEndpoints{}, // local endpoints not filled in, since service is not ETP or ITP local + wantClusterEndpoints: util.PortToLBEndpoints{ + util.GetServicePortKey(tcp, "tcp-example"): {V6IPs: []string{"2001:db2::2", "2001:db2::3"}, Port: 80}}, + wantNodeEndpoints: util.PortToNodeToLBEndpoints{}, // local endpoints not filled in, since service is not ETP or ITP local }, { name: "slice with a mix of terminating (serving and non-serving) endpoints and no ready endpoints", @@ -3824,9 +3825,9 @@ func Test_getEndpointsForService(t *testing.T) { svc: getSampleServiceWithOnePort("tcp-example", 80, tcp), nodes: sets.New(nodeA), // one-node zone }, - wantClusterEndpoints: map[string]lbEndpoints{ - getServicePortKey(tcp, "tcp-example"): {V6IPs: []string{"2001:db2::4", "2001:db2::5"}, Port: 80}}, - wantNodeEndpoints: map[string]map[string]lbEndpoints{}, // local endpoints not filled in, since service is not ETP or ITP local + wantClusterEndpoints: util.PortToLBEndpoints{ + util.GetServicePortKey(tcp, "tcp-example"): {V6IPs: []string{"2001:db2::4", "2001:db2::5"}, Port: 80}}, + wantNodeEndpoints: util.PortToNodeToLBEndpoints{}, // local endpoints not filled in, since service is not ETP or ITP local }, { name: "slice with only terminating non-serving endpoints", @@ -3855,8 +3856,8 @@ func Test_getEndpointsForService(t *testing.T) { svc: getSampleServiceWithOnePort("tcp-example", 80, tcp), nodes: sets.New(nodeA), // one-node zone }, - wantClusterEndpoints: map[string]lbEndpoints{}, // no cluster-wide endpoints - wantNodeEndpoints: map[string]map[string]lbEndpoints{}, // local endpoints not filled in, since service is not ETP or ITP local + wantClusterEndpoints: util.PortToLBEndpoints{}, // no cluster-wide endpoints + wantNodeEndpoints: util.PortToNodeToLBEndpoints{}, // local endpoints not filled in, since service is not ETP or ITP local }, { @@ -3905,9 +3906,9 @@ func Test_getEndpointsForService(t *testing.T) { svc: getSampleServiceWithOnePort("tcp-example", 80, tcp), nodes: sets.New(nodeA), // one-node zone }, - wantClusterEndpoints: map[string]lbEndpoints{ - getServicePortKey(tcp, "tcp-example"): {V6IPs: []string{"2001:db2::3", "2001:db2::4"}, Port: 80}}, - wantNodeEndpoints: map[string]map[string]lbEndpoints{}, // local endpoints not filled in, since service is not ETP or ITP local + wantClusterEndpoints: util.PortToLBEndpoints{ + util.GetServicePortKey(tcp, "tcp-example"): {V6IPs: []string{"2001:db2::3", "2001:db2::4"}, Port: 80}}, + wantNodeEndpoints: util.PortToNodeToLBEndpoints{}, // local endpoints not filled in, since service is not ETP or ITP local }, { name: "multiple slices with only terminating non-serving endpoints", @@ -3953,8 +3954,8 @@ func Test_getEndpointsForService(t *testing.T) { svc: getSampleServiceWithOnePort("tcp-example", 80, tcp), nodes: sets.New(nodeA), // one-node zone }, - wantClusterEndpoints: map[string]lbEndpoints{}, // no cluster-wide endpoints - wantNodeEndpoints: map[string]map[string]lbEndpoints{}, // no local endpoints + wantClusterEndpoints: util.PortToLBEndpoints{}, // no cluster-wide endpoints + wantNodeEndpoints: util.PortToNodeToLBEndpoints{}, // no local endpoints }, { @@ -4005,9 +4006,9 @@ func Test_getEndpointsForService(t *testing.T) { svc: getSampleServiceWithOnePort("tcp-example", 80, tcp), nodes: sets.New(nodeA), // one-node zone }, - wantClusterEndpoints: map[string]lbEndpoints{ - 
getServicePortKey(tcp, "tcp-example"): {V4IPs: []string{"10.0.0.2"}, V6IPs: []string{"2001:db2::2"}, Port: 80}}, - wantNodeEndpoints: map[string]map[string]lbEndpoints{}, // local endpoints not filled in, since service is not ETP or ITP local + wantClusterEndpoints: util.PortToLBEndpoints{ + util.GetServicePortKey(tcp, "tcp-example"): {V4IPs: []string{"10.0.0.2"}, V6IPs: []string{"2001:db2::2"}, Port: 80}}, + wantNodeEndpoints: util.PortToNodeToLBEndpoints{}, // local endpoints not filled in, since service is not ETP or ITP local }, { name: "multiple slices with a mix of IPv4 and IPv6 terminating (serving and non-serving) endpoints and no ready endpoints (dualstack cluster)", @@ -4055,9 +4056,9 @@ func Test_getEndpointsForService(t *testing.T) { svc: getSampleServiceWithOnePort("tcp-example", 80, tcp), nodes: sets.New(nodeA), // one-node zone }, - wantClusterEndpoints: map[string]lbEndpoints{ - getServicePortKey(tcp, "tcp-example"): {V4IPs: []string{"10.0.0.3"}, V6IPs: []string{"2001:db2::3"}, Port: 80}}, - wantNodeEndpoints: map[string]map[string]lbEndpoints{}, // local endpoints not filled in, since service is not ETP or ITP local + wantClusterEndpoints: util.PortToLBEndpoints{ + util.GetServicePortKey(tcp, "tcp-example"): {V4IPs: []string{"10.0.0.3"}, V6IPs: []string{"2001:db2::3"}, Port: 80}}, + wantNodeEndpoints: util.PortToNodeToLBEndpoints{}, // local endpoints not filled in, since service is not ETP or ITP local }, { name: "multiple slices with a mix of IPv4 and IPv6 terminating non-serving endpoints (dualstack cluster)", @@ -4104,8 +4105,8 @@ func Test_getEndpointsForService(t *testing.T) { nodes: sets.New(nodeA), // one-node zone }, - wantClusterEndpoints: map[string]lbEndpoints{}, // no endpoints - wantNodeEndpoints: map[string]map[string]lbEndpoints{}, // no endpoints + wantClusterEndpoints: util.PortToLBEndpoints{}, // no endpoints + wantNodeEndpoints: util.PortToNodeToLBEndpoints{}, // no endpoints }, { name: "multiple slices with a mix of IPv4 and IPv6 ready and terminating (serving and non-serving) endpoints (dualstack cluster) and service.PublishNotReadyAddresses=true", @@ -4155,21 +4156,26 @@ func Test_getEndpointsForService(t *testing.T) { svc: getSampleServiceWithOnePortAndPublishNotReadyAddresses("tcp-example", 80, tcp), // <-- publishNotReadyAddresses=true nodes: sets.New(nodeA), // one-node zone }, - wantClusterEndpoints: map[string]lbEndpoints{ - getServicePortKey(tcp, "tcp-example"): { + wantClusterEndpoints: util.PortToLBEndpoints{ + util.GetServicePortKey(tcp, "tcp-example"): { V4IPs: []string{"10.0.0.2", "10.0.0.3", "10.0.0.4"}, V6IPs: []string{"2001:db2::2", "2001:db2::3", "2001:db2::4"}, Port: 80}}, - wantNodeEndpoints: map[string]map[string]lbEndpoints{}, // local endpoints not filled in, since service is not ETP or ITP local + wantNodeEndpoints: util.PortToNodeToLBEndpoints{}, // local endpoints not filled in, since service is not ETP or ITP local }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - portToClusterEndpoints, portToNodeToEndpoints := getEndpointsForService( - tt.args.slices, tt.args.svc, tt.args.nodes, types.DefaultNetworkName) + needsLocalEndpoints := util.ServiceExternalTrafficPolicyLocal(tt.args.svc) || util.ServiceInternalTrafficPolicyLocal(tt.args.svc) + portToClusterEndpoints, portToNodeToEndpoints, err := util.GetEndpointsForService( + tt.args.slices, tt.args.svc, tt.args.nodes, true, needsLocalEndpoints) assert.Equal(t, tt.wantClusterEndpoints, portToClusterEndpoints) assert.Equal(t, tt.wantNodeEndpoints, 
portToNodeToEndpoints) - + if tt.wantError == nil { + assert.NoError(t, err) + } else { + assert.ErrorContains(t, err, tt.wantError.Error()) + } }) } } @@ -4190,12 +4196,12 @@ func Test_makeNodeSwitchTargetIPs(t *testing.T) { vips: []string{"1.2.3.4", "fe10::1"}, protocol: corev1.ProtocolTCP, inport: 80, - clusterEndpoints: lbEndpoints{ + clusterEndpoints: util.LBEndpoints{ V4IPs: []string{"192.168.0.1"}, V6IPs: []string{"fe00:0:0:0:1::2"}, Port: 8080, }, - nodeEndpoints: map[string]lbEndpoints{ + nodeEndpoints: util.PortToLBEndpoints{ nodeA: { V4IPs: []string{"192.168.0.1"}, V6IPs: []string{"fe00:0:0:0:1::2"}, @@ -4215,13 +4221,13 @@ func Test_makeNodeSwitchTargetIPs(t *testing.T) { vips: []string{"1.2.3.4", "fe10::1"}, protocol: corev1.ProtocolTCP, inport: 80, - clusterEndpoints: lbEndpoints{ + clusterEndpoints: util.LBEndpoints{ V4IPs: []string{"192.168.0.1", "192.168.1.1"}, V6IPs: []string{"fe00:0:0:0:1::2", "fe00:0:0:0:2::2"}, Port: 8080, }, - nodeEndpoints: map[string]lbEndpoints{ + nodeEndpoints: util.PortToLBEndpoints{ nodeA: { V4IPs: []string{"192.168.0.1"}, V6IPs: []string{"fe00:0:0:0:1::2"}, @@ -4242,13 +4248,13 @@ func Test_makeNodeSwitchTargetIPs(t *testing.T) { vips: []string{"1.2.3.4", "fe10::1"}, protocol: corev1.ProtocolTCP, inport: 80, - clusterEndpoints: lbEndpoints{ + clusterEndpoints: util.LBEndpoints{ V4IPs: []string{"192.168.0.1"}, V6IPs: []string{"fe00:0:0:0:1::2"}, Port: 8080, }, - nodeEndpoints: map[string]lbEndpoints{ + nodeEndpoints: util.PortToLBEndpoints{ nodeA: { V4IPs: []string{"192.168.0.1"}, V6IPs: []string{"fe00:0:0:0:1::2"}, @@ -4268,7 +4274,7 @@ func Test_makeNodeSwitchTargetIPs(t *testing.T) { vips: []string{"1.2.3.4", "fe10::1"}, protocol: corev1.ProtocolTCP, inport: 80, - clusterEndpoints: lbEndpoints{ + clusterEndpoints: util.LBEndpoints{ V4IPs: []string{"192.168.1.1"}, // on nodeB V6IPs: []string{"fe00:0:0:0:2::2"}, // on nodeB Port: 8080, diff --git a/go-controller/pkg/ovn/controller/services/services_controller.go b/go-controller/pkg/ovn/controller/services/services_controller.go index 3f4275e028..21bae7fc9b 100644 --- a/go-controller/pkg/ovn/controller/services/services_controller.go +++ b/go-controller/pkg/ovn/controller/services/services_controller.go @@ -464,7 +464,7 @@ func (c *Controller) syncService(key string) error { } // Build the abstract LB configs for this service - perNodeConfigs, templateConfigs, clusterConfigs := buildServiceLBConfigs(service, endpointSlices, c.nodeInfos, c.useLBGroups, c.useTemplates, c.netInfo.GetNetworkName()) + perNodeConfigs, templateConfigs, clusterConfigs := buildServiceLBConfigs(service, endpointSlices, c.nodeInfos, c.useLBGroups, c.useTemplates) klog.V(5).Infof("Built service %s LB cluster-wide configs for network=%s: %#v", key, c.netInfo.GetNetworkName(), clusterConfigs) klog.V(5).Infof("Built service %s LB per-node configs for network=%s: %#v", key, c.netInfo.GetNetworkName(), perNodeConfigs) klog.V(5).Infof("Built service %s LB template configs for network=%s: %#v", key, c.netInfo.GetNetworkName(), templateConfigs) diff --git a/go-controller/pkg/util/kube.go b/go-controller/pkg/util/kube.go index 7fb84610ea..0e030a4ae3 100644 --- a/go-controller/pkg/util/kube.go +++ b/go-controller/pkg/util/kube.go @@ -867,13 +867,6 @@ func GetEligibleEndpointAddressesFromSlices(endpointSlices []*discovery.Endpoint return getEligibleEndpointAddresses(getEndpointsFromEndpointSlices(endpointSlices), service, "") } -// GetLocalEligibleEndpointAddressesFromSlices returns a set of IP addresses of endpoints that are local to 
the specified node -// and are eligible. -func GetLocalEligibleEndpointAddressesFromSlices(endpointSlices []*discovery.EndpointSlice, service *corev1.Service, nodeName string) sets.Set[string] { - endpoints := getEligibleEndpointAddresses(getEndpointsFromEndpointSlices(endpointSlices), service, nodeName) - return sets.New(endpoints...) -} - // DoesEndpointSliceContainEndpoint returns true if the endpointslice // contains an endpoint with the given IP, port and Protocol and if this endpoint is considered eligible. func DoesEndpointSliceContainEligibleEndpoint(endpointSlice *discovery.EndpointSlice, @@ -893,7 +886,8 @@ func DoesEndpointSliceContainEligibleEndpoint(endpointSlice *discovery.EndpointS // HasLocalHostNetworkEndpoints returns true if any of the nodeAddresses appear in given the set of // localEndpointAddresses. This is useful to check whether any of the provided local endpoints are host-networked. -func HasLocalHostNetworkEndpoints(localEndpointAddresses sets.Set[string], nodeAddresses []net.IP) bool { +func HasLocalHostNetworkEndpoints(portToLBEndpoints PortToLBEndpoints, nodeAddresses []net.IP) bool { + localEndpointAddresses := portToLBEndpoints.GetAddresses() if len(localEndpointAddresses) == 0 || len(nodeAddresses) == 0 { return false } diff --git a/go-controller/pkg/util/kube_test.go b/go-controller/pkg/util/kube_test.go index d48da80bd4..63c0478768 100644 --- a/go-controller/pkg/util/kube_test.go +++ b/go-controller/pkg/util/kube_test.go @@ -13,7 +13,6 @@ import ( discovery "k8s.io/api/discovery/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" k8stypes "k8s.io/apimachinery/pkg/types" - "k8s.io/apimachinery/pkg/util/sets" clientsetfake "k8s.io/client-go/kubernetes/fake" "k8s.io/utils/ptr" @@ -610,22 +609,26 @@ func TestHasLocalHostNetworkEndpoints(t *testing.T) { nodeAddresses := []net.IP{ep1IP} var tests = []struct { name string - localEndpoints sets.Set[string] + localEndpoints PortToLBEndpoints want bool }{ { "Tests with local endpoints that include the node address", - sets.New(ep1Address, ep2Address), + PortToLBEndpoints{"test": LBEndpoints{ + V4IPs: []string{ep1Address, ep2Address}, + }}, true, }, { "Tests against a different local endpoint than the node address", - sets.New(ep2Address), + PortToLBEndpoints{"test": LBEndpoints{ + V4IPs: []string{ep2Address}, + }}, false, }, { "Tests against no local endpoints", - sets.New[string](), + PortToLBEndpoints{"test": LBEndpoints{}}, false, }, } diff --git a/go-controller/pkg/util/util.go b/go-controller/pkg/util/util.go index fda62d3fcd..ef9d461518 100644 --- a/go-controller/pkg/util/util.go +++ b/go-controller/pkg/util/util.go @@ -7,6 +7,7 @@ import ( "hash/fnv" "net" "regexp" + "slices" "sort" "strconv" "strings" @@ -14,6 +15,7 @@ import ( "github.com/urfave/cli/v2" "golang.org/x/exp/constraints" + "golang.org/x/exp/maps" corev1 "k8s.io/api/core/v1" discoveryv1 "k8s.io/api/discovery/v1" @@ -679,3 +681,391 @@ func MustParseCIDR(cidr string) *net.IPNet { } return ipNet } + +// GetServicePortKey creates a unique identifier key for a service port using protocol and name. +// e.g. GetServicePortKey("TCP", "http") returns "TCP/http". +func GetServicePortKey(protocol corev1.Protocol, name string) string { + return fmt.Sprintf("%s/%s", protocol, name) +} + +// IPPort represents an IP address and port combination for load balancer destinations. +// e.g. IPPort{IP: "192.168.1.10", Port: 8080}. 
+type IPPort struct { + IP string + Port int32 +} + +// LBEndpoints contains load balancer endpoint information with IPv4 and IPv6 addresses. +// Port is the endpoint port (the one exposed by the pod) and IPs are the IP addresses of the backend pods. +// e.g. LBEndpoints{Port: 8080, V4IPs: []string{"192.168.1.10", "192.168.1.11"}, V6IPs: []string{"2001:db8::1"}}. +// TBD: currently, OVNK only supports a single backend port per named port. +type LBEndpoints struct { + Port int32 + V4IPs []string + V6IPs []string +} + +// GetV4Destinations builds IPv4 destination mappings from endpoint addresses to ports. +// e.g. for V4IPs ["192.168.1.10", "192.168.1.11"] and Port 8080, returns +// []IPPort{{IP: "192.168.1.10", Port: 8080}, {IP: "192.168.1.11", Port: 8080}}. +func (le LBEndpoints) GetV4Destinations() []IPPort { + destinations := []IPPort{} + for _, ip := range le.V4IPs { + destinations = append(destinations, IPPort{IP: ip, Port: le.Port}) + } + return destinations +} + +// GetV6Destinations builds IPv6 destination mappings from endpoint addresses to ports. +// e.g. for V6IPs ["2001:db8::1", "2001:db8::2"] and Port 8080, returns +// []IPPort{{IP: "2001:db8::1", Port: 8080}, {IP: "2001:db8::2", Port: 8080}}. +func (le LBEndpoints) GetV6Destinations() []IPPort { + destinations := []IPPort{} + for _, ip := range le.V6IPs { + destinations = append(destinations, IPPort{IP: ip, Port: le.Port}) + } + return destinations +} + +// PortToLBEndpoints maps service port keys (protocol + service port name) to load balancer endpoints. +// e.g. map["TCP/http"] = LBEndpoints{Port: 8080, V4IPs: []string{"192.168.1.10"}}. +// Port is the endpoint port (the one exposed by the pod) and IPs are the IP addresses of the backend pods. +type PortToLBEndpoints map[string]LBEndpoints + +// GetAddresses returns all unique IP addresses from all ports in the PortToLBEndpoints map. +// e.g. for PortToLBEndpoints{"TCP/http": {Port: 8080, V4IPs: ["192.168.1.10"]}, "UDP/dns": {Port: 53, V4IPs: ["192.168.1.11"]}}, +// returns sets.Set{"192.168.1.10", "192.168.1.11"}. +func (p PortToLBEndpoints) GetAddresses() sets.Set[string] { + s := sets.New[string]() + for _, lbEndpoints := range p { + s.Insert(lbEndpoints.V4IPs...) + s.Insert(lbEndpoints.V6IPs...) + } + return s +} + +// PortToNodeToLBEndpoints maps service port keys to node names and their load balancer endpoints. +// e.g. map["TCP/http"]["node1"] = LBEndpoints{Port: 8080, V4IPs: []string{"192.168.1.10"}}. +type PortToNodeToLBEndpoints map[string]map[string]LBEndpoints + +// GetNode extracts all port endpoints for a specific node from the PortToNodeToLBEndpoints map. +// e.g. for PortToNodeToLBEndpoints{"TCP/http": {"node1": {Port: 8080, V4IPs: ["192.168.1.10"]}, "node2": {Port: 8080, V4IPs: ["192.168.1.11"]}}} +// and node "node1", returns PortToLBEndpoints{"TCP/http": {Port: 8080, V4IPs: ["192.168.1.10"]}}. +func (p PortToNodeToLBEndpoints) GetNode(node string) PortToLBEndpoints { + r := make(PortToLBEndpoints) + for port, nodeToLBEndpoints := range p { + if lbe, ok := nodeToLBEndpoints[node]; ok { + r[port] = lbe + } + } + return r +} + +// GetEndpointsForService extracts endpoints from EndpointSlices for a given Service. +// It returns two maps. +// 1. Global endpoints: maps service port keys ("protocol/portname") to all endpoint addresses (if needsGlobalEndpoints). +// 2. Local endpoints: maps service port keys to per-node endpoint addresses (if needsLocalEndpoints). 
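+//
+// Illustrative invocation (variable names here are hypothetical, not part of
+// this patch):
+//
+//	cluster, perNode, err := GetEndpointsForService(slices, svc, zoneNodes, true, true)
+//	// even on a non-nil (joined) validation error, successfully built
+//	// entries are still returned in both maps
+//	nodeAEndpoints := perNode.GetNode("node-a")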
+// +// This method is common logic for both nodePortWatcher and the services Controller to build their +// service endpoints +// +// Special handling when service is nil: +// When service is nil (typically during deletion scenarios), all endpoint ports are accepted +// and processed without validation against service port specifications. This allows cleanup +// operations to proceed even when the service object is no longer available (needed for +// the nodePortWatcher). +// +// When service is not nil: +// Endpoint ports are validated against the service port specifications, and only matching +// ports are processed to ensure consistency with the service configuration. +// +// Parameters: +// - slices: EndpointSlices associated with the service +// - service: The Kubernetes Service object (nil during deletion scenarios) +// - nodes: Set of node names in the OVN zone (used to filter local endpoints) +// - needsGlobalEndpoints: request to populate PortToLBEndpoints +// - needsLocalEndpoints: request to populate PortToNodeToLBEndpoints +// +// Returns: +// - PortToLBEndpoints: Global endpoint mapping by port (empty if not needed) +// - PortToNodeToLBEndpoints: Per-node endpoint mapping by port (empty if not needed) +// - error: Validation errors encountered during processing +// +// Example output: +// +// Global: {"TCP/http": {Port: 8080, V4IPs: ["192.168.1.10", "192.168.1.11"]}} +// Local: {"TCP/http": {"node1": {Port: 8080, V4IPs: ["192.168.1.10"]}}} +// +// Validation requirements: +// - EndpointSlice port names must match Service port names (when service is not nil) +// - Only one protocol per port name is supported (Kubernetes requirement). +// - Only one target port number per protocol/port combination is supported (OVNKubernetes limitation). +func GetEndpointsForService(endpointSlices []*discoveryv1.EndpointSlice, service *corev1.Service, + nodes sets.Set[string], needsGlobalEndpoints, needsLocalEndpoints bool) (PortToLBEndpoints, PortToNodeToLBEndpoints, error) { + + var validationErrors []error + globalEndpoints := make(PortToLBEndpoints) + localEndpoints := make(PortToNodeToLBEndpoints) + + addValidationError := func(msg string, detail interface{}) { + ns, name := "", "" + if service != nil { + ns, name = service.GetNamespace(), service.GetName() + } + validationErrors = append(validationErrors, fmt.Errorf("%s for service \"%s/%s\": %v", msg, ns, name, detail)) + } + + // Parse endpoint slices into structured format: portName -> protocol -> portNumber -> endpoints. + targetEndpoints := newTargetEndpoints(endpointSlices) + + // Build list of valid service port keys, if a non-nil service was provided. Otherwise, if service is nil, this + // is a deletion (at least for the iptables / nftables logic) and thus accept all endpoints. + validServicePortKeys := map[string]bool{} + if service != nil { + for _, servicePort := range service.Spec.Ports { + name := GetServicePortKey(servicePort.Protocol, servicePort.Name) + validServicePortKeys[name] = true + } + } + + for portName, protocolMap := range targetEndpoints { + for protocol, portNumberMap := range protocolMap { + // If service is not nil, there's a valid 1 to 1 mapping of service name + protocol to slice name + protocol. + // Therefore, go through all ports of the service, and skip if no match found. + slicePortKey := GetServicePortKey(protocol, portName) + if service != nil && !validServicePortKeys[slicePortKey] { + continue + } + + if len(portNumberMap) == 0 { + // Return an error here as this should not happen. 
+ addValidationError("service protocol has no associated endpoints", + fmt.Sprintf("servicePortKey %q", slicePortKey)) + continue + } + + // Process the first (and typically only) target port number. + // OVN currently does not support multiple target port numbers for the same service name. + portNumbers := maps.Keys(portNumberMap) + slices.Sort(portNumbers) + if len(portNumbers) > 1 { + addValidationError("OVN Kubernetes does not support more than one target port per service port", + fmt.Sprintf("servicePortKey %q portNumbers %v", + slicePortKey, portNumbers)) + } + targetPortNumber := portNumbers[0] + endpointList := portNumberMap[targetPortNumber] + // Build global endpoint mapping. + if needsGlobalEndpoints { + lbe, err := buildLBEndpoints(service, targetPortNumber, endpointList) + if err != nil { + klog.Warningf("Failed to build global endpoints for port %s: %v", slicePortKey, err) + continue + } + globalEndpoints[slicePortKey] = lbe + } + // Build per-node endpoint mapping if needed for traffic policies. + if needsLocalEndpoints { + if lbe, err := buildNodeLBEndpoints(service, targetPortNumber, endpointList, nodes); err == nil { + localEndpoints[slicePortKey] = lbe + } + } + } + } + + // Log endpoint mappings for debugging. + serviceString := "" + if service != nil { + serviceString = fmt.Sprintf(" for %s/%s", service.Namespace, service.Name) + } + if needsGlobalEndpoints { + klog.V(5).Infof("Global endpoints%s: %v", serviceString, globalEndpoints) + } + if needsLocalEndpoints { + klog.V(5).Infof("Local endpoints%s: %v", serviceString, localEndpoints) + } + + return globalEndpoints, localEndpoints, errors.Join(validationErrors...) +} + +// groupEndpointsByNode organizes a list of endpoints by their associated node names. +// Endpoints without a NodeName are skipped, as they cannot be assigned to specific nodes. +// This is used for building per-node endpoint mappings for local traffic policies. +// +// Parameters: +// - endpoints: List of discovery endpoints to group +// +// Returns: +// - map[string][]discoveryv1.Endpoint: Node name to endpoints mapping +func groupEndpointsByNode(endpoints []discoveryv1.Endpoint) map[string][]discoveryv1.Endpoint { + nodeEndpoints := map[string][]discoveryv1.Endpoint{} + for _, endpoint := range endpoints { + if endpoint.NodeName == nil { + continue + } + nodeName := *endpoint.NodeName + nodeEndpoints[nodeName] = append(nodeEndpoints[nodeName], endpoint) + } + return nodeEndpoints +} + +// buildNodeLBEndpoints creates a per-node mapping of load balancer endpoints. +// Only nodes present in the provided node set are included in the result. +// This is used for services with local traffic policies that require per-node endpoint tracking. 
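+//
+// e.g. with endpoints on node1 and node2 but nodes = {"node1"}, only node1
+// appears in the result; if no endpoint survives the node filter, an
+// "empty node lb endpoints" error is returned.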
+// +// Parameters: +// - service: The Kubernetes Service object (for endpoint filtering) +// - portNumber: The target port number for the endpoints +// - endpoints: List of endpoints to process +// - nodes: Set of valid node names to include +// +// Returns: +// - map[string]LBEndpoints: Node name to LBEndpoints mapping +func buildNodeLBEndpoints(service *corev1.Service, portNumber int32, endpoints []discoveryv1.Endpoint, nodes sets.Set[string]) (map[string]LBEndpoints, error) { + nodeLBEndpoints := map[string]LBEndpoints{} + + nodeEndpoints := groupEndpointsByNode(endpoints) + for node, nodeEndpoints := range nodeEndpoints { + if !nodes.Has(node) { + continue + } + lbe, err := buildLBEndpoints(service, portNumber, nodeEndpoints) + if err != nil { + klog.Warningf("Failed to build node endpoints for node %s port %d: %v", node, portNumber, err) + continue + } + nodeLBEndpoints[node] = lbe + } + + if len(nodeLBEndpoints) == 0 { + return nodeLBEndpoints, fmt.Errorf("empty node lb endpoints") + } + return nodeLBEndpoints, nil +} + +// buildLBEndpoints constructs an LBEndpoints structure from a list of discovery endpoints. +// It filters endpoints for eligibility, separates IPv4 and IPv6 addresses, and returns +// an empty LBEndpoints if no valid addresses are found. +// +// Parameters: +// - service: The Kubernetes Service object (used for endpoint eligibility filtering) +// service may be nil! +// - port: The target port number for the endpoints +// - endpoints: List of discovery endpoints to process +// +// Returns: +// - LBEndpoints: Structure containing IPv4/IPv6 addresses and port number +func buildLBEndpoints(service *corev1.Service, port int32, endpoints []discoveryv1.Endpoint) (LBEndpoints, error) { + addresses := GetEligibleEndpointAddresses(endpoints, service) + v4IPs, v4ErrorNoIP := MatchAllIPStringFamily(false, addresses) + v6IPs, v6ErrorNoIP := MatchAllIPStringFamily(true, addresses) + + if v4ErrorNoIP != nil && v6ErrorNoIP != nil { + if service != nil { + return LBEndpoints{}, fmt.Errorf("empty IP address endpoints for service %s/%s", service.Namespace, service.Name) + } else { + return LBEndpoints{}, fmt.Errorf("empty IP address endpoints") + } + } + + if port <= 0 || port > 65535 { + if service != nil { + return LBEndpoints{}, fmt.Errorf("invalid endpoint port %d for service %s/%s: port must be between 1-65535", + port, service.Namespace, service.Name) + } else { + return LBEndpoints{}, fmt.Errorf("invalid endpoint port %d: port must be between 1-65535", port) + } + } + + return LBEndpoints{ + V4IPs: v4IPs, + V6IPs: v6IPs, + Port: port, + }, nil +} + +// targetEndpoints provides a hierarchical mapping of endpoint data from EndpointSlices. +// Structure: port name -> protocol -> port number -> list of endpoints +// Example: +// +// targetEndpoints{ +// "http": { +// corev1.ProtocolTCP: { +// 8080: []discoveryv1.Endpoint{...} +// } +// } +// } +type targetEndpoints map[string]map[corev1.Protocol]map[int32][]discoveryv1.Endpoint + +// addEndpoint adds a discovery endpoint to the TargetEndpoints structure. +// It initializes nested maps as needed to maintain the hierarchical structure. +// Multiple endpoints can be added for the same port name/protocol/port number combination. 
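+//
+// e.g. calling addEndpoint("http", corev1.ProtocolTCP, 8080, ep) twice appends
+// both endpoints to te["http"][corev1.ProtocolTCP][8080].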
+// +// Parameters: +// - portName: Name of the port (can be empty string for unnamed ports) +// - proto: Protocol (TCP, UDP, SCTP) +// - portNumber: Target port number +// - endpoint: The discovery endpoint to add +func (te targetEndpoints) addEndpoint(portName string, proto corev1.Protocol, portNumber int32, endpoint discoveryv1.Endpoint) { + if _, ok := te[portName]; !ok { + te[portName] = make(map[corev1.Protocol]map[int32][]discoveryv1.Endpoint) + } + if _, ok := te[portName][proto]; !ok { + te[portName][proto] = make(map[int32][]discoveryv1.Endpoint) + } + if _, ok := te[portName][proto][portNumber]; !ok { + te[portName][proto][portNumber] = []discoveryv1.Endpoint{} + } + te[portName][proto][portNumber] = append(te[portName][proto][portNumber], endpoint) +} + +// newTargetEndpoints constructs a TargetEndpoints structure from a list of EndpointSlices. +// It processes all endpoints from all slices, organizing them by port name, protocol, and port number. +// FQDN address types are skipped. +// +// Parameters: +// - slices: List of EndpointSlices to process +// +// Returns: +// - TargetEndpoints: Hierarchically organized endpoint data +func newTargetEndpoints(slices []*discoveryv1.EndpointSlice) targetEndpoints { + te := targetEndpoints{} + + for _, slice := range slices { + if slice == nil { + continue + } + + if slice.AddressType == discoveryv1.AddressTypeFQDN { + continue // consider only v4 and v6, discard FQDN + } + + for _, slicePort := range slice.Ports { + // Protocol and Port should never be nil per API; thus ignore invalid entries and log. + if slicePort.Protocol == nil || slicePort.Port == nil { + klog.Warningf("Skipped invalid slice port %+v belonging to slice %+v", slicePort, slice) + continue + } + portName := getPortName(slicePort.Name) + for _, endpoint := range slice.Endpoints { + te.addEndpoint(portName, *slicePort.Protocol, *slicePort.Port, endpoint) + } + } + } + return te +} + +// getPortName safely extracts the port name from a pointer, returning empty string if nil. +// This handles the case where EndpointSlice ports may have unnamed ports. +// +// Parameters: +// - name: Pointer to port name (may be nil) +// +// Returns: +// - string: Port name or empty string if nil +func getPortName(name *string) string { + if name == nil { + return "" + } + return *name +} From 282b01ec669fa0311e46acbb22a48bc687a99a1d Mon Sep 17 00:00:00 2001 From: Andreas Karis Date: Tue, 23 Sep 2025 20:27:27 +0200 Subject: [PATCH 08/20] Extends unit test coverage for named port handling in ETP local Adds tests for loadBalancer services with named ports and AllocateLoadBalancerNodePorts=False. Add new test cases in Test_getEndpointsForService. 
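The shape under test, sketched from the new localnet test case (a
LoadBalancer service whose target port is referenced by name):

    corev1.ServicePort{
        Name:       "https-port",
        Port:       8080,
        Protocol:   corev1.ProtocolTCP,
        TargetPort: intstr.FromString("https-target"),
    }

In the accompanying EndpointSlice the port is published under the service
port's name with the resolved container port number (443), which is what
the endpoint-selection code must match on.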
Signed-off-by: Andreas Karis --- .../pkg/node/gateway_localnet_linux_test.go | 149 ++++++++++ .../ovn/controller/services/lb_config_test.go | 280 +++++++++++++++++- go-controller/pkg/testing/kube.go | 12 + 3 files changed, 438 insertions(+), 3 deletions(-) diff --git a/go-controller/pkg/node/gateway_localnet_linux_test.go b/go-controller/pkg/node/gateway_localnet_linux_test.go index c67958f383..71d112b491 100644 --- a/go-controller/pkg/node/gateway_localnet_linux_test.go +++ b/go-controller/pkg/node/gateway_localnet_linux_test.go @@ -999,6 +999,155 @@ var _ = Describe("Node Operations", func() { Expect(app.Run([]string{app.Name})).To(Succeed()) }) + It("inits iptables rules and openflows with named port and AllocateLoadBalancerNodePorts=False, ETP=local, LGW mode", func() { + app.Action = func(*cli.Context) error { + minNFakeCommands := nInitialFakeCommands + 1 + fExec.AddRepeatedFakeCmd(&ovntest.ExpectedCmd{ + Cmd: "ovs-ofctl show ", + }, minNFakeCommands) + + config.Gateway.Mode = config.GatewayModeLocal + svcPortName := "https-port" + svcPortValue := int32(8080) + svcProtocol := corev1.ProtocolTCP + svcTargetPortName := "https-target" + svcAllocateLoadBalancerNodePorts := false + svcStatusIP := "192.168.0.10" + svcStatusIPMode := corev1.LoadBalancerIPModeVIP + + epPortValue := int32(443) + epPortProtocol := corev1.ProtocolTCP + + nodeName := "node" + + service := *newService("service1", "namespace1", "10.129.0.2", + []corev1.ServicePort{ + { + Name: svcPortName, + Port: svcPortValue, + Protocol: svcProtocol, + TargetPort: intstr.FromString(svcTargetPortName), + }, + }, + corev1.ServiceTypeLoadBalancer, + nil, + corev1.ServiceStatus{ + LoadBalancer: corev1.LoadBalancerStatus{ + Ingress: []corev1.LoadBalancerIngress{ + { + IP: svcStatusIP, + IPMode: &svcStatusIPMode, + }, + }, + }, + }, + true, false, + ) + service.Spec.AllocateLoadBalancerNodePorts = &svcAllocateLoadBalancerNodePorts + ep1 := discovery.Endpoint{ + Addresses: []string{"10.244.0.3"}, + NodeName: &nodeName, + } + ep2 := discovery.Endpoint{ + Addresses: []string{"10.244.0.4"}, + NodeName: &nodeName, + } + epPort1 := discovery.EndpointPort{ + Name: &svcPortName, + Port: &epPortValue, + Protocol: &epPortProtocol, + } + // endpointSlice.Endpoints is ovn-networked so this will + // come under !hasLocalHostNetEp case + endpointSlice := *newEndpointSlice( + "service1", + "namespace1", + []discovery.Endpoint{ep1, ep2}, + []discovery.EndpointPort{epPort1}, + ) + + stopChan := make(chan struct{}) + fakeClient := util.GetOVNClientset(&service, &endpointSlice).GetNodeClientset() + wf, err := factory.NewNodeWatchFactory(fakeClient, nodeName) + Expect(err).ToNot(HaveOccurred()) + Expect(wf.Start()).To(Succeed()) + defer func() { + close(stopChan) + wf.Shutdown() + }() + + fNPW.watchFactory = wf + Expect(startNodePortWatcher(fNPW, fakeClient)).To(Succeed()) + + expectedTables := map[string]util.FakeTable{ + "nat": { + "PREROUTING": []string{ + "-j OVN-KUBE-ETP", + "-j OVN-KUBE-EXTERNALIP", + "-j OVN-KUBE-NODEPORT", + }, + "OUTPUT": []string{ + "-j OVN-KUBE-EXTERNALIP", + "-j OVN-KUBE-NODEPORT", + "-j OVN-KUBE-ITP", + }, + "OVN-KUBE-NODEPORT": []string{}, + "OVN-KUBE-EXTERNALIP": []string{ + fmt.Sprintf("-p %s -d %s --dport %d -j DNAT --to-destination %s:%v", + service.Spec.Ports[0].Protocol, + service.Status.LoadBalancer.Ingress[0].IP, + service.Spec.Ports[0].Port, + service.Spec.ClusterIP, + service.Spec.Ports[0].Port), + }, + "OVN-KUBE-ETP": []string{ + fmt.Sprintf("-p %s -d %s --dport %d -j DNAT --to-destination %s:%d -m statistic 
--mode random --probability 0.5000000000", + service.Spec.Ports[0].Protocol, + service.Status.LoadBalancer.Ingress[0].IP, + service.Spec.Ports[0].Port, + endpointSlice.Endpoints[0].Addresses[0], + *endpointSlice.Ports[0].Port), + fmt.Sprintf("-p %s -d %s --dport %d -j DNAT --to-destination %s:%d -m statistic --mode random --probability 1.0000000000", + service.Spec.Ports[0].Protocol, + service.Status.LoadBalancer.Ingress[0].IP, + service.Spec.Ports[0].Port, + endpointSlice.Endpoints[1].Addresses[0], + *endpointSlice.Ports[0].Port), + }, + "OVN-KUBE-ITP": []string{}, + }, + "filter": {}, + "mangle": { + "OUTPUT": []string{ + "-j OVN-KUBE-ITP", + }, + "OVN-KUBE-ITP": []string{}, + }, + } + + f4 := iptV4.(*util.FakeIPTables) + err = f4.MatchState(expectedTables, nil) + Expect(err).NotTo(HaveOccurred()) + + expectedNFT := getBaseNFTRules(types.K8sMgmtIntfName) + expectedNFT += fmt.Sprintf("add element inet ovn-kubernetes mgmtport-no-snat-services-v4 { %s . tcp . %v }\n"+ + "add element inet ovn-kubernetes mgmtport-no-snat-services-v4 { %s . tcp . %v }\n", + endpointSlice.Endpoints[1].Addresses[0], + *endpointSlice.Ports[0].Port, + endpointSlice.Endpoints[0].Addresses[0], + *endpointSlice.Ports[0].Port) + err = nodenft.MatchNFTRules(expectedNFT, nft.Dump()) + Expect(err).NotTo(HaveOccurred()) + + flows := fNPW.ofm.getFlowsByKey("NodePort_namespace1_service1_tcp_31111") + Expect(flows).To(BeNil()) + + return nil + } + err := app.Run([]string{app.Name}) + Expect(err).NotTo(HaveOccurred()) + }) + It("inits iptables rules and openflows with LoadBalancer where ETP=cluster, LGW mode", func() { app.Action = func(*cli.Context) error { externalIP := "1.1.1.1" diff --git a/go-controller/pkg/ovn/controller/services/lb_config_test.go b/go-controller/pkg/ovn/controller/services/lb_config_test.go index 1e30c7c302..ee530bc1ec 100644 --- a/go-controller/pkg/ovn/controller/services/lb_config_test.go +++ b/go-controller/pkg/ovn/controller/services/lb_config_test.go @@ -60,10 +60,10 @@ func getSampleService(publishNotReadyAddresses bool) *corev1.Service { } } -func getServicePort(name string, _ int32, protocol corev1.Protocol) corev1.ServicePort { +func getServicePort(name string, port int32, protocol corev1.Protocol) corev1.ServicePort { return corev1.ServicePort{ Name: name, - TargetPort: intstr.FromInt(int(httpPortValue)), + TargetPort: intstr.FromInt(int(port)), Protocol: protocol, } } @@ -4163,10 +4163,284 @@ func Test_GetEndpointsForService(t *testing.T) { wantNodeEndpoints: util.PortToNodeToLBEndpoints{}, // local endpoints not filled in, since service is not ETP or ITP local }, + // According to https://kubernetes.io/docs/concepts/services-networking/service/#field-spec-ports, the following + // should be supported. However, in OVNK, this was never implemented. 
+ { + name: "multiple slices with same port name, different ports, ETP=local", + args: args{ + slices: []*discovery.EndpointSlice{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "svc-ab23", + Namespace: "ns", + Labels: map[string]string{discovery.LabelServiceName: "svc"}, + }, + Ports: []discovery.EndpointPort{ + { + Name: ptr.To("tcp-example"), + Protocol: ptr.To(corev1.ProtocolTCP), + Port: ptr.To(int32(80)), + }, + }, + AddressType: discovery.AddressTypeIPv4, + Endpoints: kubetest.MakeReadyEndpointList(nodeA, "10.0.0.2", "10.1.1.2"), + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: "svc-ab24", + Namespace: "ns", + Labels: map[string]string{discovery.LabelServiceName: "svc"}, + }, + Ports: []discovery.EndpointPort{ + { + Name: ptr.To("tcp-example"), + Protocol: ptr.To(corev1.ProtocolTCP), + Port: ptr.To(int32(8080)), + }, + }, + AddressType: discovery.AddressTypeIPv4, + Endpoints: kubetest.MakeReadyEndpointList(nodeA, "10.0.0.3", "10.2.2.3"), + }, + }, + svc: getSampleServiceWithOnePortAndETPLocal("tcp-example", 80, tcp), + nodes: sets.New(nodeA), // one-node zone + }, + wantClusterEndpoints: util.PortToLBEndpoints{ + util.GetServicePortKey(tcp, "tcp-example"): {V4IPs: []string{"10.0.0.2", "10.1.1.2"}, Port: 80}}, + wantNodeEndpoints: util.PortToNodeToLBEndpoints{ + util.GetServicePortKey(tcp, "tcp-example"): {nodeA: util.LBEndpoints{V4IPs: []string{"10.0.0.2", "10.1.1.2"}, Port: 80}}}, + wantError: fmt.Errorf("OVN Kubernetes does not support more than one target port per service port for " + + "service \"test/service-test\": servicePortKey \"TCP/tcp-example\" portNumbers [80 8080]"), + }, + // The following is not supported by Kubernetes - OVNK will just ignore this and look up the matching + // protocol (TCP) only. + { + name: "multiple slices with same port name, different protocols, ETP=local", + args: args{ + slices: []*discovery.EndpointSlice{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "svc-ab23", + Namespace: "ns", + Labels: map[string]string{discovery.LabelServiceName: "svc"}, + }, + Ports: []discovery.EndpointPort{ + { + Name: ptr.To("tcp-example"), + Protocol: ptr.To(corev1.ProtocolTCP), + Port: ptr.To(int32(80)), + }, + }, + AddressType: discovery.AddressTypeIPv4, + Endpoints: kubetest.MakeReadyEndpointList(nodeA, "10.0.0.2", "10.1.1.2"), + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: "svc-ab24", + Namespace: "ns", + Labels: map[string]string{discovery.LabelServiceName: "svc"}, + }, + Ports: []discovery.EndpointPort{ + { + Name: ptr.To("tcp-example"), + Protocol: ptr.To(corev1.ProtocolUDP), + Port: ptr.To(int32(80)), + }, + }, + AddressType: discovery.AddressTypeIPv4, + Endpoints: kubetest.MakeReadyEndpointList(nodeA, "10.0.0.3", "10.2.2.3"), + }, + }, + svc: getSampleServiceWithOnePortAndETPLocal("tcp-example", 80, tcp), + nodes: sets.New(nodeA), // one-node zone + }, + wantClusterEndpoints: util.PortToLBEndpoints{ + util.GetServicePortKey(tcp, "tcp-example"): {V4IPs: []string{"10.0.0.2", "10.1.1.2"}, Port: 80}}, + wantNodeEndpoints: util.PortToNodeToLBEndpoints{ + util.GetServicePortKey(tcp, "tcp-example"): {nodeA: util.LBEndpoints{V4IPs: []string{"10.0.0.2", "10.1.1.2"}, Port: 80}}}, + }, + // The following should never happen in k8s - endpoints should not be empty. 
+ { + name: "multiple slices with same port name, empty endpoints, invalid ports, ETP=local", + args: args{ + slices: []*discovery.EndpointSlice{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "svc-ab23", + Namespace: "ns", + Labels: map[string]string{discovery.LabelServiceName: "svc"}, + }, + Ports: []discovery.EndpointPort{ + { + Name: ptr.To("tcp-example"), + Protocol: ptr.To(corev1.ProtocolTCP), + Port: ptr.To(int32(80)), + }, + }, + AddressType: discovery.AddressTypeIPv4, + Endpoints: []discovery.Endpoint{kubetest.MakeUnassignedEndpoint("10.1.1.2")}, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: "svc-ab23", + Namespace: "ns", + Labels: map[string]string{discovery.LabelServiceName: "svc"}, + }, + Ports: []discovery.EndpointPort{ + { + Name: ptr.To("tcp-example"), + Protocol: ptr.To(corev1.ProtocolTCP), + Port: ptr.To(int32(80)), + }, + }, + AddressType: discovery.AddressTypeIPv4, + Endpoints: []discovery.Endpoint{}, + }, + }, + svc: getSampleServiceWithOnePortAndETPLocal("tcp-example", 80, tcp), + nodes: sets.New(nodeA), // one-node zone + }, + wantClusterEndpoints: util.PortToLBEndpoints{"TCP/tcp-example": util.LBEndpoints{Port: 80, V4IPs: []string{"10.1.1.2"}, V6IPs: []string(nil)}}, + wantNodeEndpoints: util.PortToNodeToLBEndpoints{}, + }, + // The following should never happen in k8s - invalid port number. + { + name: "single slices, invalid port number, ETP=local", + args: args{ + slices: []*discovery.EndpointSlice{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "svc-ab23", + Namespace: "ns", + Labels: map[string]string{discovery.LabelServiceName: "svc"}, + }, + Ports: []discovery.EndpointPort{ + { + Name: ptr.To("tcp-example"), + Protocol: ptr.To(corev1.ProtocolTCP), + Port: ptr.To(int32(-2)), + }, + }, + AddressType: discovery.AddressTypeIPv4, + Endpoints: []discovery.Endpoint{kubetest.MakeUnassignedEndpoint("10.1.1.2")}, + }, + }, + svc: getSampleServiceWithOnePortAndETPLocal("tcp-example", 80, tcp), + nodes: sets.New(nodeA), // one-node zone + }, + wantClusterEndpoints: util.PortToLBEndpoints{}, + wantNodeEndpoints: util.PortToNodeToLBEndpoints{}, + }, + { + name: "nil service without endpoints, ETP=local", + args: args{ + slices: []*discovery.EndpointSlice{}, + svc: nil, + nodes: sets.New(nodeA), + }, + wantClusterEndpoints: util.PortToLBEndpoints{}, + wantNodeEndpoints: util.PortToNodeToLBEndpoints{}, + }, + { + name: "multiple slices, nil service with endpoints, ETP=local", + args: args{ + slices: []*discovery.EndpointSlice{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "svc-ab23", + Namespace: "ns", + Labels: map[string]string{discovery.LabelServiceName: "svc"}, + }, + Ports: []discovery.EndpointPort{ + { + Name: ptr.To("tcp-example"), + Protocol: ptr.To(corev1.ProtocolTCP), + Port: ptr.To(int32(80)), + }, + }, + AddressType: discovery.AddressTypeIPv4, + Endpoints: kubetest.MakeReadyEndpointList(nodeA, "10.1.1.2"), + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: "svc-ab23", + Namespace: "ns", + Labels: map[string]string{discovery.LabelServiceName: "svc"}, + }, + Ports: []discovery.EndpointPort{ + { + Name: ptr.To("tcp-example2"), + Protocol: ptr.To(corev1.ProtocolTCP), + Port: ptr.To(int32(80)), + }, + }, + AddressType: discovery.AddressTypeIPv4, + Endpoints: kubetest.MakeReadyEndpointList(nodeA, "10.1.1.3"), + }, + }, + svc: nil, + nodes: sets.New(nodeA), + }, + wantClusterEndpoints: util.PortToLBEndpoints{ + "TCP/tcp-example": util.LBEndpoints{Port: 80, V4IPs: []string{"10.1.1.2"}, V6IPs: []string(nil)}, + "TCP/tcp-example2": util.LBEndpoints{Port: 80, V4IPs: 
[]string{"10.1.1.3"}, V6IPs: []string(nil)}, + }, + wantNodeEndpoints: util.PortToNodeToLBEndpoints{ + "TCP/tcp-example": map[string]util.LBEndpoints{"node-a": {Port: 80, V4IPs: []string{"10.1.1.2"}, V6IPs: []string(nil)}}, + "TCP/tcp-example2": map[string]util.LBEndpoints{"node-a": {Port: 80, V4IPs: []string{"10.1.1.3"}, V6IPs: []string(nil)}}, + }, + }, + { + name: "multiple slices, service selects correct slice, ETP=local", + args: args{ + slices: []*discovery.EndpointSlice{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "svc-ab23", + Namespace: "ns", + Labels: map[string]string{discovery.LabelServiceName: "svc"}, + }, + Ports: []discovery.EndpointPort{ + { + Name: ptr.To("tcp-example"), + Protocol: ptr.To(corev1.ProtocolTCP), + Port: ptr.To(int32(80)), + }, + }, + AddressType: discovery.AddressTypeIPv4, + Endpoints: kubetest.MakeReadyEndpointList(nodeA, "10.1.1.2"), + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: "svc-ab23", + Namespace: "ns", + Labels: map[string]string{discovery.LabelServiceName: "svc"}, + }, + Ports: []discovery.EndpointPort{ + { + Name: ptr.To("tcp-example2"), + Protocol: ptr.To(corev1.ProtocolTCP), + Port: ptr.To(int32(80)), + }, + }, + AddressType: discovery.AddressTypeIPv4, + Endpoints: kubetest.MakeReadyEndpointList(nodeA, "10.1.1.3"), + }, + }, + svc: getSampleServiceWithOnePortAndETPLocal("tcp-example2", 80, tcp), + nodes: sets.New(nodeA), + }, + wantClusterEndpoints: util.PortToLBEndpoints{ + "TCP/tcp-example2": util.LBEndpoints{Port: 80, V4IPs: []string{"10.1.1.3"}, V6IPs: []string(nil)}, + }, + wantNodeEndpoints: util.PortToNodeToLBEndpoints{ + "TCP/tcp-example2": map[string]util.LBEndpoints{"node-a": {Port: 80, V4IPs: []string{"10.1.1.3"}, V6IPs: []string(nil)}}, + }, + }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - needsLocalEndpoints := util.ServiceExternalTrafficPolicyLocal(tt.args.svc) || util.ServiceInternalTrafficPolicyLocal(tt.args.svc) + needsLocalEndpoints := tt.args.svc == nil || util.ServiceExternalTrafficPolicyLocal(tt.args.svc) || util.ServiceInternalTrafficPolicyLocal(tt.args.svc) portToClusterEndpoints, portToNodeToEndpoints, err := util.GetEndpointsForService( tt.args.slices, tt.args.svc, tt.args.nodes, true, needsLocalEndpoints) assert.Equal(t, tt.wantClusterEndpoints, portToClusterEndpoints) diff --git a/go-controller/pkg/testing/kube.go b/go-controller/pkg/testing/kube.go index d47564d61a..29e5f260dd 100644 --- a/go-controller/pkg/testing/kube.go +++ b/go-controller/pkg/testing/kube.go @@ -52,6 +52,18 @@ func MakeTerminatingNonServingEndpoint(node string, addresses ...string) discove } } +func MakeUnassignedEndpoint(addresses ...string) discovery.Endpoint { + return discovery.Endpoint{ + Conditions: discovery.EndpointConditions{ + Ready: ptr.To(true), + Serving: ptr.To(true), + Terminating: ptr.To(false), + }, + Addresses: addresses, + NodeName: nil, + } +} + func MirrorEndpointSlice(defaultEndpointSlice *discovery.EndpointSlice, network string, keepEndpoints bool) *discovery.EndpointSlice { mirror := defaultEndpointSlice.DeepCopy() mirror.Name = defaultEndpointSlice.Name + "-mirrored" From 651759c59efbf391956779d5b080654782659f9a Mon Sep 17 00:00:00 2001 From: Andreas Karis Date: Fri, 26 Sep 2025 17:51:49 +0200 Subject: [PATCH 09/20] E2E service: Move checkNumberOf.. 
to util.go Signed-off-by: Andreas Karis --- test/e2e/service.go | 36 ++++++++++++------------------------ test/e2e/util.go | 21 +++++++++++++++++++++ 2 files changed, 33 insertions(+), 24 deletions(-) diff --git a/test/e2e/service.go b/test/e2e/service.go index 6acfc77c8d..8fb4e84553 100644 --- a/test/e2e/service.go +++ b/test/e2e/service.go @@ -1911,30 +1911,18 @@ spec: time.Sleep(time.Second * 5) // buffer to ensure all rules are created correctly - checkNumberOfETPRules := func(value int, pattern string) wait.ConditionFunc { - return func() (bool, error) { - numberOfETPRules := pokeNodeIPTableRules(backendNodeName, pattern) - return (numberOfETPRules == value), nil - } - } - checkNumberOfNFTElements := func(value int, name string) wait.ConditionFunc { - return func() (bool, error) { - numberOfNFTElements := countNFTablesElements(backendNodeName, name) - return (numberOfNFTElements == value), nil - } - } noSNATServicesSet := "mgmtport-no-snat-services-v4" if utilnet.IsIPv6String(svcLoadBalancerIP) { noSNATServicesSet = "mgmtport-no-snat-services-v6" } - err = wait.PollImmediate(retryInterval, retryTimeout, checkNumberOfETPRules(2, "OVN-KUBE-ETP")) + err = wait.PollImmediate(retryInterval, retryTimeout, checkNumberOfETPRules(backendNodeName, 2, "OVN-KUBE-ETP")) framework.ExpectNoError(err, "Couldn't fetch the correct number of iptable rules, err: %v", err) - err = wait.PollImmediate(retryInterval, retryTimeout, checkNumberOfETPRules(5, "OVN-KUBE-EXTERNALIP")) + err = wait.PollImmediate(retryInterval, retryTimeout, checkNumberOfETPRules(backendNodeName, 5, "OVN-KUBE-EXTERNALIP")) framework.ExpectNoError(err, "Couldn't fetch the correct number of iptable rules, err: %v", err) - err = wait.PollImmediate(retryInterval, retryTimeout, checkNumberOfNFTElements(0, noSNATServicesSet)) + err = wait.PollImmediate(retryInterval, retryTimeout, checkNumberOfNFTElements(backendNodeName, 0, noSNATServicesSet)) framework.ExpectNoError(err, "Couldn't fetch the correct number of nftables elements, err: %v", err) - err = wait.PollImmediate(retryInterval, retryTimeout, checkNumberOfNFTElements(0, "mgmtport-no-snat-nodeports")) + err = wait.PollImmediate(retryInterval, retryTimeout, checkNumberOfNFTElements(backendNodeName, 0, "mgmtport-no-snat-nodeports")) framework.ExpectNoError(err, "Couldn't fetch the correct number of nftables elements, err: %v", err) ginkgo.By("by sending a TCP packet to service " + svcName + " with type=LoadBalancer in namespace " + namespaceName + " with backend pod " + backendName) @@ -1958,11 +1946,11 @@ spec: time.Sleep(time.Second * 5) // buffer to ensure all rules are created correctly - err = wait.PollImmediate(retryInterval, retryTimeout, checkNumberOfETPRules(10, "OVN-KUBE-ETP")) + err = wait.PollImmediate(retryInterval, retryTimeout, checkNumberOfETPRules(backendNodeName, 10, "OVN-KUBE-ETP")) framework.ExpectNoError(err, "Couldn't fetch the correct number of iptable rules, err: %v", err) - err = wait.PollImmediate(retryInterval, retryTimeout, checkNumberOfNFTElements(8, noSNATServicesSet)) + err = wait.PollImmediate(retryInterval, retryTimeout, checkNumberOfNFTElements(backendNodeName, 8, noSNATServicesSet)) framework.ExpectNoError(err, "Couldn't fetch the correct number of nftables elements, err: %v", err) - err = wait.PollImmediate(retryInterval, retryTimeout, checkNumberOfNFTElements(0, "mgmtport-no-snat-nodeports")) + err = wait.PollImmediate(retryInterval, retryTimeout, checkNumberOfNFTElements(backendNodeName, 0, "mgmtport-no-snat-nodeports")) 
framework.ExpectNoError(err, "Couldn't fetch the correct number of nftables elements, err: %v", err) ginkgo.By("by sending a TCP packet to service " + svcName + " with type=LoadBalancer in namespace " + namespaceName + " with backend pod " + backendName) @@ -1974,7 +1962,7 @@ spec: if utilnet.IsIPv6String(svcLoadBalancerIP) { pktSize = 80 } - err = wait.PollImmediate(retryInterval, retryTimeout, checkNumberOfETPRules(1, fmt.Sprintf("[1:%d] -A OVN-KUBE-ETP", pktSize))) + err = wait.PollImmediate(retryInterval, retryTimeout, checkNumberOfETPRules(backendNodeName, 1, fmt.Sprintf("[1:%d] -A OVN-KUBE-ETP", pktSize))) framework.ExpectNoError(err, "Couldn't fetch the correct number of iptable rules, err: %v", err) // FIXME: This used to check that the no-snat rule had been hit, but nftables // doesn't attach counters to rules unless you explicitly request them, which @@ -1988,11 +1976,11 @@ spec: // number of rules/elements should have decreased by 2 (one for the TCP port, // one for UDP) - err = wait.PollImmediate(retryInterval, retryTimeout, checkNumberOfETPRules(8, "OVN-KUBE-ETP")) + err = wait.PollImmediate(retryInterval, retryTimeout, checkNumberOfETPRules(backendNodeName, 8, "OVN-KUBE-ETP")) framework.ExpectNoError(err, "Couldn't fetch the correct number of iptable rules, err: %v", err) - err = wait.PollImmediate(retryInterval, retryTimeout, checkNumberOfNFTElements(6, noSNATServicesSet)) + err = wait.PollImmediate(retryInterval, retryTimeout, checkNumberOfNFTElements(backendNodeName, 6, noSNATServicesSet)) framework.ExpectNoError(err, "Couldn't fetch the correct number of nftables elements, err: %v", err) - err = wait.PollImmediate(retryInterval, retryTimeout, checkNumberOfNFTElements(0, "mgmtport-no-snat-nodeports")) + err = wait.PollImmediate(retryInterval, retryTimeout, checkNumberOfNFTElements(backendNodeName, 0, "mgmtport-no-snat-nodeports")) framework.ExpectNoError(err, "Couldn't fetch the correct number of nftables elements, err: %v", err) ginkgo.By("by sending a TCP packet to service " + svcName + " with type=LoadBalancer in namespace " + namespaceName + " with backend pod " + backendName) @@ -2000,7 +1988,7 @@ spec: _, err = wgetInExternalContainer(externalContainer, svcLoadBalancerIP, endpointHTTPPort, "big.iso") framework.ExpectNoError(err, "failed to curl load balancer service") - err = wait.PollImmediate(retryInterval, retryTimeout, checkNumberOfETPRules(1, fmt.Sprintf("[1:%d] -A OVN-KUBE-ETP", pktSize))) + err = wait.PollImmediate(retryInterval, retryTimeout, checkNumberOfETPRules(backendNodeName, 1, fmt.Sprintf("[1:%d] -A OVN-KUBE-ETP", pktSize))) framework.ExpectNoError(err, "Couldn't fetch the correct number of iptable rules, err: %v", err) // FIXME: This used to check that the no-snat rule had been hit, but nftables // doesn't attach counters to rules unless you explicitly request them, which diff --git a/test/e2e/util.go b/test/e2e/util.go index 6523c367a7..dfa8d56e69 100644 --- a/test/e2e/util.go +++ b/test/e2e/util.go @@ -1004,6 +1004,27 @@ func countNFTablesElements(nodeName, name string) int { return count } +func checkNumberOfETPRules(backendNodeName string, value int, pattern string) wait.ConditionFunc { + return func() (bool, error) { + numberOfETPRules := pokeNodeIPTableRules(backendNodeName, pattern) + isExpected := numberOfETPRules == value + if !isExpected { + framework.Logf("numberOfETPRules got: %d, expected: %d", numberOfETPRules, value) + } + return isExpected, nil + } +} +func checkNumberOfNFTElements(backendNodeName string, value int, name string) 
wait.ConditionFunc {
+	return func() (bool, error) {
+		numberOfNFTElements := countNFTablesElements(backendNodeName, name)
+		isExpected := numberOfNFTElements == value
+		if !isExpected {
+			framework.Logf("numberOfNFTElements got: %d, expected: %d", numberOfNFTElements, value)
+		}
+		return isExpected, nil
+	}
+}
+
 // isDualStackCluster returns 'true' if at least one of the nodes has more than one node subnet.
 func isDualStackCluster(nodes *v1.NodeList) bool {
 	for _, node := range nodes.Items {

From 2fff3669b65ed60dc13b28e655ac93561029303e Mon Sep 17 00:00:00 2001
From: Andreas Karis
Date: Fri, 26 Sep 2025 11:27:32 +0200
Subject: [PATCH 10/20] E2E service: Fix potential flake in conn to an ext IP
 using src port

E2E test "Allow connection to an external IP using a source port that is
equal to a node port" might flake if a service is already created with
the same nodePort number. Give it a chance to recover by selecting a
different port.

Signed-off-by: Andreas Karis
---
 test/e2e/service.go | 23 +++++++++++++++++------
 1 file changed, 17 insertions(+), 6 deletions(-)

diff --git a/test/e2e/service.go b/test/e2e/service.go
index 8fb4e84553..21ffed62f2 100644
--- a/test/e2e/service.go
+++ b/test/e2e/service.go
@@ -75,8 +75,8 @@ var _ = ginkgo.Describe("Services", feature.Service, func() {
 	})

 	ginkgo.It("Allow connection to an external IP using a source port that is equal to a node port", func() {
+		var nodePort int32 = 31990
 		const (
-			nodePort    = 31990
 			connTimeout = "2"
 			dstIPv4     = "1.1.1.1"
 			dstPort     = "80"
@@ -86,10 +86,21 @@ var _ = ginkgo.Describe("Services", feature.Service, func() {
 		}
 		ginkgo.By("create node port service")
 		jig := e2eservice.NewTestJig(cs, f.Namespace.Name, serviceName)
-		_, err := jig.CreateTCPService(context.TODO(), func(svc *v1.Service) {
-			svc.Spec.Type = v1.ServiceTypeNodePort
-			svc.Spec.Ports[0].NodePort = nodePort
-		})
+		err := wait.PollImmediate(10*time.Second, 60*time.Second,
+			func() (bool, error) {
+				_, err := jig.CreateTCPService(context.TODO(), func(svc *v1.Service) {
+					svc.Spec.Type = v1.ServiceTypeNodePort
+					svc.Spec.Ports[0].NodePort = nodePort
+				})
+				if err != nil {
+					if strings.Contains(err.Error(), "provided port is already allocated") {
+						nodePort++
+						return false, nil
+					}
+					return false, err
+				}
+				return true, nil
+			})
 		framework.ExpectNoError(err, "failed to create TCP node port service")
 		ginkgo.By("create pod selected by node port service")
 		serverPod := e2epod.NewAgnhostPod(f.Namespace.Name, "svc-backend", nil, nil, nil)
@@ -100,7 +111,7 @@ var _ = ginkgo.Describe("Services", feature.Service, func() {
 		e2epod.NewPodClient(f).CreateSync(context.TODO(), clientPod)
 		ginkgo.By("connect externally pinning the source port to equal the node port")
 		_, err = e2ekubectl.RunKubectl(clientPod.Namespace, "exec", clientPod.Name, "--", "nc",
-			"-p", strconv.Itoa(nodePort), "-z", "-w", connTimeout, dstIPv4, dstPort)
+			"-p", strconv.Itoa(int(nodePort)), "-z", "-w", connTimeout, dstIPv4, dstPort)
 		framework.ExpectNoError(err, "expected connection to succeed using source port identical to node port")
 	})

From 1ed9749fab83f3e9722ded3811be3776de7cb53c Mon Sep 17 00:00:00 2001
From: Dave Tucker
Date: Tue, 14 Oct 2025 16:20:25 +0100
Subject: [PATCH 11/20] chore: Remove SetTaintOnNode

Node taints for too-small MTU were removed in #3004. Taints for
NoSchedule were removed in #2459.

In general, it's not the CNI plugin's responsibility to set node taints.
This is for the kubelet/container runtime to figure out.
Therefore, it's safe to remove this unused code since it won't be required in future. Signed-off-by: Dave Tucker --- go-controller/pkg/kube/kube.go | 64 --------------- go-controller/pkg/kube/kube_test.go | 86 -------------------- go-controller/pkg/kube/mocks/Interface.go | 36 -------- go-controller/pkg/kube/mocks/InterfaceOVN.go | 18 ---- 4 files changed, 204 deletions(-) diff --git a/go-controller/pkg/kube/kube.go b/go-controller/pkg/kube/kube.go index 7eccec3d7f..6bb79e2311 100644 --- a/go-controller/pkg/kube/kube.go +++ b/go-controller/pkg/kube/kube.go @@ -55,8 +55,6 @@ type Interface interface { SetAnnotationsOnService(namespace, serviceName string, annotations map[string]interface{}) error SetAnnotationsOnNode(nodeName string, annotations map[string]interface{}) error SetAnnotationsOnNamespace(namespaceName string, annotations map[string]interface{}) error - SetTaintOnNode(nodeName string, taint *corev1.Taint) error - RemoveTaintFromNode(nodeName string, taint *corev1.Taint) error SetLabelsOnNode(nodeName string, labels map[string]interface{}) error PatchNode(old, new *corev1.Node) error UpdateNodeStatus(node *corev1.Node) error @@ -197,68 +195,6 @@ func (k *Kube) SetAnnotationsOnService(namespace, name string, annotations map[s return err } -// SetTaintOnNode tries to add a new taint to the node. If the taint already exists, it doesn't do anything. -func (k *Kube) SetTaintOnNode(nodeName string, taint *corev1.Taint) error { - node, err := k.GetNodeForWindows(nodeName) - if err != nil { - klog.Errorf("Unable to retrieve node %s for tainting %s: %v", nodeName, taint.ToString(), err) - return err - } - newNode := node.DeepCopy() - nodeTaints := newNode.Spec.Taints - - var newTaints []corev1.Taint - for i := range nodeTaints { - if taint.MatchTaint(&nodeTaints[i]) { - klog.Infof("Taint %s already exists on Node %s", taint.ToString(), node.Name) - return nil - } - newTaints = append(newTaints, nodeTaints[i]) - } - - klog.Infof("Setting taint %s on Node %s", taint.ToString(), node.Name) - newTaints = append(newTaints, *taint) - newNode.Spec.Taints = newTaints - err = k.PatchNode(node, newNode) - if err != nil { - klog.Errorf("Unable to add taint %s on node %s: %v", taint.ToString(), node.Name, err) - return err - } - - klog.Infof("Added taint %s on node %s", taint.ToString(), node.Name) - return nil -} - -// RemoveTaintFromNode removes all the taints that have the same key and effect from the node. -// If the taint doesn't exist, it doesn't do anything. 
-func (k *Kube) RemoveTaintFromNode(nodeName string, taint *corev1.Taint) error { - node, err := k.GetNodeForWindows(nodeName) - if err != nil { - klog.Errorf("Unable to retrieve node %s for tainting %s: %v", nodeName, taint.ToString(), err) - return err - } - newNode := node.DeepCopy() - nodeTaints := newNode.Spec.Taints - - var newTaints []corev1.Taint - for i := range nodeTaints { - if taint.MatchTaint(&nodeTaints[i]) { - klog.Infof("Removing taint %s from Node %s", taint.ToString(), node.Name) - continue - } - newTaints = append(newTaints, nodeTaints[i]) - } - - newNode.Spec.Taints = newTaints - err = k.PatchNode(node, newNode) - if err != nil { - klog.Errorf("Unable to remove taint %s on node %s: %v", taint.ToString(), node.Name, err) - return err - } - klog.Infof("Removed taint %s on node %s", taint.ToString(), node.Name) - return nil -} - // SetLabelsOnNode takes the node name and map of key/value string pairs to set as labels func (k *Kube) SetLabelsOnNode(nodeName string, labels map[string]interface{}) error { patch := struct { diff --git a/go-controller/pkg/kube/kube_test.go b/go-controller/pkg/kube/kube_test.go index d93119ff3a..72d6dd5a46 100644 --- a/go-controller/pkg/kube/kube_test.go +++ b/go-controller/pkg/kube/kube_test.go @@ -16,7 +16,6 @@ var _ = Describe("Kube", func() { Describe("Taint node operations", func() { var kube Kube var existingNodeTaints []corev1.Taint - var taint corev1.Taint var node *corev1.Node BeforeEach(func() { @@ -24,7 +23,6 @@ var _ = Describe("Kube", func() { kube = Kube{ KClient: fakeClient, } - taint = corev1.Taint{Key: "my-taint-key", Value: "my-taint-value", Effect: corev1.TaintEffectNoSchedule} }) JustBeforeEach(func() { @@ -43,90 +41,6 @@ var _ = Describe("Kube", func() { Expect(err).ToNot(HaveOccurred()) Expect(node).NotTo(BeZero()) }) - - Context("with a node not having the taint", func() { - - BeforeEach(func() { - existingNodeTaints = make([]corev1.Taint, 0) - }) - - Context("SetTaintOnNode", func() { - It("should add the taint to the node", func() { - err := kube.SetTaintOnNode(node.Name, &taint) - Expect(err).ToNot(HaveOccurred()) - - loadedNode, err := kube.KClient.CoreV1().Nodes().Get(context.TODO(), node.Name, metav1.GetOptions{}) - Expect(err).ToNot(HaveOccurred()) - Expect(loadedNode.Spec.Taints).To(ContainElement(taint)) - }) - }) - - Context("RemoveTaintFromNode", func() { - It("should remove nothing from the node", func() { - err := kube.RemoveTaintFromNode(node.Name, &taint) - Expect(err).ToNot(HaveOccurred()) - - loadedNode, err := kube.KClient.CoreV1().Nodes().Get(context.TODO(), node.Name, metav1.GetOptions{}) - Expect(err).ToNot(HaveOccurred()) - Expect(loadedNode.Spec.Taints).To(HaveLen(len(node.Spec.Taints))) - }) - }) - }) - - Context("with a node having the same taint already", func() { - - BeforeEach(func() { - existingNodeTaints = []corev1.Taint{taint} - }) - - Context("SetTaintOnNode", func() { - It("should update the taint of the node if effect differs", func() { - updatedTaint := taint.DeepCopy() - updatedTaint.Effect = corev1.TaintEffectPreferNoSchedule - - err := kube.SetTaintOnNode(node.Name, updatedTaint) - Expect(err).ToNot(HaveOccurred()) - - loadedNode, err := kube.KClient.CoreV1().Nodes().Get(context.TODO(), node.Name, metav1.GetOptions{}) - Expect(err).ToNot(HaveOccurred()) - Expect(loadedNode.Spec.Taints).To(ContainElement(*updatedTaint)) - }) - - It("should update nothing if taint is the same", func() { - err := kube.SetTaintOnNode(node.Name, &taint) - Expect(err).ToNot(HaveOccurred()) - - updatedNode, 
err := kube.GetNodeForWindows(node.Name) - Expect(err).ToNot(HaveOccurred()) - Expect(updatedNode.Spec.Taints).To(Equal([]corev1.Taint{taint})) - }) - }) - - Context("RemoveTaintFromNode", func() { - It("should remove the taint from the node", func() { - err := kube.RemoveTaintFromNode(node.Name, &taint) - Expect(err).ToNot(HaveOccurred()) - - loadedNode, err := kube.KClient.CoreV1().Nodes().Get(context.TODO(), node.Name, metav1.GetOptions{}) - Expect(err).ToNot(HaveOccurred()) - Expect(loadedNode.Spec.Taints).NotTo(ContainElement(taint)) - Expect(loadedNode.Spec.Taints).To(HaveLen(len(node.Spec.Taints) - 1)) - }) - }) - }) - - Context("with references to a node which doesn't exist", func() { - - Context("SetTaintOnNode", func() { - - It("should return an error, if node doesn't exist", func() { - nodeName := "targaryen" - err := kube.SetTaintOnNode(nodeName, &taint) - Expect(err).To(HaveOccurred()) - Expect(err.Error()).To(Equal("nodes \"targaryen\" not found")) - }) - }) - }) }) Describe("SetAnnotationsOnPod", func() { diff --git a/go-controller/pkg/kube/mocks/Interface.go b/go-controller/pkg/kube/mocks/Interface.go index 594d33d699..2e68b7898f 100644 --- a/go-controller/pkg/kube/mocks/Interface.go +++ b/go-controller/pkg/kube/mocks/Interface.go @@ -145,24 +145,6 @@ func (_m *Interface) PatchNode(old *corev1.Node, new *corev1.Node) error { return r0 } -// RemoveTaintFromNode provides a mock function with given fields: nodeName, taint -func (_m *Interface) RemoveTaintFromNode(nodeName string, taint *corev1.Taint) error { - ret := _m.Called(nodeName, taint) - - if len(ret) == 0 { - panic("no return value specified for RemoveTaintFromNode") - } - - var r0 error - if rf, ok := ret.Get(0).(func(string, *corev1.Taint) error); ok { - r0 = rf(nodeName, taint) - } else { - r0 = ret.Error(0) - } - - return r0 -} - // SetAnnotationsOnNamespace provides a mock function with given fields: namespaceName, annotations func (_m *Interface) SetAnnotationsOnNamespace(namespaceName string, annotations map[string]interface{}) error { ret := _m.Called(namespaceName, annotations) @@ -253,24 +235,6 @@ func (_m *Interface) SetLabelsOnNode(nodeName string, labels map[string]interfac return r0 } -// SetTaintOnNode provides a mock function with given fields: nodeName, taint -func (_m *Interface) SetTaintOnNode(nodeName string, taint *corev1.Taint) error { - ret := _m.Called(nodeName, taint) - - if len(ret) == 0 { - panic("no return value specified for SetTaintOnNode") - } - - var r0 error - if rf, ok := ret.Get(0).(func(string, *corev1.Taint) error); ok { - r0 = rf(nodeName, taint) - } else { - r0 = ret.Error(0) - } - - return r0 -} - // UpdateNodeStatus provides a mock function with given fields: node func (_m *Interface) UpdateNodeStatus(node *corev1.Node) error { ret := _m.Called(node) diff --git a/go-controller/pkg/kube/mocks/InterfaceOVN.go b/go-controller/pkg/kube/mocks/InterfaceOVN.go index 18e93ed800..ef253ba2be 100644 --- a/go-controller/pkg/kube/mocks/InterfaceOVN.go +++ b/go-controller/pkg/kube/mocks/InterfaceOVN.go @@ -415,24 +415,6 @@ func (_m *InterfaceOVN) SetLabelsOnNode(nodeName string, labels map[string]inter return r0 } -// SetTaintOnNode provides a mock function with given fields: nodeName, taint -func (_m *InterfaceOVN) SetTaintOnNode(nodeName string, taint *apicorev1.Taint) error { - ret := _m.Called(nodeName, taint) - - if len(ret) == 0 { - panic("no return value specified for SetTaintOnNode") - } - - var r0 error - if rf, ok := ret.Get(0).(func(string, *apicorev1.Taint) error); ok { - r0 = 
rf(nodeName, taint)
-	} else {
-		r0 = ret.Error(0)
-	}
-
-	return r0
-}
-
 // UpdateCloudPrivateIPConfig provides a mock function with given fields: cloudPrivateIPConfig
 func (_m *InterfaceOVN) UpdateCloudPrivateIPConfig(cloudPrivateIPConfig *v1.CloudPrivateIPConfig) (*v1.CloudPrivateIPConfig, error) {
 	ret := _m.Called(cloudPrivateIPConfig)

From b39031c05909ef73ed4d8984d074b46bc69950cf Mon Sep 17 00:00:00 2001
From: Tim Rozet
Date: Tue, 14 Oct 2025 11:43:54 -0400
Subject: [PATCH 12/20] Increase single target attempts

While trying to reproduce flakes with these tests, this is the one
failure mode I could reproduce easily. In the tests we add 20 target IPs
to each gateway, then ping them to make sure they go to each gateway and
get resolved. However, for the TCP/UDP tests we only run a listener on
one of the target IPs, then attempt to contact the listener from a
source pod 20 times and check that both gateways were hit.

In my testing, I can easily run these tests in a loop and see them fail
because all 20 attempts hash to the same gateway and never hit the other
gateway. I bumped the attempts to 50, ran it all night, and no longer
saw the issue.

Not sure if this fixes all of the flakes we see with these tests, as the
logs for the other runs have gone stale; consider this closed for now,
and reopen it if we see more flakes.

Closes: #4432

Signed-off-by: Tim Rozet
---
 test/e2e/external_gateways.go | 27 ++++++++++++++-------------
 1 file changed, 14 insertions(+), 13 deletions(-)

diff --git a/test/e2e/external_gateways.go b/test/e2e/external_gateways.go
index 628866910b..8900f2180e 100644
--- a/test/e2e/external_gateways.go
+++ b/test/e2e/external_gateways.go
@@ -88,10 +88,11 @@ type gatewayTestIPs struct {
 var _ = ginkgo.Describe("External Gateway", feature.ExternalGateway, func() {

 	const (
-		gwTCPPort = 80
-		gwUDPPort = 90
-		podTCPPort = 80
-		podUDPPort = 90
+		gwTCPPort           = 80
+		gwUDPPort           = 90
+		podTCPPort          = 80
+		podUDPPort          = 90
+		singleTargetRetries = 50 // enough attempts to avoid hashing to the same gateway
 	)

 	// Validate pods can reach a network running in a container's loopback address via
@@ -577,7 +578,7 @@ var _ = ginkgo.Describe("External Gateway", feature.ExternalGateway, func() {
 			returnedHostNames := make(map[string]struct{})
 			gwIP := addresses.targetIPs[0]
 			success := false
-			for i := 0; i < 20; i++ {
+			for i := 0; i < singleTargetRetries; i++ {
 				args := []string{"exec", srcPingPodName, "--"}
 				if protocol == "tcp" {
 					args = append(args, "bash", "-c", fmt.Sprintf("echo | nc -w 1 %s %d", gwIP, gwPort))
@@ -744,7 +745,7 @@ var _ = ginkgo.Describe("External Gateway", feature.ExternalGateway, func() {

 			// Picking only the first address, the one the udp listener is set for
 			gwIP := addresses.targetIPs[0]
-			for i := 0; i < 20; i++ {
+			for i := 0; i < singleTargetRetries; i++ {
 				hostname := pokeHostnameViaNC(srcPodName, f.Namespace.Name, protocol, gwIP, gwPort)
 				if hostname != "" {
 					returnedHostNames[hostname] = struct{}{}
@@ -1166,7 +1167,7 @@ var _ = ginkgo.Describe("External Gateway", feature.ExternalGateway, func() {
 			returnedHostNames := make(map[string]struct{})
 			gwIP := addresses.targetIPs[0]
 			success := false
-			for i := 0; i < 20; i++ {
+			for i := 0; i < singleTargetRetries; i++ {
 				hostname := pokeHostnameViaNC(srcPingPodName, f.Namespace.Name, protocol, gwIP, gwPort)
 				if hostname != "" {
 					returnedHostNames[hostname] = struct{}{}
@@ -1387,7 +1388,7 @@ var _ = ginkgo.Describe("External Gateway", feature.ExternalGateway, func() {

 			// Picking only the first address, the one the udp
listener is set for
 			gwIP := addresses.targetIPs[0]
-			for i := 0; i < 20; i++ {
+			for i := 0; i < singleTargetRetries; i++ {
 				hostname := pokeHostnameViaNC(srcPodName, f.Namespace.Name, protocol, gwIP, gwPort)
 				if hostname != "" {
 					returnedHostNames[hostname] = struct{}{}
@@ -1554,7 +1555,7 @@ var _ = ginkgo.Describe("External Gateway", feature.ExternalGateway, func() {
 			returnedHostNames := make(map[string]struct{})
 			gwIP := addresses.targetIPs[0]
 			success := false
-			for i := 0; i < 20; i++ {
+			for i := 0; i < singleTargetRetries; i++ {
 				args := []string{"exec", srcPingPodName, "--"}
 				if protocol == "tcp" {
 					args = append(args, "bash", "-c", fmt.Sprintf("echo | nc -w 1 %s %d", gwIP, gwPort))
@@ -1849,7 +1850,7 @@ var _ = ginkgo.Describe("External Gateway", feature.ExternalGateway, func() {

 			// Picking only the first address, the one the udp listener is set for
 			gwIP := addresses.targetIPs[0]
-			for i := 0; i < 20; i++ {
+			for i := 0; i < singleTargetRetries; i++ {
 				hostname := pokeHostnameViaNC(srcPodName, f.Namespace.Name, protocol, gwIP, gwPort)
 				if hostname != "" {
 					returnedHostNames[hostname] = struct{}{}
@@ -2273,7 +2274,7 @@ var _ = ginkgo.Describe("External Gateway", feature.ExternalGateway, func() {
 			returnedHostNames := make(map[string]struct{})
 			gwIP := addresses.targetIPs[0]
 			success := false
-			for i := 0; i < 20; i++ {
+			for i := 0; i < singleTargetRetries; i++ {
 				hostname := pokeHostnameViaNC(srcPingPodName, f.Namespace.Name, protocol, gwIP, gwPort)
 				if hostname != "" {
 					returnedHostNames[hostname] = struct{}{}
@@ -2491,7 +2492,7 @@ var _ = ginkgo.Describe("External Gateway", feature.ExternalGateway, func() {

 			// Picking only the first address, the one the udp listener is set for
 			gwIP := addresses.targetIPs[0]
-			for i := 0; i < 20; i++ {
+			for i := 0; i < singleTargetRetries; i++ {
 				hostname := pokeHostnameViaNC(srcPodName, f.Namespace.Name, protocol, gwIP, gwPort)
 				if hostname != "" {
 					returnedHostNames[hostname] = struct{}{}
@@ -2663,7 +2664,7 @@ var _ = ginkgo.Describe("External Gateway", feature.ExternalGateway, func() {
 			returnedHostNames := make(map[string]struct{})
 			gwIP := addresses.targetIPs[0]
 			success := false
-			for i := 0; i < 20; i++ {
+			for i := 0; i < singleTargetRetries; i++ {
 				args := []string{"exec", srcPingPodName, "--"}
 				if protocol == "tcp" {
 					args = append(args, "bash", "-c", fmt.Sprintf("echo | nc -w 1 %s %d", gwIP, gwPort))

From 2871cdae138b10a282a3a930c5f5c516f9c523bc Mon Sep 17 00:00:00 2001
From: Lei Huang
Date: Mon, 13 Oct 2025 11:31:51 -0700
Subject: [PATCH 13/20] RunOVSAppctl() doesn't work when OVS runs on the host
 and hostPID is false

When OVS runs as a system service on the node,
/run/openvswitch/ovs-vswitchd.pid is locked by ovs-vswitchd with its PID
in the host process ID namespace:

```
$ lslocks | grep ovs-vswitchd.pid
COMMAND PID TYPE SIZE MODE M START END PATH
ovs-vswitchd 1615 POSIX 5B WRITE 0 0 0 /run/openvswitch/ovs-vswitchd.pid

$ stat -Lc '%d:%i %n' /run/openvswitch/ovs-vswitchd.pid
25:5398 /run/openvswitch/ovs-vswitchd.pid
```

In the ovnkube-node Pod, if hostPID is false, ovs-vswitchd's PID is not
visible inside the Pod's process ID namespace, so the file lock becomes
invisible as well, which causes ovs-appctl to fail to run:

```
$ ovs-appctl fdb/show br-int
2025-10-14T19:18:36Z|00001|daemon_unix|WARN|/var/run/openvswitch/ovs-vswitchd.pid: stale pidfile for pid 1615 being deleted by pid 0
ovs-appctl: cannot read pidfile "/var/run/openvswitch/ovs-vswitchd.pid" (No such process)
command terminated with exit code 1

$ stat -Lc '%d:%i %n' /run/openvswitch/ovs-vswitchd.pid
25:5398
/run/openvswitch/ovs-vswitchd.pid
```

This change replaces RunOVSAppctl() with RunOvsVswitchdAppCtl(), which
uses the `-t /var/run/openvswitch/ovs-vswitchd.1234.ctl` option to skip
reading the pid file.

Signed-off-by: Lei Huang
---
 go-controller/pkg/metrics/ovs.go              |  2 +-
 .../node/default_node_network_controller.go   |  8 +---
 .../pkg/node/gateway_init_linux_test.go       | 38 +++++++++++++----
 go-controller/pkg/node/gateway_udn_test.go    |  8 +++-
 go-controller/pkg/util/ovs.go                 |  9 +---
 go-controller/pkg/util/ovs_unit_test.go       | 41 -------------------
 6 files changed, 39 insertions(+), 67 deletions(-)

diff --git a/go-controller/pkg/metrics/ovs.go b/go-controller/pkg/metrics/ovs.go
index 718fa031e7..b4c95ec86f 100644
--- a/go-controller/pkg/metrics/ovs.go
+++ b/go-controller/pkg/metrics/ovs.go
@@ -907,7 +907,7 @@ func registerOvsMetrics(ovsDBClient libovsdbclient.Client, metricsScrapeInterval
 	}

 	// OVS datapath metrics updater
-	go ovsDatapathMetricsUpdater(util.RunOVSAppctl, metricsScrapeInterval, stopChan)
+	go ovsDatapathMetricsUpdater(util.RunOvsVswitchdAppCtl, metricsScrapeInterval, stopChan)
 	// OVS bridge metrics updater
 	go ovsBridgeMetricsUpdater(ovsDBClient, util.RunOVSOfctl, metricsScrapeInterval, stopChan)
 	// OVS interface metrics updater
diff --git a/go-controller/pkg/node/default_node_network_controller.go b/go-controller/pkg/node/default_node_network_controller.go
index e3923fcc54..cd81267cc2 100644
--- a/go-controller/pkg/node/default_node_network_controller.go
+++ b/go-controller/pkg/node/default_node_network_controller.go
@@ -511,13 +511,7 @@ func setEncapPort(ctx context.Context) error {

 func isOVNControllerReady() (bool, error) {
 	// check node's connection status
-	runDir := util.GetOvnRunDir()
-	pid, err := os.ReadFile(runDir + "ovn-controller.pid")
-	if err != nil {
-		return false, fmt.Errorf("unknown pid for ovn-controller process: %v", err)
-	}
-	ctlFile := runDir + fmt.Sprintf("ovn-controller.%s.ctl", strings.TrimSuffix(string(pid), "\n"))
-	ret, _, err := util.RunOVSAppctl("-t", ctlFile, "connection-status")
+	ret, _, err := util.RunOVNControllerAppCtl("connection-status")
 	if err != nil {
 		return false, fmt.Errorf("could not get connection status: %w", err)
 	}
diff --git a/go-controller/pkg/node/gateway_init_linux_test.go b/go-controller/pkg/node/gateway_init_linux_test.go
index b46ef36c84..bc343c5aaf 100644
--- a/go-controller/pkg/node/gateway_init_linux_test.go
+++ b/go-controller/pkg/node/gateway_init_linux_test.go
@@ -19,6 +19,7 @@ import (
 	"github.com/containernetworking/plugins/pkg/testutils"
 	nadfake "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/client/clientset/versioned/fake"
 	"github.com/k8snetworkplumbingwg/sriovnet"
+	"github.com/spf13/afero"
 	"github.com/stretchr/testify/mock"
 	"github.com/urfave/cli/v2"
 	"github.com/vishvananda/netlink"
@@ -203,7 +204,7 @@ func shareGatewayInterfaceTest(app *cli.App, testNS ns.NetNS,
 		Output: systemID,
 	})
 	fexec.AddFakeCmd(&ovntest.ExpectedCmd{
-		Cmd:    "ovs-appctl --timeout=15 dpif/show-dp-features breth0",
+		Cmd:    "ovs-appctl -t /var/run/openvswitch/ovs-vswitchd.1234.ctl dpif/show-dp-features breth0",
 		Output: "Check pkt length action: Yes",
 	})
 	fexec.AddFakeCmd(&ovntest.ExpectedCmd{
 		Output: fmt.Sprintf("%t", hwOffload),
 	})
 	fexec.AddFakeCmdsNoOutputNoError([]string{
-		"ovs-appctl --timeout=15 fdb/add breth0 breth0 0 " + eth0MAC,
+		"ovs-appctl -t /var/run/openvswitch/ovs-vswitchd.1234.ctl fdb/add breth0 breth0 0 " + eth0MAC,
 	})
fexec.AddFakeCmd(&ovntest.ExpectedCmd{ Cmd: "ovs-vsctl --timeout=15 get Interface patch-breth0_node1-to-br-int ofport", @@ -258,7 +259,13 @@ func shareGatewayInterfaceTest(app *cli.App, testNS ns.NetNS, }) // syncServices() - err := util.SetExec(fexec) + // Setup mock filesystem for ovs-vswitchd.pid file needed by ovs-appctl commands + err := util.AppFs.MkdirAll("/var/run/openvswitch/", 0o755) + Expect(err).NotTo(HaveOccurred()) + err = afero.WriteFile(util.AppFs, "/var/run/openvswitch/ovs-vswitchd.pid", []byte("1234"), 0o644) + Expect(err).NotTo(HaveOccurred()) + + err = util.SetExec(fexec) Expect(err).NotTo(HaveOccurred()) _, err = config.InitConfig(ctx, fexec, nil) @@ -645,7 +652,7 @@ func shareGatewayInterfaceDPUTest(app *cli.App, testNS ns.NetNS, }) // DetectCheckPktLengthSupport fexec.AddFakeCmd(&ovntest.ExpectedCmd{ - Cmd: "ovs-appctl --timeout=15 dpif/show-dp-features " + brphys, + Cmd: "ovs-appctl -t /var/run/openvswitch/ovs-vswitchd.1234.ctl dpif/show-dp-features " + brphys, Output: "Check pkt length action: Yes", }) // IsOvsHwOffloadEnabled @@ -654,7 +661,7 @@ func shareGatewayInterfaceDPUTest(app *cli.App, testNS ns.NetNS, Output: "false", }) fexec.AddFakeCmdsNoOutputNoError([]string{ - fmt.Sprintf("ovs-appctl --timeout=15 fdb/add %s %s 0 %s", brphys, brphys, hostMAC), + fmt.Sprintf("ovs-appctl -t /var/run/openvswitch/ovs-vswitchd.1234.ctl fdb/add %s %s 0 %s", brphys, brphys, hostMAC), }) // GetDPUHostInterface fexec.AddFakeCmd(&ovntest.ExpectedCmd{ @@ -706,7 +713,14 @@ func shareGatewayInterfaceDPUTest(app *cli.App, testNS ns.NetNS, Output: "7", }) // syncServices() - err := util.SetExec(fexec) + + // Setup mock filesystem for ovs-vswitchd.pid file needed by ovs-appctl commands + err := util.AppFs.MkdirAll("/var/run/openvswitch/", 0o755) + Expect(err).NotTo(HaveOccurred()) + err = afero.WriteFile(util.AppFs, "/var/run/openvswitch/ovs-vswitchd.pid", []byte("1234"), 0o644) + Expect(err).NotTo(HaveOccurred()) + + err = util.SetExec(fexec) Expect(err).NotTo(HaveOccurred()) _, err = config.InitConfig(ctx, fexec, nil) @@ -1110,7 +1124,7 @@ OFPT_GET_CONFIG_REPLY (xid=0x4): frags=normal miss_send_len=0` Output: systemID, }) fexec.AddFakeCmd(&ovntest.ExpectedCmd{ - Cmd: "ovs-appctl --timeout=15 dpif/show-dp-features breth0", + Cmd: "ovs-appctl -t /var/run/openvswitch/ovs-vswitchd.1234.ctl dpif/show-dp-features breth0", Output: "Check pkt length action: Yes", }) fexec.AddFakeCmd(&ovntest.ExpectedCmd{ @@ -1118,7 +1132,7 @@ OFPT_GET_CONFIG_REPLY (xid=0x4): frags=normal miss_send_len=0` Output: "false", }) fexec.AddFakeCmdsNoOutputNoError([]string{ - "ovs-appctl --timeout=15 fdb/add breth0 breth0 0 " + eth0MAC, + "ovs-appctl -t /var/run/openvswitch/ovs-vswitchd.1234.ctl fdb/add breth0 breth0 0 " + eth0MAC, }) fexec.AddFakeCmd(&ovntest.ExpectedCmd{ Cmd: "ovs-vsctl --timeout=15 get Interface patch-breth0_node1-to-br-int ofport", @@ -1170,7 +1184,13 @@ OFPT_GET_CONFIG_REPLY (xid=0x4): frags=normal miss_send_len=0` }) // syncServices() - err := util.SetExec(fexec) + // Setup mock filesystem for ovs-vswitchd.pid file needed by ovs-appctl commands + err := util.AppFs.MkdirAll("/var/run/openvswitch/", 0o755) + Expect(err).NotTo(HaveOccurred()) + err = afero.WriteFile(util.AppFs, "/var/run/openvswitch/ovs-vswitchd.pid", []byte("1234"), 0o644) + Expect(err).NotTo(HaveOccurred()) + + err = util.SetExec(fexec) Expect(err).NotTo(HaveOccurred()) _, err = config.InitConfig(ctx, fexec, nil) diff --git a/go-controller/pkg/node/gateway_udn_test.go b/go-controller/pkg/node/gateway_udn_test.go index 
984a666195..cf6ee7e1dc 100644 --- a/go-controller/pkg/node/gateway_udn_test.go +++ b/go-controller/pkg/node/gateway_udn_test.go @@ -14,6 +14,7 @@ import ( "github.com/containernetworking/plugins/pkg/testutils" nadapi "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/apis/k8s.cni.cncf.io/v1" nadfake "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/client/clientset/versioned/fake" + "github.com/spf13/afero" "github.com/stretchr/testify/mock" "github.com/vishvananda/netlink" "golang.org/x/sys/unix" @@ -170,7 +171,7 @@ func setUpGatewayFakeOVSCommands(fexec *ovntest.FakeExec) { Output: "cb9ec8fa-b409-4ef3-9f42-d9283c47aac6", }) fexec.AddFakeCmd(&ovntest.ExpectedCmd{ - Cmd: "ovs-appctl --timeout=15 dpif/show-dp-features breth0", + Cmd: "ovs-appctl -t /var/run/openvswitch/ovs-vswitchd.1234.ctl dpif/show-dp-features breth0", Output: "Check pkt length action: Yes", }) fexec.AddFakeCmd(&ovntest.ExpectedCmd{ @@ -178,7 +179,7 @@ func setUpGatewayFakeOVSCommands(fexec *ovntest.FakeExec) { Output: "false", }) fexec.AddFakeCmdsNoOutputNoError([]string{ - "ovs-appctl --timeout=15 fdb/add breth0 breth0 0 00:00:00:55:66:99", + "ovs-appctl -t /var/run/openvswitch/ovs-vswitchd.1234.ctl fdb/add breth0 breth0 0 00:00:00:55:66:99", }) fexec.AddFakeCmd(&ovntest.ExpectedCmd{ Cmd: "ovs-vsctl --timeout=15 get Interface patch-breth0_worker1-to-br-int ofport", @@ -293,6 +294,9 @@ var _ = Describe("UserDefinedNetworkGateway", func() { config.Gateway.V4MasqueradeSubnet = "169.254.0.0/17" // Set up a fake vsctl command mock interface fexec = ovntest.NewFakeExec() + // Setup mock filesystem for ovs-vswitchd.pid file needed by ovs-appctl commands + Expect(util.AppFs.MkdirAll("/var/run/openvswitch/", 0o755)).To(Succeed()) + Expect(afero.WriteFile(util.AppFs, "/var/run/openvswitch/ovs-vswitchd.pid", []byte("1234"), 0o644)).To(Succeed()) Expect(util.SetExec(fexec)).To(Succeed()) // Set up a fake k8sMgmt interface testNS, err = testutils.NewNS() diff --git a/go-controller/pkg/util/ovs.go b/go-controller/pkg/util/ovs.go index f6e3ca0ad0..12ed487c6a 100644 --- a/go-controller/pkg/util/ovs.go +++ b/go-controller/pkg/util/ovs.go @@ -362,11 +362,6 @@ func RunOVSAppctlWithTimeout(timeout int, args ...string) (string, string, error return strings.Trim(strings.TrimSpace(stdout.String()), "\""), stderr.String(), err } -// RunOVSAppctl runs a command via ovs-appctl. -func RunOVSAppctl(args ...string) (string, string, error) { - return RunOVSAppctlWithTimeout(ovsCommandTimeout, args...) -} - // RunOVNAppctlWithTimeout runs a command via ovn-appctl. If ovn-appctl is not present, then it // falls back to using ovs-appctl. 
func RunOVNAppctlWithTimeout(timeout int, args ...string) (string, string, error) { @@ -802,7 +797,7 @@ func DetectSCTPSupport() (bool, error) { // DetectCheckPktLengthSupport checks if OVN supports check packet length action in OVS kernel datapath func DetectCheckPktLengthSupport(bridge string) (bool, error) { - stdout, stderr, err := RunOVSAppctl("dpif/show-dp-features", bridge) + stdout, stderr, err := RunOvsVswitchdAppCtl("dpif/show-dp-features", bridge) if err != nil { klog.Errorf("Failed to query OVS for check packet length support, "+ "stdout: %q, stderr: %q, error: %v", stdout, stderr, err) @@ -824,7 +819,7 @@ func DetectCheckPktLengthSupport(bridge string) (bool, error) { func SetStaticFDBEntry(bridge, port string, mac net.HardwareAddr) error { // Assume default VLAN for local port vlan := "0" - stdout, stderr, err := RunOVSAppctl("fdb/add", bridge, port, vlan, mac.String()) + stdout, stderr, err := RunOvsVswitchdAppCtl("fdb/add", bridge, port, vlan, mac.String()) if err != nil { return fmt.Errorf("failed to add FDB entry to OVS for LOCAL port, "+ "stdout: %q, stderr: %q, error: %v", stdout, stderr, err) diff --git a/go-controller/pkg/util/ovs_unit_test.go b/go-controller/pkg/util/ovs_unit_test.go index ba6e63678a..3dcddc9a88 100644 --- a/go-controller/pkg/util/ovs_unit_test.go +++ b/go-controller/pkg/util/ovs_unit_test.go @@ -1092,47 +1092,6 @@ func TestRunOVSAppctlWithTimeout(t *testing.T) { } } -func TestRunOVSAppctl(t *testing.T) { - mockKexecIface := new(mock_k8s_io_utils_exec.Interface) - mockExecRunner := new(mocks.ExecRunner) - mockCmd := new(mock_k8s_io_utils_exec.Cmd) - // below is defined in ovs.go - runCmdExecRunner = mockExecRunner - // note runner is defined in ovs.go file - runner = &execHelper{exec: mockKexecIface} - tests := []struct { - desc string - expectedErr error - onRetArgsExecUtilsIface *ovntest.TestifyMockHelper - onRetArgsKexecIface *ovntest.TestifyMockHelper - }{ - { - desc: "negative: run `ovs-appctl` command", - expectedErr: fmt.Errorf("failed to execute ovs-appctl command"), - onRetArgsExecUtilsIface: &ovntest.TestifyMockHelper{OnCallMethodName: "RunCmd", OnCallMethodArgType: []string{"*mocks.Cmd", "string", "[]string", "string"}, RetArgList: []interface{}{nil, nil, fmt.Errorf("failed to execute ovs-appctl command")}}, - onRetArgsKexecIface: &ovntest.TestifyMockHelper{OnCallMethodName: "Command", OnCallMethodArgType: []string{"string", "string"}, RetArgList: []interface{}{mockCmd}}, - }, - { - desc: "positive: run `ovs-appctl` command", - expectedErr: nil, - onRetArgsExecUtilsIface: &ovntest.TestifyMockHelper{OnCallMethodName: "RunCmd", OnCallMethodArgType: []string{"*mocks.Cmd", "string", "[]string", "string"}, RetArgList: []interface{}{bytes.NewBuffer([]byte("testblah")), bytes.NewBuffer([]byte("")), nil}}, - onRetArgsKexecIface: &ovntest.TestifyMockHelper{OnCallMethodName: "Command", OnCallMethodArgType: []string{"string", "string"}, RetArgList: []interface{}{mockCmd}}, - }, - } - for i, tc := range tests { - t.Run(fmt.Sprintf("%d:%s", i, tc.desc), func(t *testing.T) { - ovntest.ProcessMockFn(&mockExecRunner.Mock, *tc.onRetArgsExecUtilsIface) - ovntest.ProcessMockFn(&mockKexecIface.Mock, *tc.onRetArgsKexecIface) - - _, _, e := RunOVSAppctl() - - assert.Equal(t, tc.expectedErr, e) - mockExecRunner.AssertExpectations(t) - mockKexecIface.AssertExpectations(t) - }) - } -} - func TestRunOVNAppctlWithTimeout(t *testing.T) { mockKexecIface := new(mock_k8s_io_utils_exec.Interface) mockExecRunner := new(mocks.ExecRunner) From 
628c3c4a34605e0ab900e6fd72b36c79c0a62962 Mon Sep 17 00:00:00 2001 From: Tim Rozet Date: Tue, 14 Oct 2025 18:32:55 -0400 Subject: [PATCH 14/20] Fixes BFD timeout for external gateway E2E The external gateway tests use default BFD timers, which in OVN is a send interval of 1 second, with a max of 3 failures - or 3 seconds total. The tests would remove an external gateway, wait 3 seconds, and then send a packet from a pod client. We notice in upstream CI that this sometimes flakes on the first attempt and causes the test case to fail. I cannot reproduce this locally, but we can see that the math is wrong here. If the external gateway was deleted at the same time that a heartbeat was sent and ack'ed by OVN, then it would require almost 4 seconds to detect 3 more failures and transition BFD down. Therefore make the timeout a constant and bump it to 4 seconds. Signed-off-by: Tim Rozet --- test/e2e/external_gateways.go | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/test/e2e/external_gateways.go b/test/e2e/external_gateways.go index 8900f2180e..6f32d78159 100644 --- a/test/e2e/external_gateways.go +++ b/test/e2e/external_gateways.go @@ -93,6 +93,7 @@ var _ = ginkgo.Describe("External Gateway", feature.ExternalGateway, func() { podTCPPort = 80 podUDPPort = 90 singleTargetRetries = 50 // enough attempts to avoid hashing to the same gateway + bfdTimeout = 4 * time.Second ) // Validate pods can reach a network running in a container's loopback address via @@ -1111,7 +1112,7 @@ var _ = ginkgo.Describe("External Gateway", feature.ExternalGateway, func() { ginkgo.By("Deleting one container") err := providerCtx.DeleteExternalContainer(gwContainers[1]) framework.ExpectNoError(err, "failed to delete external container %s", gwContainers[1].Name) - time.Sleep(3 * time.Second) // bfd timeout + time.Sleep(bfdTimeout) tcpDumpSync = sync.WaitGroup{} tcpDumpSync.Add(1) @@ -1189,7 +1190,7 @@ var _ = ginkgo.Describe("External Gateway", feature.ExternalGateway, func() { err := providerCtx.DeleteExternalContainer(gwContainers[1]) framework.ExpectNoError(err, "failed to delete external container %s", gwContainers[1].Name) ginkgo.By("Waiting for BFD to sync") - time.Sleep(3 * time.Second) // bfd timeout + time.Sleep(bfdTimeout) // ECMP should direct all the traffic to the only container expectedHostName := hostNameForExternalContainer(gwContainers[0]) @@ -1328,7 +1329,7 @@ var _ = ginkgo.Describe("External Gateway", feature.ExternalGateway, func() { ginkgo.By("Deleting one container") err := providerCtx.DeleteExternalContainer(gwContainers[1]) framework.ExpectNoError(err, "failed to delete external container %s", gwContainers[1].Name) - time.Sleep(3 * time.Second) // bfd timeout + time.Sleep(bfdTimeout) pingSync = sync.WaitGroup{} tcpDumpSync = sync.WaitGroup{} @@ -1409,7 +1410,7 @@ var _ = ginkgo.Describe("External Gateway", feature.ExternalGateway, func() { err := providerCtx.DeleteExternalContainer(gwContainers[1]) framework.ExpectNoError(err, "failed to delete external container %s", gwContainers[1].Name) ginkgo.By("Waiting for BFD to sync") - time.Sleep(3 * time.Second) // bfd timeout + time.Sleep(bfdTimeout) // ECMP should direct all the traffic to the only container expectedHostName := hostNameForExternalContainer(gwContainers[0]) @@ -2216,7 +2217,7 @@ var _ = ginkgo.Describe("External Gateway", feature.ExternalGateway, func() { err := providerCtx.DeleteExternalContainer(gwContainers[1]) framework.ExpectNoError(err, "failed to delete external container %s", 
gwContainers[1].Name) - time.Sleep(3 * time.Second) // bfd timeout + time.Sleep(bfdTimeout) tcpDumpSync = sync.WaitGroup{} tcpDumpSync.Add(1) @@ -2296,7 +2297,7 @@ var _ = ginkgo.Describe("External Gateway", feature.ExternalGateway, func() { err := providerCtx.DeleteExternalContainer(gwContainers[1]) framework.ExpectNoError(err, "failed to delete external container %s", gwContainers[1].Name) ginkgo.By("Waiting for BFD to sync") - time.Sleep(3 * time.Second) // bfd timeout + time.Sleep(bfdTimeout) // ECMP should direct all the traffic to the only container expectedHostName := hostNameForExternalContainer(gwContainers[0]) @@ -2433,7 +2434,7 @@ var _ = ginkgo.Describe("External Gateway", feature.ExternalGateway, func() { ginkgo.By("Deleting one container") err := providerCtx.DeleteExternalContainer(gwContainers[1]) framework.ExpectNoError(err, "failed to delete external container %s", gwContainers[1].Name) - time.Sleep(3 * time.Second) // bfd timeout + time.Sleep(bfdTimeout) pingSync = sync.WaitGroup{} tcpDumpSync = sync.WaitGroup{} @@ -2513,7 +2514,7 @@ var _ = ginkgo.Describe("External Gateway", feature.ExternalGateway, func() { err := providerCtx.DeleteExternalContainer(gwContainers[1]) framework.ExpectNoError(err, "failed to delete external container %s", gwContainers[1].Name) ginkgo.By("Waiting for BFD to sync") - time.Sleep(3 * time.Second) // bfd timeout + time.Sleep(bfdTimeout) // ECMP should direct all the traffic to the only container expectedHostName := hostNameForExternalContainer(gwContainers[0]) From 8e7b2c6f66a820b6d63151326a00c62fb8bc7b65 Mon Sep 17 00:00:00 2001 From: Thomas Haller Date: Thu, 20 Mar 2025 08:14:59 +0100 Subject: [PATCH 15/20] traffic-flow-tests: update to latest version of k8s-tft Get the latest changes from [1]. There are some improvements, but it is supposed to work the same (if not better). 
[1] https://github.com/ovn-kubernetes/kubernetes-traffic-flow-tests/commit/ce924ee2113a80e8d7b58671da020a2c9a21fd26 Signed-off-by: Thomas Haller --- test/scripts/traffic-flow-tests.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/scripts/traffic-flow-tests.sh b/test/scripts/traffic-flow-tests.sh index 9fb88acfb4..e597730f30 100755 --- a/test/scripts/traffic-flow-tests.sh +++ b/test/scripts/traffic-flow-tests.sh @@ -8,7 +8,7 @@ export OCI_BIN="${KIND_EXPERIMENTAL_PROVIDER:-docker}" export TFT_TEST_IMAGE="ghcr.io/ovn-kubernetes/kubernetes-traffic-flow-tests:latest" export TRAFFIC_FLOW_TESTS_DIRNAME="kubernetes-traffic-flow-tests" export TRAFFIC_FLOW_TESTS_REPO="https://github.com/ovn-kubernetes/kubernetes-traffic-flow-tests.git" -export TRAFFIC_FLOW_TESTS_COMMIT="3e092f2c448bdffcddd5d025b97df53145ccea62" +export TRAFFIC_FLOW_TESTS_COMMIT="ce924ee2113a80e8d7b58671da020a2c9a21fd26" export TRAFFIC_FLOW_TESTS="${TRAFFIC_FLOW_TESTS:-1,2,3}" export SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &> /dev/null && pwd)" From cd70830024d82f03ae1e9c3c8b9b2560a0869fd5 Mon Sep 17 00:00:00 2001 From: Andreas Karis Date: Tue, 23 Sep 2025 20:14:55 +0200 Subject: [PATCH 16/20] E2E service: Add test for named port handling with ETP=Local services The test validates LoadBalancer services with: - Named targetPorts (http/udp) instead of numeric ports - AllocateLoadBalancerNodePorts=false configuration - ExternalTrafficPolicy=Local behavior Signed-off-by: Andreas Karis --- test/e2e/service.go | 222 +++++++++++++++++++++++++++++++++++++++++--- test/e2e/util.go | 132 ++++++++++++++++++++++---- 2 files changed, 321 insertions(+), 33 deletions(-) diff --git a/test/e2e/service.go b/test/e2e/service.go index 21ffed62f2..069dd6794a 100644 --- a/test/e2e/service.go +++ b/test/e2e/service.go @@ -2006,6 +2006,196 @@ spec: // we don't... Is this check really needed? 
}) + ginkgo.It("Should ensure load balancer service works with 0 node ports when named targetPorts are used and ETP=local", func() { + err := WaitForServingAndReadyServiceEndpointsNum(context.TODO(), f.ClientSet, namespaceName, svcName, 4, time.Second, time.Second*180) + framework.ExpectNoError(err, fmt.Sprintf("service: %s never had an endpoint, err: %v", svcName, err)) + + svcLoadBalancerIP, err := getServiceLoadBalancerIP(f.ClientSet, namespaceName, svcName) + framework.ExpectNoError(err, fmt.Sprintf("failed to get service lb ip: %s, err: %v", svcName, err)) + + time.Sleep(time.Second * 5) // buffer to ensure all rules are created correctly + + checkExactETPRules := func(noSNATServicesSet string) { + svc, err := f.ClientSet.CoreV1().Services(namespaceName).Get(context.TODO(), svcName, metav1.GetOptions{}) + framework.ExpectNoError(err) + + // Retrieve the loadbalancer's IP + var lbIP string + if len(svc.Status.LoadBalancer.Ingress) > 0 { + lbIP = svc.Status.LoadBalancer.Ingress[0].IP + } + + discoveryClient := f.ClientSet.DiscoveryV1() + endpointSlices, err := discoveryClient.EndpointSlices(namespaceName).List(context.TODO(), metav1.ListOptions{ + LabelSelector: fmt.Sprintf("kubernetes.io/service-name=%s", svcName), + }) + framework.ExpectNoError(err) + + // Retrieve unique addresses from all endpointslices + uniqueAddresses := sets.New[string]() + for _, es := range endpointSlices.Items { + uniqueAddresses = uniqueAddresses.Union(getServingAndReadyEndpointSliceAddresses(es)) + } + + mask := 32 + if utilnet.IsIPv6String(lbIP) { + mask = 128 + } + + // Build regex patterns for iptables rules using lbIP and uniqueAddresses + var patterns []string + var sets [][]string + for address := range uniqueAddresses { + tcpPattern := fmt.Sprintf(".*-A OVN-KUBE-ETP -d %s/%d -p tcp -m tcp --dport 80 -m statistic "+ + "--mode random --probability .* -j DNAT --to-destination %s$", + regexp.QuoteMeta(lbIP), mask, regexp.QuoteMeta(net.JoinHostPort(address, "80"))) + patterns = append(patterns, tcpPattern) + udpPattern := fmt.Sprintf(".*-A OVN-KUBE-ETP -d %s/%d -p udp -m udp --dport 10001 -m statistic "+ + "--mode random --probability .* -j DNAT --to-destination %s$", + regexp.QuoteMeta(lbIP), mask, regexp.QuoteMeta(net.JoinHostPort(address, "10001"))) + patterns = append(patterns, udpPattern) + + sets = append(sets, []string{address, "tcp", "80"}) + sets = append(sets, []string{address, "udp", "10001"}) + } + err = wait.PollImmediate(retryInterval, retryTimeout, checkIPTablesRulesPresent(backendNodeName, patterns)) + framework.ExpectNoError(err, "Couldn't fetch the correct iptables rules, expected to find: %v, err: %v", patterns, err) + err = wait.PollImmediate(retryInterval, retryTimeout, checkNFTElementsPresent(backendNodeName, noSNATServicesSet, sets)) + framework.ExpectNoError(err, "Couldn't fetch the correct nft elements, expected to find: %v, err: %v", sets, err) + } + + noSNATServicesSet := "mgmtport-no-snat-services-v4" + if utilnet.IsIPv6String(svcLoadBalancerIP) { + noSNATServicesSet = "mgmtport-no-snat-services-v6" + } + + // Initial sanity check. 
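+ // Baseline: with numeric targetPorts and the default externalTrafficPolicy,
+ // only the base OVN-KUBE-ETP and OVN-KUBE-EXTERNALIP entries should exist,
+ // and both mgmtport no-SNAT sets should be empty.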
+ ginkgo.By("checking number of firewall rules for baseline") + err = wait.PollImmediate(retryInterval, retryTimeout, checkNumberOfETPRules(backendNodeName, 2, "OVN-KUBE-ETP")) + framework.ExpectNoError(err, "Couldn't fetch the correct number of iptable rules, err: %v", err) + err = wait.PollImmediate(retryInterval, retryTimeout, checkNumberOfETPRules(backendNodeName, 5, "OVN-KUBE-EXTERNALIP")) + framework.ExpectNoError(err, "Couldn't fetch the correct number of iptable rules, err: %v", err) + err = wait.PollImmediate(retryInterval, retryTimeout, checkNumberOfNFTElements(backendNodeName, 0, noSNATServicesSet)) + framework.ExpectNoError(err, "Couldn't fetch the correct number of nftables elements, err: %v", err) + err = wait.PollImmediate(retryInterval, retryTimeout, checkNumberOfNFTElements(backendNodeName, 0, "mgmtport-no-snat-nodeports")) + framework.ExpectNoError(err, "Couldn't fetch the correct number of nftables elements, err: %v", err) + + ginkgo.By("by sending a TCP packet to service " + svcName + " with type=LoadBalancer in namespace " + namespaceName + " with backend pod " + backendName) + externalContainer := infraapi.ExternalContainer{Name: externalClientContainerName} + _, err = wgetInExternalContainer(externalContainer, svcLoadBalancerIP, endpointHTTPPort, "big.iso") + framework.ExpectNoError(err, "failed to curl load balancer service") + + // Patch the service to use named ports. + ginkgo.By("patching service " + svcName + " to named ports") + err = patchServiceStringValue(f.ClientSet, svcName, "default", "/spec/ports/0/targetPort", "http") + framework.ExpectNoError(err) + err = patchServiceStringValue(f.ClientSet, svcName, "default", "/spec/ports/1/targetPort", "udp") + framework.ExpectNoError(err) + output := e2ekubectl.RunKubectlOrDie("default", "get", "svc", svcName, "-o=jsonpath='{.spec.ports[0].targetPort}'") + gomega.Expect(output).To(gomega.Equal("'http'")) + output = e2ekubectl.RunKubectlOrDie("default", "get", "svc", svcName, "-o=jsonpath='{.spec.ports[1].targetPort}'") + gomega.Expect(output).To(gomega.Equal("'udp'")) + time.Sleep(time.Second * 5) // buffer to ensure all rules are created correctly + ginkgo.By("checking number of firewall rules for named ports") + err = wait.PollImmediate(retryInterval, retryTimeout, checkNumberOfETPRules(backendNodeName, 2, "OVN-KUBE-ETP")) + framework.ExpectNoError(err, "Couldn't fetch the correct number of iptable rules, err: %v", err) + err = wait.PollImmediate(retryInterval, retryTimeout, checkNumberOfETPRules(backendNodeName, 5, "OVN-KUBE-EXTERNALIP")) + framework.ExpectNoError(err, "Couldn't fetch the correct number of iptable rules, err: %v", err) + err = wait.PollImmediate(retryInterval, retryTimeout, checkNumberOfNFTElements(backendNodeName, 0, noSNATServicesSet)) + framework.ExpectNoError(err, "Couldn't fetch the correct number of nftables elements, err: %v", err) + err = wait.PollImmediate(retryInterval, retryTimeout, checkNumberOfNFTElements(backendNodeName, 0, "mgmtport-no-snat-nodeports")) + framework.ExpectNoError(err, "Couldn't fetch the correct number of nftables elements, err: %v", err) + ginkgo.By("by sending a TCP packet to service " + svcName + " with type=LoadBalancer in namespace " + namespaceName + " with backend pod " + backendName) + externalContainer = infraapi.ExternalContainer{Name: externalClientContainerName} + _, err = wgetInExternalContainer(externalContainer, svcLoadBalancerIP, endpointHTTPPort, "big.iso") + framework.ExpectNoError(err, "failed to curl load balancer service") + + // Patch the 
service to use allocateLoadBalancerNodePorts=false and externalTrafficPolicy=local. + ginkgo.By("patching service " + svcName + " to allocateLoadBalancerNodePorts=false and externalTrafficPolicy=local") + + err = patchServiceBoolValue(f.ClientSet, svcName, "default", "/spec/allocateLoadBalancerNodePorts", false) + framework.ExpectNoError(err) + + output = e2ekubectl.RunKubectlOrDie("default", "get", "svc", svcName, "-o=jsonpath='{.spec.allocateLoadBalancerNodePorts}'") + gomega.Expect(output).To(gomega.Equal("'false'")) + + err = patchServiceStringValue(f.ClientSet, svcName, "default", "/spec/externalTrafficPolicy", "Local") + framework.ExpectNoError(err) + + output = e2ekubectl.RunKubectlOrDie("default", "get", "svc", svcName, "-o=jsonpath='{.spec.externalTrafficPolicy}'") + gomega.Expect(output).To(gomega.Equal("'Local'")) + + time.Sleep(time.Second * 5) // buffer to ensure all rules are created correctly + + ginkgo.By("checking number of firewall rules for allocateLoadBalancerNodePorts=false and externalTrafficPolicy=local") + err = wait.PollImmediate(retryInterval, retryTimeout, checkNumberOfETPRules(backendNodeName, 10, "OVN-KUBE-ETP")) + framework.ExpectNoError(err, "Couldn't fetch the correct number of iptable rules, err: %v", err) + err = wait.PollImmediate(retryInterval, retryTimeout, checkNumberOfNFTElements(backendNodeName, 8, noSNATServicesSet)) + framework.ExpectNoError(err, "Couldn't fetch the correct number of nftables elements, err: %v", err) + err = wait.PollImmediate(retryInterval, retryTimeout, checkNumberOfNFTElements(backendNodeName, 0, "mgmtport-no-snat-nodeports")) + framework.ExpectNoError(err, "Couldn't fetch the correct number of nftables elements, err: %v", err) + + ginkgo.By("checking exact ETP firewall rules for allocateLoadBalancerNodePorts=false and externalTrafficPolicy=local") + checkExactETPRules(noSNATServicesSet) + + ginkgo.By("by sending a TCP packet to service " + svcName + " with type=LoadBalancer in namespace " + namespaceName + " with backend pod " + backendName) + + _, err = wgetInExternalContainer(externalContainer, svcLoadBalancerIP, endpointHTTPPort, "big.iso") + framework.ExpectNoError(err, "failed to curl load balancer service") + + pktSize := 60 + if utilnet.IsIPv6String(svcLoadBalancerIP) { + pktSize = 80 + } + err = wait.PollImmediate(retryInterval, retryTimeout, checkNumberOfETPRules(backendNodeName, 1, fmt.Sprintf("[1:%d] -A OVN-KUBE-ETP", pktSize))) + framework.ExpectNoError(err, "Couldn't fetch the correct number of iptable rules, err: %v", err) + // FIXME: This used to check that the no-snat rule had been hit, but nftables + // doesn't attach counters to rules unless you explicitly request them, which + // we don't... Is this check really needed? 
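+ // The ETP=Local counts above are per endpoint: 4 ready endpoints x 2
+ // protocols = 8 DNAT rules on top of the 2 baseline OVN-KUBE-ETP matches,
+ // and 4 x 2 = 8 no-SNAT set elements, so removing one endpoint should
+ // drop 2 of each.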
+ + ginkgo.By("Scale down endpoints of service: " + svcName + " to ensure iptable rules are also getting recreated correctly") + e2ekubectl.RunKubectlOrDie("default", "scale", "deployment", backendName, "--replicas=3") + err = WaitForServingAndReadyServiceEndpointsNum(context.TODO(), f.ClientSet, namespaceName, svcName, 3, time.Second, time.Second*180) + framework.ExpectNoError(err, fmt.Sprintf("service: %s never had an endpoint, err: %v", svcName, err)) + time.Sleep(time.Second * 5) // buffer to ensure all rules are created correctly + + // number of rules/elements should have decreased by 2 (one for the TCP port, + // one for UDP) + ginkgo.By("checking number of firewall rules after scale down") + err = wait.PollImmediate(retryInterval, retryTimeout, checkNumberOfETPRules(backendNodeName, 8, "OVN-KUBE-ETP")) + framework.ExpectNoError(err, "Couldn't fetch the correct number of iptable rules, err: %v", err) + err = wait.PollImmediate(retryInterval, retryTimeout, checkNumberOfNFTElements(backendNodeName, 6, noSNATServicesSet)) + framework.ExpectNoError(err, "Couldn't fetch the correct number of nftables elements, err: %v", err) + err = wait.PollImmediate(retryInterval, retryTimeout, checkNumberOfNFTElements(backendNodeName, 0, "mgmtport-no-snat-nodeports")) + framework.ExpectNoError(err, "Couldn't fetch the correct number of nftables elements, err: %v", err) + ginkgo.By("checking exact ETP firewall rules for allocateLoadBalancerNodePorts=false and externalTrafficPolicy=local after scale down") + checkExactETPRules(noSNATServicesSet) + + ginkgo.By("by sending a TCP packet to service " + svcName + " with type=LoadBalancer in namespace " + namespaceName + " with backend pod " + backendName) + + _, err = wgetInExternalContainer(externalContainer, svcLoadBalancerIP, endpointHTTPPort, "big.iso") + framework.ExpectNoError(err, "failed to curl load balancer service") + + err = wait.PollImmediate(retryInterval, retryTimeout, checkNumberOfETPRules(backendNodeName, 1, fmt.Sprintf("[1:%d] -A OVN-KUBE-ETP", pktSize))) + framework.ExpectNoError(err, "Couldn't fetch the correct number of iptable rules, err: %v", err) + // FIXME: This used to check that the no-snat rule had been hit, but nftables + // doesn't attach counters to rules unless you explicitly request them, which + // we don't... Is this check really needed? + + // Also test proper deletion logic. 
+ ginkgo.By("deleting the service") + e2ekubectl.RunKubectlOrDie("default", "delete", "service", svcName) + ginkgo.By("checking number of firewall rules after service delete") + err = wait.PollImmediate(retryInterval, retryTimeout, checkNumberOfETPRules(backendNodeName, 2, "OVN-KUBE-ETP")) + framework.ExpectNoError(err, "Couldn't fetch the correct number of iptable rules, err: %v", err) + err = wait.PollImmediate(retryInterval, retryTimeout, checkNumberOfETPRules(backendNodeName, 3, "OVN-KUBE-EXTERNALIP")) + framework.ExpectNoError(err, "Couldn't fetch the correct number of iptable rules, err: %v", err) + err = wait.PollImmediate(retryInterval, retryTimeout, checkNumberOfNFTElements(backendNodeName, 0, noSNATServicesSet)) + framework.ExpectNoError(err, "Couldn't fetch the correct number of nftables elements, err: %v", err) + err = wait.PollImmediate(retryInterval, retryTimeout, checkNumberOfNFTElements(backendNodeName, 0, "mgmtport-no-snat-nodeports")) + framework.ExpectNoError(err, "Couldn't fetch the correct number of nftables elements, err: %v", err) + }) + ginkgo.It("Should ensure load balancer service works when ETP=local and session affinity is set", func() { err := WaitForServingAndReadyServiceEndpointsNum(context.TODO(), f.ClientSet, namespaceName, svcName, 4, time.Second, time.Second*180) @@ -2531,19 +2721,25 @@ func countServingAndReadyEndpointsSlicesNum(epList *discoveryv1.EndpointSliceLis // Only count unique addresses that are Ready and not Terminating addresses := sets.New[string]() for _, epSlice := range epList.Items { - for _, ep := range epSlice.Endpoints { - cond := ep.Conditions - ready := cond.Ready == nil || *cond.Ready - serving := cond.Serving == nil || *cond.Serving - terminating := cond.Terminating != nil && *cond.Terminating - if !ready || !serving || terminating { - continue - } - if len(ep.Addresses) == 0 || ep.Addresses[0] == "" { - continue - } - addresses.Insert(ep.Addresses[0]) - } + addresses = addresses.Union(getServingAndReadyEndpointSliceAddresses(epSlice)) } return addresses.Len() } + +func getServingAndReadyEndpointSliceAddresses(epSlice discoveryv1.EndpointSlice) sets.Set[string] { + addresses := sets.New[string]() + for _, ep := range epSlice.Endpoints { + cond := ep.Conditions + ready := cond.Ready == nil || *cond.Ready + serving := cond.Serving == nil || *cond.Serving + terminating := cond.Terminating != nil && *cond.Terminating + if !ready || !serving || terminating { + continue + } + if len(ep.Addresses) == 0 || ep.Addresses[0] == "" { + continue + } + addresses.Insert(ep.Addresses[0]) + } + return addresses +} diff --git a/test/e2e/util.go b/test/e2e/util.go index dfa8d56e69..3d880d587a 100644 --- a/test/e2e/util.go +++ b/test/e2e/util.go @@ -10,6 +10,7 @@ import ( "os" "path/filepath" "regexp" + "slices" "strconv" "strings" "text/template" @@ -937,14 +938,19 @@ func patchService(c kubernetes.Interface, serviceName, serviceNamespace, jsonPat return nil } -// pokeNodeIPTableRules returns the number of iptables (both ipv6 and ipv4) rules that match the provided pattern -func pokeNodeIPTableRules(nodeName, pattern string) int { +func getNodeIPTRules(nodeName string) string { ipt4Rules, err := infraprovider.Get().ExecK8NodeCommand(nodeName, []string{"iptables-save", "-c"}) framework.ExpectNoError(err, "failed to get iptables rules from node %s", nodeName) ipt6Rules, err := infraprovider.Get().ExecK8NodeCommand(nodeName, []string{"ip6tables-save", "-c"}) framework.ExpectNoError(err, "failed to get ip6tables rules from node %s", nodeName) iptRules := 
ipt4Rules + ipt6Rules framework.Logf("DEBUG: Dumping IPTRules %v", iptRules) + return iptRules +} + +// pokeNodeIPTableRules returns the number of iptables (both ipv6 and ipv4) rules that match the provided pattern +func pokeNodeIPTableRules(nodeName, pattern string) int { + iptRules := getNodeIPTRules(nodeName) numOfMatchRules := 0 for _, iptRule := range strings.Split(iptRules, "\n") { match := strings.Contains(iptRule, pattern) @@ -956,13 +962,59 @@ func pokeNodeIPTableRules(nodeName, pattern string) int { return numOfMatchRules } -// countNFTablesElements returns the number of nftables elements in the indicated set -// of the "ovn-kubernetes" table. -func countNFTablesElements(nodeName, name string) int { +func countIPTablesRulesMatches(nodeName string, patterns []string) int { + numMatches := 0 + iptRules := getNodeIPTRules(nodeName) + for _, pattern := range patterns { + for _, iptRule := range strings.Split(iptRules, "\n") { + matched, err := regexp.MatchString(pattern, iptRule) + if err == nil && matched { + numMatches++ + } + } + } + return numMatches +} + +type Elem []string + +func (e *Elem) UnmarshalJSON(data []byte) error { + var str string + var i int + var concatenation map[string][]json.RawMessage + if err := json.Unmarshal(data, &str); err == nil { + *e = []string{str} + return nil + } + if err := json.Unmarshal(data, &i); err == nil { + *e = []string{fmt.Sprintf("%d", i)} + return nil + } + if err := json.Unmarshal(data, &concatenation); err == nil { + concat := concatenation["concat"] + for _, rawMsg := range concat { + var str string + var i int + if err := json.Unmarshal(rawMsg, &str); err == nil { + *e = append(*e, str) + } + if err := json.Unmarshal(rawMsg, &i); err == nil { + *e = append(*e, fmt.Sprintf("%d", i)) + } + } + return nil + } + return fmt.Errorf("could not unmarshal %s", string(data)) +} + +func getNFTablesElements(nodeName, name string) ([]Elem, error) { + array := []Elem{} + nftCmd := []string{"nft", "-j", "list", "set", "inet", "ovn-kubernetes", name} nftElements, err := infraprovider.Get().ExecK8NodeCommand(nodeName, nftCmd) - framework.ExpectNoError(err, "failed to get nftables elements from node %s", nodeName) - + if err != nil { + return array, err + } framework.Logf("DEBUG: Dumping NFTElements %v", nftElements) // The output will look like // @@ -985,23 +1037,40 @@ func countNFTablesElements(nodeName, name string) int { // } // // (Where the "elem" element will be omitted if the set is empty.) - // We just parse this optimistically and catch the panic if it fails. - count := -1 - defer func() { - if recover() != nil { - framework.Logf("JSON parsing error!") - } - }() - jsonResult := map[string][]map[string]map[string]any{} - json.Unmarshal([]byte(nftElements), &jsonResult) + jsonResult := map[string][]map[string]map[string]json.RawMessage{} + if err := json.Unmarshal([]byte(nftElements), &jsonResult); err != nil { + return array, err + } elem := jsonResult["nftables"][1]["set"]["elem"] if elem == nil { - return 0 + return array, err } - elemArray := elem.([]any) - count = len(elemArray) - return count + err = json.Unmarshal(elem, &array) + return array, err +} + +// countNFTablesElements returns the number of nftables elements in the indicated set +// of the "ovn-kubernetes" table. 
+func countNFTablesElements(nodeName, name string) int { + defer ginkgo.GinkgoRecover() + array, err := getNFTablesElements(nodeName, name) + framework.ExpectNoError(err, "failed to get nftables elements from node %s", nodeName) + return len(array) +} + +func countNFTablesRulesMatches(nodeName, name string, sets [][]string) int { + numMatches := 0 + array, err := getNFTablesElements(nodeName, name) + framework.ExpectNoError(err, "failed to get nftables elements from node %s", nodeName) + for _, set := range sets { + for _, elem := range array { + if slices.Equal(set, elem) { + numMatches++ + } + } + } + return numMatches } func checkNumberOfETPRules(backendNodeName string, value int, pattern string) wait.ConditionFunc { @@ -1025,6 +1094,29 @@ func checkNumberOfNFTElements(backendNodeName string, value int, name string) wa } } +func checkIPTablesRulesPresent(backendNodeName string, patterns []string) wait.ConditionFunc { + return func() (bool, error) { + numMatches := countIPTablesRulesMatches(backendNodeName, patterns) + isExpected := numMatches == len(patterns) + if !isExpected { + framework.Logf("checkIPTablesRulesPresent got: numMatches: %d, expected: %d", + numMatches, len(patterns)) + } + return isExpected, nil + } +} +func checkNFTElementsPresent(backendNodeName, name string, sets [][]string) wait.ConditionFunc { + return func() (bool, error) { + numMatches := countNFTablesRulesMatches(backendNodeName, name, sets) + isExpected := numMatches == len(sets) + if !isExpected { + framework.Logf("checkNFTElementsPresent got: numMatches: %d, expected: %d", + numMatches, len(sets)) + } + return isExpected, nil + } +} + // isDualStackCluster returns 'true' if at least one of the nodes has more than one node subnet. func isDualStackCluster(nodes *v1.NodeList) bool { for _, node := range nodes.Items { From 76f6439d95f69163c061e7fbce281f158a2b28dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Caama=C3=B1o=20Ruiz?= Date: Tue, 14 Oct 2025 17:19:43 +0000 Subject: [PATCH 17/20] Reintroduce completed pod check in shouldReleaseDeletedPod MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I accidentally removed the check in a recent PR [1], which could have performance consequences, as checking against other pods has a cost. Reintroduce the check with a hopefully useful comment to prevent it from happening again. 
[1] https://github.com/ovn-kubernetes/ovn-kubernetes/pull/5626 Signed-off-by: Jaime Caamaño Ruiz --- .../pkg/ovn/base_network_controller_pods.go | 6 +++ .../ovn/base_network_controller_pods_test.go | 38 +++++++++++++++++++ 2 files changed, 44 insertions(+) diff --git a/go-controller/pkg/ovn/base_network_controller_pods.go b/go-controller/pkg/ovn/base_network_controller_pods.go index 8ac4974c00..5a318bd859 100644 --- a/go-controller/pkg/ovn/base_network_controller_pods.go +++ b/go-controller/pkg/ovn/base_network_controller_pods.go @@ -1045,6 +1045,12 @@ func (bnc *BaseNetworkController) shouldReleaseDeletedPod(pod *corev1.Pod, switc } } + // this pod is being deleted before completion so there is no risk (and no + // need to check) that its IPs are being used by other pods + if !util.PodCompleted(pod) { + return true, nil + } + if bnc.wasPodReleasedBeforeStartup(string(pod.UID), nad) { klog.Infof("Completed pod %s/%s was already released for nad %s before startup", pod.Namespace, diff --git a/go-controller/pkg/ovn/base_network_controller_pods_test.go b/go-controller/pkg/ovn/base_network_controller_pods_test.go index 4d1a6393c4..2987a1ebe2 100644 --- a/go-controller/pkg/ovn/base_network_controller_pods_test.go +++ b/go-controller/pkg/ovn/base_network_controller_pods_test.go @@ -234,3 +234,41 @@ func TestBaseNetworkController_trackPodsReleasedBeforeStartup(t *testing.T) { }) } } + +func TestBaseNetworkController_shouldReleaseDeletedPod(t *testing.T) { + tests := []struct { + name string // description of this test case + // Named input parameters for target function. + pod *corev1.Pod + switchName string + nad string + podIfAddrs []*net.IPNet + want bool + wantErr bool + }{ + { + name: "should release a running pod", + pod: &corev1.Pod{Status: corev1.PodStatus{Phase: corev1.PodRunning}}, + want: true, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + var bnc BaseNetworkController + bnc.ReconcilableNetInfo = &util.DefaultNetInfo{} + got, gotErr := bnc.shouldReleaseDeletedPod(tt.pod, tt.switchName, tt.nad, tt.podIfAddrs) + if gotErr != nil { + if !tt.wantErr { + t.Errorf("shouldReleaseDeletedPod() failed: %v", gotErr) + } + return + } + if tt.wantErr { + t.Fatal("shouldReleaseDeletedPod() succeeded unexpectedly") + } + if got != tt.want { + t.Errorf("shouldReleaseDeletedPod() = %v, want %v", got, tt.want) + } + }) + } +} From 2afbaf6a9252a68ce9446fd2ae293eddbfb80e07 Mon Sep 17 00:00:00 2001 From: Periyasamy Palanisamy Date: Thu, 25 Sep 2025 09:50:26 +0200 Subject: [PATCH 18/20] Skip Pending pods in EgressIP status updates When processing pods during an EgressIP status update, the controller used to stop iterating as soon as it encountered a pod in Pending state (in my case, pod IPs are not found when a pod is in Pending state with ContainerCreating status). This caused any subsequent Running pods to be skipped, leaving their SNAT entries unprogrammed on the egress node. With this change, only Pending pods are skipped, while iteration continues for the rest. This ensures that Running pods are properly processed and their SNAT entries are programmed. This change also skips pods that are unscheduled or use host networking. 
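For reference, a minimal sketch of the consolidated helper, assuming util.PodNeedsSNAT simply inverts the condition that was previously inlined in generateCacheForEgressIP (an illustration, not the exact implementation):

// PodNeedsSNAT reports whether a pod should get EgressIP SNAT handling: it
// must not be completed, must be scheduled, and must not use host networking.
func PodNeedsSNAT(pod *corev1.Pod) bool {
	return !PodCompleted(pod) && PodScheduled(pod) && !PodWantsHostNetwork(pod)
}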
Signed-off-by: Periyasamy Palanisamy --- go-controller/pkg/ovn/egressip.go | 45 ++- go-controller/pkg/ovn/egressip_test.go | 501 +++++++++++++++++++++++++ 2 files changed, 523 insertions(+), 23 deletions(-) diff --git a/go-controller/pkg/ovn/egressip.go b/go-controller/pkg/ovn/egressip.go index 31a48d0c99..709e06b3f9 100644 --- a/go-controller/pkg/ovn/egressip.go +++ b/go-controller/pkg/ovn/egressip.go @@ -288,6 +288,7 @@ func (e *EgressIPController) reconcileEgressIP(old, new *egressipv1.EgressIP) (e } } // CASE 3: EIP object update + var errs []error if old != nil && new != nil { oldEIP := old newEIP := new @@ -374,7 +375,7 @@ func (e *EgressIPController) reconcileEgressIP(old, new *egressipv1.EgressIP) (e return fmt.Errorf("failed to get active network for namespace %s: %v", namespace.Name, err) } if err := e.addNamespaceEgressIPAssignments(ni, newEIP.Name, newEIP.Status.Items, mark, namespace, newEIP.Spec.PodSelector); err != nil { - return fmt.Errorf("network %s: failed to add namespace %s egress IP config: %v", ni.GetNetworkName(), namespace.Name, err) + errs = append(errs, fmt.Errorf("network %s: failed to add namespace %s egress IP config: %v", ni.GetNetworkName(), namespace.Name, err)) } } } @@ -403,16 +404,13 @@ func (e *EgressIPController) reconcileEgressIP(old, new *egressipv1.EgressIP) (e return fmt.Errorf("network %s: failed to delete pod %s/%s egress IP config: %v", ni.GetNetworkName(), pod.Namespace, pod.Name, err) } } - if util.PodCompleted(pod) { - continue - } if newPodSelector.Matches(podLabels) && !oldPodSelector.Matches(podLabels) { ni, err := e.networkManager.GetActiveNetworkForNamespace(namespace.Name) if err != nil { return fmt.Errorf("failed to get active network for namespace %s: %v", namespace.Name, err) } if err := e.addPodEgressIPAssignmentsWithLock(ni, newEIP.Name, newEIP.Status.Items, mark, pod); err != nil { - return fmt.Errorf("network %s: failed to add pod %s/%s egress IP config: %v", ni.GetNetworkName(), pod.Namespace, pod.Name, err) + errs = append(errs, fmt.Errorf("network %s: failed to add pod %s/%s egress IP config: %v", ni.GetNetworkName(), pod.Namespace, pod.Name, err)) } } } @@ -452,7 +450,7 @@ func (e *EgressIPController) reconcileEgressIP(old, new *egressipv1.EgressIP) (e podLabels := labels.Set(pod.Labels) if newPodSelector.Matches(podLabels) { if err := e.addPodEgressIPAssignmentsWithLock(ni, newEIP.Name, newEIP.Status.Items, mark, pod); err != nil { - return fmt.Errorf("network %s: failed to add pod %s/%s egress IP config: %v", ni.GetNetworkName(), pod.Namespace, pod.Name, err) + errs = append(errs, fmt.Errorf("network %s: failed to add pod %s/%s egress IP config: %v", ni.GetNetworkName(), pod.Namespace, pod.Name, err)) } } } @@ -471,12 +469,9 @@ func (e *EgressIPController) reconcileEgressIP(old, new *egressipv1.EgressIP) (e return fmt.Errorf("network %s: failed to delete pod %s/%s egress IP config: %v", ni.GetNetworkName(), pod.Namespace, pod.Name, err) } } - if util.PodCompleted(pod) { - continue - } if newPodSelector.Matches(podLabels) && !oldPodSelector.Matches(podLabels) { if err := e.addPodEgressIPAssignmentsWithLock(ni, newEIP.Name, newEIP.Status.Items, mark, pod); err != nil { - return fmt.Errorf("network %s: failed to add pod %s/%s egress IP config: %v", ni.GetNetworkName(), pod.Namespace, pod.Name, err) + errs = append(errs, fmt.Errorf("network %s: failed to add pod %s/%s egress IP config: %v", ni.GetNetworkName(), pod.Namespace, pod.Name, err)) } } } @@ -484,7 +479,7 @@ func (e *EgressIPController) reconcileEgressIP(old, new 
*egressipv1.EgressIP) (e } } } - return nil + return utilerrors.Join(errs...) } // reconcileEgressIPNamespace reconciles the database configuration setup in nbdb @@ -522,6 +517,7 @@ func (e *EgressIPController) reconcileEgressIPNamespace(old, new *corev1.Namespa if err != nil { return err } + var errs []error for _, egressIP := range egressIPs { if err := e.egressIPCache.DoWithLock(egressIP.Name, func(key string) error { // get latest egressIP object after we get cache lock to serialize egress ip operations @@ -563,10 +559,10 @@ func (e *EgressIPController) reconcileEgressIPNamespace(old, new *corev1.Namespa return nil }); err != nil { - return err + errs = append(errs, err) } } - return nil + return utilerrors.Join(errs...) } // reconcileEgressIPPod reconciles the database configuration setup in nbdb @@ -598,6 +594,7 @@ func (e *EgressIPController) reconcileEgressIPPod(old, new *corev1.Pod) (err err if err != nil { return err } + var errs []error for _, egressIP := range egressIPs { if err := e.egressIPCache.DoWithLock(egressIP.Name, func(key string) error { // get latest egressIP object after we get cache lock to serialize egress ip operations @@ -712,10 +709,10 @@ func (e *EgressIPController) reconcileEgressIPPod(old, new *corev1.Pod) (err err return nil }); err != nil { - return err + errs = append(errs, err) } } - return nil + return utilerrors.Join(errs...) } // main reconcile functions end here and local zone controller functions begin @@ -725,16 +722,17 @@ func (e *EgressIPController) addEgressIPAssignments(name string, statusAssignmen if err != nil { return err } + var errs []error for _, namespace := range namespaces { ni, err := e.networkManager.GetActiveNetworkForNamespace(namespace.Name) if err != nil { return fmt.Errorf("failed to get active network for namespace %s: %v", namespace.Name, err) } if err := e.addNamespaceEgressIPAssignments(ni, name, statusAssignments, mark, namespace, podSelector); err != nil { - return err + errs = append(errs, err) } } - return nil + return utilerrors.Join(errs...) } func (e *EgressIPController) addNamespaceEgressIPAssignments(ni util.NetInfo, name string, statusAssignments []egressipv1.EgressIPStatusItem, mark util.EgressIPMark, @@ -756,12 +754,13 @@ func (e *EgressIPController) addNamespaceEgressIPAssignments(ni util.NetInfo, na return err } } + var errs []error for _, pod := range pods { if err := e.addPodEgressIPAssignmentsWithLock(ni, name, statusAssignments, mark, pod); err != nil { - return err + errs = append(errs, err) } } - return nil + return utilerrors.Join(errs...) } func (e *EgressIPController) addPodEgressIPAssignmentsWithLock(ni util.NetInfo, name string, statusAssignments []egressipv1.EgressIPStatusItem, mark util.EgressIPMark, pod *corev1.Pod) error { @@ -778,9 +777,9 @@ func (e *EgressIPController) addPodEgressIPAssignmentsWithLock(ni util.NetInfo, // requires holding the podAssignmentMutex lock func (e *EgressIPController) addPodEgressIPAssignments(ni util.NetInfo, name string, statusAssignments []egressipv1.EgressIPStatusItem, mark util.EgressIPMark, pod *corev1.Pod) error { podKey := getPodKey(pod) - // If pod is already in succeeded or failed state, return it without proceeding further. 
- if util.PodCompleted(pod) { - klog.Infof("Pod %s is already in completed state, skipping egress ip assignment", podKey) + // Ignore completed pods, host networked pods, pods not scheduled + if !util.PodNeedsSNAT(pod) { + klog.Infof("Pod %s is not in desired state, skipping egress ip assignment", podKey) return nil } // If statusAssignments is empty just return, not doing this will delete the @@ -2011,7 +2010,7 @@ func (e *EgressIPController) generateCacheForEgressIP() (egressIPCache, error) { nadName = nadNames[0] // there should only be one active network } for _, pod := range pods { - if util.PodCompleted(pod) || !util.PodScheduled(pod) || util.PodWantsHostNetwork(pod) { + if !util.PodNeedsSNAT(pod) { continue } if egressLocalNodesCache.Len() == 0 && !e.isPodScheduledinLocalZone(pod) { diff --git a/go-controller/pkg/ovn/egressip_test.go b/go-controller/pkg/ovn/egressip_test.go index 7455d9ca40..131c63d354 100644 --- a/go-controller/pkg/ovn/egressip_test.go +++ b/go-controller/pkg/ovn/egressip_test.go @@ -11154,6 +11154,507 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations cluster default network" ), ) + ginkgo.DescribeTable( + "should ensure EgressIP skips host-network and pending pods from namespace", + func(includeUnscheduled, includeScheduledButNoIP, includeHostNetwork, interconnect, isPodRemote bool) { + app.Action = func(*cli.Context) error { + config.Gateway.DisableSNATMultipleGWs = true + config.OVNKubernetesFeature.EnableInterconnect = interconnect + + egressIP1 := "192.168.126.101" + node1IPv4 := "192.168.126.12" + node1IPv4CIDR := node1IPv4 + "/24" + node2IPv4 := "192.168.126.51" + node2IPv4CIDR := node2IPv4 + "/24" + + egressPod3 := *newPodWithLabels(eipNamespace, "egress-pod3", node1Name, podV4IP, egressPodLabel) + egressPod4 := *newPodWithLabels(eipNamespace, "egress-pod4", node1Name, podV4IP2, egressPodLabel) + egressNamespace := newNamespace(eipNamespace) + annotations := map[string]string{ + "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\"}", node1IPv4CIDR), + "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4Node1Subnet), + "k8s.ovn.org/l3-gateway-config": `{"default":{"mode":"local","mac-address":"7e:57:f8:f0:3c:49", "ip-address":"192.168.126.12/24", "next-hop":"192.168.126.1"}}`, + "k8s.ovn.org/node-chassis-id": "79fdcfc4-6fe6-4cd3-8242-c0f85a4668ec", + util.OVNNodeHostCIDRs: fmt.Sprintf("[\"%s\"]", node1IPv4CIDR), + "k8s.ovn.org/zone-name": "global", + } + node1 := getNodeObj(node1Name, annotations, map[string]string{}) + annotations = map[string]string{ + "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\"}", node2IPv4CIDR), + "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4Node2Subnet), + "k8s.ovn.org/l3-gateway-config": `{"default":{"mode":"local","mac-address":"7e:57:f8:f0:3c:49", "ip-address":"192.168.126.51/24", "next-hop":"192.168.126.1"}}`, + "k8s.ovn.org/node-chassis-id": "89fdcfc4-6fe6-4cd3-8242-c0f85a4668ec", + util.OVNNodeHostCIDRs: fmt.Sprintf("[\"%s\"]", node2IPv4CIDR), + "k8s.ovn.org/zone-name": "global", + } + if isPodRemote { + annotations["k8s.ovn.org/zone-name"] = "remote" + annotations["k8s.ovn.org/remote-zone-migrated"] = "remote" + } + node2 := getNodeObj(node2Name, annotations, map[string]string{}) + + eIP := egressipv1.EgressIP{ + ObjectMeta: newEgressIPMeta(egressIPName), + Spec: egressipv1.EgressIPSpec{ + EgressIPs: []string{egressIP1}, + NamespaceSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{ + "name": egressNamespace.Name, + }, + }, + }, + Status: 
egressipv1.EgressIPStatus{ + Items: []egressipv1.EgressIPStatusItem{}, + }, + } + node1Switch := &nbdb.LogicalSwitch{ + UUID: node1.Name + "-UUID", + Name: node1.Name, + } + node2Switch := &nbdb.LogicalSwitch{ + UUID: node2.Name + "-UUID", + Name: node2.Name, + } + + pods := []corev1.Pod{egressPod3, egressPod4} + if includeUnscheduled { + unScheduledPod := corev1.Pod{ + ObjectMeta: newPodMeta(eipNamespace, "egress-pod", egressPodLabel), + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Name: "containerName", + Image: "containerImage", + }, + }, + }, + Status: corev1.PodStatus{ + Phase: corev1.PodPending, + }, + } + pods = append(pods, unScheduledPod) + } + var pendingScheduledPodWithNoIP *corev1.Pod + if includeScheduledButNoIP { + pendingScheduledPodWithNoIP = newPodWithLabels(eipNamespace, "egress-pod1", node2Name, "", egressPodLabel) + pendingScheduledPodWithNoIP.Status.Phase = corev1.PodPending + pods = append(pods, *pendingScheduledPodWithNoIP) + } + if includeHostNetwork { + hnPod := *newPodWithLabels(eipNamespace, "egress-pod2", node2Name, node2IPv4, egressPodLabel) + hnPod.Spec.HostNetwork = true + pods = append(pods, hnPod) + } + + dynamicNeighRouters := "true" + if config.OVNKubernetesFeature.EnableInterconnect { + dynamicNeighRouters = "false" + } + logicalRouterOptions := map[string]string{ + "dynamic_neigh_routers": dynamicNeighRouters, + } + expectedNatLogicalPort1 := "k8s-node1" + fakeOvn.startWithDBSetup( + libovsdbtest.TestSetup{ + NBData: []libovsdbtest.TestData{ + &nbdb.LogicalRouter{ + Name: types.OVNClusterRouter, + UUID: types.OVNClusterRouter + "-UUID", + // stale LRP reference to be removed by EIP controller sync function. + Policies: []string{"hostnet-pod-reroute-UUID1"}, + }, + &nbdb.LogicalRouter{ + Name: types.GWRouterPrefix + node1.Name, + UUID: types.GWRouterPrefix + node1.Name + "-UUID", + Ports: []string{types.GWRouterToJoinSwitchPrefix + types.GWRouterPrefix + node1.Name + "-UUID"}, + // stale NAT reference to be removed by EIP controller sync function. + Nat: []string{"hostnet-pod-egressip-nat-UUID1"}, + Options: logicalRouterOptions, + }, + &nbdb.LogicalRouter{ + Name: types.GWRouterPrefix + node2.Name, + UUID: types.GWRouterPrefix + node2.Name + "-UUID", + Ports: []string{types.GWRouterToJoinSwitchPrefix + types.GWRouterPrefix + node2.Name + "-UUID"}, + Options: logicalRouterOptions, + }, + // stale LRP and SNAT entries (for a host networked pod) to be removed by EIP controller sync function. 
+ &nbdb.LogicalRouterPolicy{ + Priority: types.EgressIPReroutePriority, + Match: fmt.Sprintf("ip4.src == %s", node2IPv4), + Action: nbdb.LogicalRouterPolicyActionReroute, + Nexthops: []string{"100.64.0.2"}, + ExternalIDs: getEgressIPLRPReRouteDbIDs(egressIPName, eipNamespace, "egress-pod2", IPFamilyValueV4, types.DefaultNetworkName, DefaultNetworkControllerName).GetExternalIDs(), + UUID: "hostnet-pod-reroute-UUID1", + }, + &nbdb.NAT{ + UUID: "hostnet-pod-egressip-nat-UUID1", + LogicalIP: node2IPv4, + ExternalIP: egressIP1, + ExternalIDs: getEgressIPNATDbIDs(egressIPName, eipNamespace, "egress-pod2", IPFamilyValueV4, DefaultNetworkControllerName).GetExternalIDs(), + Type: nbdb.NATTypeSNAT, + LogicalPort: &expectedNatLogicalPort1, + Options: map[string]string{ + "stateless": "false", + }, + }, + &nbdb.LogicalRouterPort{ + UUID: types.GWRouterToJoinSwitchPrefix + types.GWRouterPrefix + node1.Name + "-UUID", + Name: types.GWRouterToJoinSwitchPrefix + types.GWRouterPrefix + node1.Name, + Networks: []string{nodeLogicalRouterIfAddrV4}, + }, + &nbdb.LogicalRouterPort{ + UUID: types.GWRouterToJoinSwitchPrefix + types.GWRouterPrefix + node2.Name + "-UUID", + Name: types.GWRouterToJoinSwitchPrefix + types.GWRouterPrefix + node2.Name, + Networks: []string{node2LogicalRouterIfAddrV4}, + }, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "-UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, + Type: "router", + Options: map[string]string{ + libovsdbops.RouterPort: types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, + "nat-addresses": "router", + "exclude-lb-vips-from-garp": "true", + }, + }, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "-UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, + Type: "router", + Options: map[string]string{ + libovsdbops.RouterPort: types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, + "nat-addresses": "router", + "exclude-lb-vips-from-garp": "true", + }, + }, + &nbdb.LogicalSwitch{ + UUID: types.ExternalSwitchPrefix + node1Name + "-UUID", + Name: types.ExternalSwitchPrefix + node1Name, + Ports: []string{types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "-UUID"}, + }, + &nbdb.LogicalSwitch{ + UUID: types.ExternalSwitchPrefix + node2Name + "-UUID", + Name: types.ExternalSwitchPrefix + node2Name, + Ports: []string{types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "-UUID"}, + }, + node1Switch, + node2Switch, + }, + }, + &egressipv1.EgressIPList{ + Items: []egressipv1.EgressIP{eIP}, + }, + &corev1.NodeList{ + Items: []corev1.Node{node1, node2}, + }, + &corev1.NamespaceList{ + Items: []corev1.Namespace{*egressNamespace}, + }, + &corev1.PodList{ + Items: pods, + }, + ) + + i, n, _ := net.ParseCIDR(podV4IP + "/23") + n.IP = i + fakeOvn.controller.logicalPortCache.add(&egressPod3, "", types.DefaultNetworkName, "", nil, []*net.IPNet{n}) + if pendingScheduledPodWithNoIP != nil { + fakeOvn.controller.logicalPortCache.add(pendingScheduledPodWithNoIP, "", types.DefaultNetworkName, "", nil, []*net.IPNet{}) + } + i, n, _ = net.ParseCIDR(podV4IP2 + "/23") + n.IP = i + fakeOvn.controller.logicalPortCache.add(&egressPod4, "", types.DefaultNetworkName, "", nil, []*net.IPNet{n}) + fakeOvn.controller.eIPC.nodeZoneState.Store(node1Name, true) + fakeOvn.controller.eIPC.nodeZoneState.Store(node2Name, true) + if isPodRemote { + fakeOvn.controller.eIPC.nodeZoneState.Store(node2Name, 
false) + } + + err := fakeOvn.controller.WatchEgressIPNamespaces() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchEgressIPPods() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchEgressNodes() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchEgressIP() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + egressSVCServedPodsASv4, _ := buildEgressServiceAddressSets(nil) + egressIPServedPodsASv4, _ := buildEgressIPServedPodsAddressSets([]string{podV4IP, podV4IP2}, types.DefaultNetworkName, fakeOvn.controller.eIPC.controllerName) + egressNodeIPsASv4, _ := buildEgressIPNodeAddressSets([]string{node1IPv4, node2IPv4}) + + node1Switch.QOSRules = []string{"default-QoS-UUID"} + if !isPodRemote { + node2Switch.QOSRules = []string{"default-QoS-UUID"} + } + expectedDatabaseState := []libovsdbtest.TestData{ + &nbdb.LogicalRouterPolicy{ + Priority: types.DefaultNoRereoutePriority, + Match: fmt.Sprintf("(ip4.src == $%s || ip4.src == $%s) && ip4.dst == $%s", + egressIPServedPodsASv4.Name, egressSVCServedPodsASv4.Name, egressNodeIPsASv4.Name), + Action: nbdb.LogicalRouterPolicyActionAllow, + UUID: "default-no-reroute-node-UUID", + Options: map[string]string{"pkt_mark": types.EgressIPNodeConnectionMark}, + ExternalIDs: getEgressIPLRPNoReRoutePodToNodeDbIDs(IPFamilyValueV4, types.DefaultNetworkName, fakeOvn.controller.eIPC.controllerName).GetExternalIDs(), + }, + getNoReRouteReplyTrafficPolicy(types.DefaultNetworkName, fakeOvn.controller.eIPC.controllerName), + &nbdb.LogicalRouterPolicy{ + Priority: types.DefaultNoRereoutePriority, + Match: "ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14", + Action: nbdb.LogicalRouterPolicyActionAllow, + UUID: "no-reroute-UUID", + ExternalIDs: getEgressIPLRPNoReRoutePodToPodDbIDs(IPFamilyValueV4, types.DefaultNetworkName, fakeOvn.controller.eIPC.controllerName).GetExternalIDs(), + }, + &nbdb.LogicalRouterPolicy{ + Priority: types.DefaultNoRereoutePriority, + Match: fmt.Sprintf("ip4.src == 10.128.0.0/14 && ip4.dst == %s", config.Gateway.V4JoinSubnet), + Action: nbdb.LogicalRouterPolicyActionAllow, + UUID: "no-reroute-service-UUID", + ExternalIDs: getEgressIPLRPNoReRoutePodToJoinDbIDs(IPFamilyValueV4, types.DefaultNetworkName, fakeOvn.controller.eIPC.controllerName).GetExternalIDs(), + }, + &nbdb.LogicalRouter{ + Name: types.OVNClusterRouter, + UUID: types.OVNClusterRouter + "-UUID", + Policies: []string{"no-reroute-UUID", "no-reroute-service-UUID", "default-no-reroute-node-UUID", "default-no-reroute-reply-traffic"}, + }, + &nbdb.LogicalRouter{ + Name: types.GWRouterPrefix + node1.Name, + UUID: types.GWRouterPrefix + node1.Name + "-UUID", + Ports: []string{types.GWRouterToJoinSwitchPrefix + types.GWRouterPrefix + node1.Name + "-UUID"}, + Options: logicalRouterOptions, + }, + &nbdb.LogicalRouter{ + Name: types.GWRouterPrefix + node2.Name, + UUID: types.GWRouterPrefix + node2.Name + "-UUID", + Ports: []string{types.GWRouterToJoinSwitchPrefix + types.GWRouterPrefix + node2.Name + "-UUID"}, + Options: logicalRouterOptions, + }, + &nbdb.LogicalRouterPort{ + UUID: types.GWRouterToJoinSwitchPrefix + types.GWRouterPrefix + node2.Name + "-UUID", + Name: types.GWRouterToJoinSwitchPrefix + types.GWRouterPrefix + node2.Name, + Networks: []string{node2LogicalRouterIfAddrV4}, + }, + &nbdb.LogicalRouterPort{ + UUID: types.GWRouterToJoinSwitchPrefix + types.GWRouterPrefix + node1.Name + "-UUID", + Name: types.GWRouterToJoinSwitchPrefix + types.GWRouterPrefix + node1.Name, + Networks: 
[]string{nodeLogicalRouterIfAddrV4}, + }, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "-UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, + Type: "router", + Options: map[string]string{ + libovsdbops.RouterPort: types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, + "nat-addresses": "router", + "exclude-lb-vips-from-garp": "true", + }, + }, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "-UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, + Type: "router", + Options: map[string]string{ + libovsdbops.RouterPort: types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, + "nat-addresses": "router", + "exclude-lb-vips-from-garp": "true", + }, + }, + &nbdb.LogicalSwitch{ + UUID: types.ExternalSwitchPrefix + node1Name + "-UUID", + Name: types.ExternalSwitchPrefix + node1Name, + Ports: []string{types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "-UUID"}, + }, + &nbdb.LogicalSwitch{ + UUID: types.ExternalSwitchPrefix + node2Name + "-UUID", + Name: types.ExternalSwitchPrefix + node2Name, + Ports: []string{types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "-UUID"}, + }, + node1Switch, + node2Switch, + getDefaultQoSRule(false, types.DefaultNetworkName, fakeOvn.controller.eIPC.controllerName), + egressSVCServedPodsASv4, egressIPServedPodsASv4, egressNodeIPsASv4, + } + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) + + for i := 0; i < 10; i++ { + gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(0)) + node1.Labels = map[string]string{ + "k8s.ovn.org/egress-assignable": "", + } + _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Nodes().Update(context.TODO(), &node1, metav1.UpdateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + fakeOvn.patchEgressIPObj(node1Name, egressIPName, egressIP1) + gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) + eips, nodes := getEgressIPStatus(egressIPName) + gomega.Expect(nodes[0]).To(gomega.Equal(node1.Name)) + + expectedDatabaseState = []libovsdbtest.TestData{ + &nbdb.LogicalRouterPolicy{ + Priority: types.DefaultNoRereoutePriority, + Match: fmt.Sprintf("(ip4.src == $%s || ip4.src == $%s) && ip4.dst == $%s", + egressIPServedPodsASv4.Name, egressSVCServedPodsASv4.Name, egressNodeIPsASv4.Name), + Action: nbdb.LogicalRouterPolicyActionAllow, + UUID: "default-no-reroute-node-UUID", + Options: map[string]string{"pkt_mark": types.EgressIPNodeConnectionMark}, + ExternalIDs: getEgressIPLRPNoReRoutePodToNodeDbIDs(IPFamilyValueV4, types.DefaultNetworkName, fakeOvn.controller.eIPC.controllerName).GetExternalIDs(), + }, + getNoReRouteReplyTrafficPolicy(types.DefaultNetworkName, fakeOvn.controller.eIPC.controllerName), + &nbdb.LogicalRouterPolicy{ + Priority: types.DefaultNoRereoutePriority, + Match: "ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14", + Action: nbdb.LogicalRouterPolicyActionAllow, + UUID: "no-reroute-UUID", + ExternalIDs: getEgressIPLRPNoReRoutePodToPodDbIDs(IPFamilyValueV4, types.DefaultNetworkName, fakeOvn.controller.eIPC.controllerName).GetExternalIDs(), + }, + &nbdb.LogicalRouterPolicy{ + Priority: types.DefaultNoRereoutePriority, + Match: fmt.Sprintf("ip4.src == 10.128.0.0/14 && ip4.dst == %s", config.Gateway.V4JoinSubnet), + Action: nbdb.LogicalRouterPolicyActionAllow, + UUID: "no-reroute-service-UUID", + ExternalIDs: 
getEgressIPLRPNoReRoutePodToJoinDbIDs(IPFamilyValueV4, types.DefaultNetworkName, fakeOvn.controller.eIPC.controllerName).GetExternalIDs(), + }, + &nbdb.LogicalRouterPolicy{ + Priority: types.EgressIPReroutePriority, + Match: fmt.Sprintf("ip4.src == %s", egressPod3.Status.PodIP), + Action: nbdb.LogicalRouterPolicyActionReroute, + Nexthops: []string{"100.64.0.2"}, + ExternalIDs: getEgressIPLRPReRouteDbIDs(eIP.Name, egressPod3.Namespace, egressPod3.Name, IPFamilyValueV4, types.DefaultNetworkName, fakeOvn.controller.eIPC.controllerName).GetExternalIDs(), + UUID: "reroute-UUID1", + }, + &nbdb.LogicalRouterPolicy{ + Priority: types.EgressIPReroutePriority, + Match: fmt.Sprintf("ip4.src == %s", egressPod4.Status.PodIP), + Action: nbdb.LogicalRouterPolicyActionReroute, + Nexthops: []string{"100.64.0.2"}, + ExternalIDs: getEgressIPLRPReRouteDbIDs(eIP.Name, egressPod4.Namespace, egressPod4.Name, IPFamilyValueV4, types.DefaultNetworkName, fakeOvn.controller.eIPC.controllerName).GetExternalIDs(), + UUID: "reroute-UUID2", + }, + &nbdb.LogicalRouter{ + Name: types.OVNClusterRouter, + UUID: types.OVNClusterRouter + "-UUID", + Policies: []string{"no-reroute-UUID", "no-reroute-service-UUID", "default-no-reroute-node-UUID", "reroute-UUID1", "reroute-UUID2", "default-no-reroute-reply-traffic"}, + }, + &nbdb.LogicalRouter{ + Name: types.GWRouterPrefix + node1.Name, + UUID: types.GWRouterPrefix + node1.Name + "-UUID", + Ports: []string{types.GWRouterToJoinSwitchPrefix + types.GWRouterPrefix + node1.Name + "-UUID"}, + Nat: []string{"pod1-egressip-nat-UUID1", "pod4-egressip-nat-UUID1"}, + Options: logicalRouterOptions, + }, + &nbdb.LogicalRouter{ + Name: types.GWRouterPrefix + node2.Name, + UUID: types.GWRouterPrefix + node2.Name + "-UUID", + Ports: []string{types.GWRouterToJoinSwitchPrefix + types.GWRouterPrefix + node2.Name + "-UUID"}, + Options: logicalRouterOptions, + }, + &nbdb.NAT{ + UUID: "pod1-egressip-nat-UUID1", + LogicalIP: podV4IP, + ExternalIP: eips[0], + ExternalIDs: getEgressIPNATDbIDs(egressIPName, egressPod3.Namespace, egressPod3.Name, IPFamilyValueV4, fakeOvn.controller.controllerName).GetExternalIDs(), + Type: nbdb.NATTypeSNAT, + LogicalPort: &expectedNatLogicalPort1, + Options: map[string]string{ + "stateless": "false", + }, + }, + &nbdb.NAT{ + UUID: "pod4-egressip-nat-UUID1", + LogicalIP: podV4IP2, + ExternalIP: eips[0], + ExternalIDs: getEgressIPNATDbIDs(egressIPName, egressPod4.Namespace, egressPod4.Name, IPFamilyValueV4, fakeOvn.controller.controllerName).GetExternalIDs(), + Type: nbdb.NATTypeSNAT, + LogicalPort: &expectedNatLogicalPort1, + Options: map[string]string{ + "stateless": "false", + }, + }, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "-UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, + Type: "router", + Options: map[string]string{ + libovsdbops.RouterPort: types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, + "nat-addresses": "router", + "exclude-lb-vips-from-garp": "true", + }, + }, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "-UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, + Type: "router", + Options: map[string]string{ + libovsdbops.RouterPort: types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, + "nat-addresses": "router", + "exclude-lb-vips-from-garp": "true", + }, + }, + &nbdb.LogicalRouterPort{ + UUID: types.GWRouterToJoinSwitchPrefix + types.GWRouterPrefix + node2.Name + "-UUID", + 
Name: types.GWRouterToJoinSwitchPrefix + types.GWRouterPrefix + node2.Name, + Networks: []string{"100.64.0.3/29"}, + }, + &nbdb.LogicalRouterPort{ + UUID: types.GWRouterToJoinSwitchPrefix + types.GWRouterPrefix + node1.Name + "-UUID", + Name: types.GWRouterToJoinSwitchPrefix + types.GWRouterPrefix + node1.Name, + Networks: []string{"100.64.0.2/29"}, + }, + &nbdb.LogicalSwitch{ + UUID: types.ExternalSwitchPrefix + node1Name + "-UUID", + Name: types.ExternalSwitchPrefix + node1Name, + Ports: []string{types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "-UUID"}, + }, + &nbdb.LogicalSwitch{ + UUID: types.ExternalSwitchPrefix + node2Name + "-UUID", + Name: types.ExternalSwitchPrefix + node2Name, + Ports: []string{types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "-UUID"}, + }, + node1Switch, + node2Switch, + getDefaultQoSRule(false, types.DefaultNetworkName, fakeOvn.controller.eIPC.controllerName), + egressSVCServedPodsASv4, egressIPServedPodsASv4, egressNodeIPsASv4, + } + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) + + // Try out EgressIP failover scenarios for the next iteration. + node2.Labels = map[string]string{ + "k8s.ovn.org/egress-assignable": "", + } + _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Nodes().Update(context.TODO(), &node2, metav1.UpdateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + fakeOvn.patchEgressIPObj(node2Name, egressIPName, egressIP1) + gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) + _, nodes = getEgressIPStatus(egressIPName) + gomega.Expect(nodes[0]).To(gomega.Equal(node2.Name)) + // remove label from node1 and node2. + node1.Labels = map[string]string{} + _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Nodes().Update(context.TODO(), &node1, metav1.UpdateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + node2.Labels = map[string]string{} + _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Nodes().Update(context.TODO(), &node2, metav1.UpdateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + status := []egressipv1.EgressIPStatusItem{} + err = fakeOvn.controller.eIPC.patchReplaceEgressIPStatus(egressIPName, status) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + } + return nil + } + + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }, + ginkgo.Entry("unscheduled pending pod only, interconnect disabled", true, false, false, false, false), + ginkgo.Entry("scheduled pending pod only, interconnect disabled", false, true, false, false, false), + ginkgo.Entry("host-networked pod only, interconnect disabled", false, false, true, false, false), + ginkgo.Entry("unscheduled pending pod only, interconnect enabled, local pod", true, false, false, true, false), + ginkgo.Entry("scheduled pending pod only, interconnect enabled, local pod", false, true, false, true, false), + ginkgo.Entry("host-networked pod only, interconnect enabled, local pod", false, false, true, true, false), + ginkgo.Entry("unscheduled pending pod only, interconnect enabled, remote pod", true, false, false, true, true), + ginkgo.Entry("scheduled pending pod only, interconnect enabled, remote pod", false, true, false, true, true), + ginkgo.Entry("host-networked pod only, interconnect enabled, remote pod", false, false, true, true, true), + ) + ginkgo.It("should not deadlock when two EgressIP pods are hosted and egress through opposite nodes", func() { // This test is designed to reproduce a deadlock issue. 
// To reproduce, undo the corresponding fix introduced in this commit and

From c74e67eb4e003a75ab38898cabcd5aeea09bc01b Mon Sep 17 00:00:00 2001
From: Nadia Pinaeva
Date: Fri, 17 Oct 2025 16:56:31 +0200
Subject: [PATCH 19/20] [okep: layer2 router topology] Add clarification for
 joinIP routes.

Signed-off-by: Nadia Pinaeva
---
 docs/okeps/okep-5094-layer2-transit-router.md | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/docs/okeps/okep-5094-layer2-transit-router.md b/docs/okeps/okep-5094-layer2-transit-router.md
index 48aa64917e..d4c6460ab0 100644
--- a/docs/okeps/okep-5094-layer2-transit-router.md
+++ b/docs/okeps/okep-5094-layer2-transit-router.md
@@ -906,7 +906,14 @@ Nodes using the new topology must perform the following actions:
 - Create a tmp transit router port `trtos-layer2-switch-upgrade` with the GR MAC address
 - Add a dummy IP from the join subnet to the `trtos-layer2-switch-upgrade` port to enable pod on node1 -> remote GR traffic.
 - Add `transit_router` routes to steer joinIP traffic for the old nodes to the `trtos-layer2-switch-upgrade` port.
-  These routes look silly, but they work, like `100.65.0.3 100.65.0.3 dst-ip` for every joinIP of the old nodes.
+  These routes look weird, but they work, e.g. `100.65.0.3 100.65.0.3 dst-ip` for every joinIP of the old nodes. We need this because
+  in OVN, routes with dst-ip and src-ip policies are evaluated at the same time and selected based on longest prefix match,
+  and we have the following route for the pod network `10.10.0.0/24`:
+  `10.10.0.0/24 10.10.0.2 src-ip`
+  The connected route for the join IPs is derived from the joinSubnet, which is always a `/16`.
+  That means that for traffic with a src IP from the podSubnet and a dst IP from the joinSubnet,
+  the winning route is the joinSubnet route if the podSubnet mask is <= 16, and the podSubnet route otherwise.
+  That is not the desired behavior, which is why we add always-winning routes (i.e. `/32` or `/128`) for the joinIPs.
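+  For illustration, a sketch of the route selection, with assumed subnets (podSubnet `10.10.0.0/24`,
+  joinSubnet `100.65.0.0/16`), for a packet with src `10.10.0.5` and dst `100.65.0.3`:
+
+  ```
+  10.10.0.0/24    10.10.0.2    src-ip   # podSubnet reroute route, prefix length 24
+  100.65.0.0/16   (connected)  dst-ip   # joinSubnet connected route, prefix length 16
+  100.65.0.3/32   100.65.0.3   dst-ip   # per-joinIP route, prefix length 32, always wins
+  ```
+
+  Without the `/32` route, the `/24` src-ip route out-prefixes the `/16` connected route and the packet is
+  rerouted away from the join network; the per-joinIP route restores the intended path.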
We need to make sure joinSubnet works between upgraded and non-upgraded nodes, as this is the only network that both topologies understand: From fed1225168c8c165bf096a2e1b6fa83e9cc64c90 Mon Sep 17 00:00:00 2001 From: Periyasamy Palanisamy Date: Mon, 20 Oct 2025 14:45:07 +0200 Subject: [PATCH 20/20] Resolve merge conflicts Signed-off-by: Periyasamy Palanisamy --- .../pkg/ovn/controller/services/lb_config.go | 10 ---------- .../services/lb_config_ocphack_test.go | 18 +++++++++--------- .../ovn/controller/services/lb_config_test.go | 2 +- 3 files changed, 10 insertions(+), 20 deletions(-) diff --git a/go-controller/pkg/ovn/controller/services/lb_config.go b/go-controller/pkg/ovn/controller/services/lb_config.go index 4489c6dd8e..a06059e31a 100644 --- a/go-controller/pkg/ovn/controller/services/lb_config.go +++ b/go-controller/pkg/ovn/controller/services/lb_config.go @@ -44,17 +44,7 @@ type lbConfig struct { hasNodePort bool } -<<<<<<< HEAD -type lbEndpoints struct { - Port int32 - V4IPs []string - V6IPs []string -} - func makeNodeSwitchTargetIPs(service *corev1.Service, node string, c *lbConfig) (targetIPsV4, targetIPsV6 []string, v4Changed, v6Changed bool) { -======= -func makeNodeSwitchTargetIPs(node string, c *lbConfig) (targetIPsV4, targetIPsV6 []string, v4Changed, v6Changed bool) { ->>>>>>> upstream/master targetIPsV4 = c.clusterEndpoints.V4IPs targetIPsV6 = c.clusterEndpoints.V6IPs diff --git a/go-controller/pkg/ovn/controller/services/lb_config_ocphack_test.go b/go-controller/pkg/ovn/controller/services/lb_config_ocphack_test.go index 48975cd65a..bc51c4f218 100644 --- a/go-controller/pkg/ovn/controller/services/lb_config_ocphack_test.go +++ b/go-controller/pkg/ovn/controller/services/lb_config_ocphack_test.go @@ -90,7 +90,7 @@ func Test_buildPerNodeLBs_OCPHackForDNS(t *testing.T) { vips: []string{"192.168.1.1"}, protocol: corev1.ProtocolTCP, inport: 80, - clusterEndpoints: lbEndpoints{ + clusterEndpoints: util.LBEndpoints{ V4IPs: []string{"10.128.0.2", "10.128.1.2"}, Port: 8080, }, @@ -263,11 +263,11 @@ func Test_buildPerNodeLBs_OCPHackForLocalWithFallback(t *testing.T) { inport: 5, // node port externalTrafficLocal: true, hasNodePort: true, - clusterEndpoints: lbEndpoints{ + clusterEndpoints: util.LBEndpoints{ V4IPs: []string{"10.128.0.2", "10.128.1.2"}, Port: outport, }, - nodeEndpoints: map[string]lbEndpoints{ + nodeEndpoints: map[string]util.LBEndpoints{ nodeA: {V4IPs: []string{"10.128.0.2"}, Port: outport}, nodeB: {V4IPs: []string{"10.128.1.2"}, Port: outport}, }, @@ -277,11 +277,11 @@ func Test_buildPerNodeLBs_OCPHackForLocalWithFallback(t *testing.T) { protocol: corev1.ProtocolTCP, inport: inport, externalTrafficLocal: true, - clusterEndpoints: lbEndpoints{ + clusterEndpoints: util.LBEndpoints{ V4IPs: []string{"10.128.0.2", "10.128.1.2"}, Port: outport, }, - nodeEndpoints: map[string]lbEndpoints{ + nodeEndpoints: map[string]util.LBEndpoints{ nodeA: {V4IPs: []string{"10.128.0.2"}, Port: outport}, nodeB: {V4IPs: []string{"10.128.1.2"}, Port: outport}, }, @@ -362,11 +362,11 @@ func Test_buildPerNodeLBs_OCPHackForLocalWithFallback(t *testing.T) { inport: 5, // node port externalTrafficLocal: true, hasNodePort: true, - clusterEndpoints: lbEndpoints{ + clusterEndpoints: util.LBEndpoints{ V4IPs: []string{"10.128.1.2"}, // only endpoint on node-b is running Port: outport, }, - nodeEndpoints: map[string]lbEndpoints{ + nodeEndpoints: map[string]util.LBEndpoints{ nodeB: {V4IPs: []string{"10.128.1.2"}, Port: outport}, }, }, @@ -375,11 +375,11 @@ func 
Test_buildPerNodeLBs_OCPHackForLocalWithFallback(t *testing.T) { protocol: corev1.ProtocolTCP, inport: inport, externalTrafficLocal: true, - clusterEndpoints: lbEndpoints{ + clusterEndpoints: util.LBEndpoints{ V4IPs: []string{"10.128.1.2"}, Port: outport, }, - nodeEndpoints: map[string]lbEndpoints{ + nodeEndpoints: map[string]util.LBEndpoints{ nodeB: {V4IPs: []string{"10.128.1.2"}, Port: outport}, }, }, diff --git a/go-controller/pkg/ovn/controller/services/lb_config_test.go b/go-controller/pkg/ovn/controller/services/lb_config_test.go index a743e29386..3b22e1e2bf 100644 --- a/go-controller/pkg/ovn/controller/services/lb_config_test.go +++ b/go-controller/pkg/ovn/controller/services/lb_config_test.go @@ -4592,7 +4592,7 @@ func Test_makeNodeSwitchTargetIPs(t *testing.T) { vips: []string{"1.2.3.4", "fe10::1"}, protocol: corev1.ProtocolTCP, inport: 80, - clusterEndpoints: lbEndpoints{ + clusterEndpoints: util.LBEndpoints{ V4IPs: []string{"192.168.1.1"}, // on nodeB V6IPs: []string{"fe00:0:0:0:2::2"}, // on nodeB Port: 8080,