diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 00cfdbd30b..c3b01607de 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -3,7 +3,7 @@ name: ovn-ci on: merge_group: pull_request: - branches: [ master ] + branches: [ master,release-* ] # Only run jobs if at least one non-doc file is changed paths-ignore: - '**/*.md' @@ -30,7 +30,7 @@ env: # This must be a directory CI_IMAGE_CACHE: tmp/image_cache/ - CI_IMAGE_MASTER_TAR: image-master.tar + CI_IMAGE_BASE_TAR: image-base.tar CI_IMAGE_PR_TAR: image-pr.tar CI_DIST_IMAGES_OUTPUT: dist/images/_output/ @@ -65,49 +65,49 @@ jobs: working-directory: go-controller args: --modules-download-mode=vendor --timeout=15m0s --verbose - build-master: - name: Build-master + build-base: + name: Build-base runs-on: ubuntu-24.04 steps: - # Create a cache for the built master image - - name: Restore master image cache - id: image_cache_master + # Create a cache for the built base image + - name: Restore base image cache + id: image_cache_base uses: actions/cache@v4 with: path: | ${{ env.CI_IMAGE_CACHE }} - key: ${{ github.run_id }}-image-cache-master - # if CI_IMAGE_MASTER_TAR isn't in cache, try pulling it and saving to the cache rather + key: ${{ github.run_id }}-image-cache-base + # if CI_IMAGE_BASE_TAR isn't in cache, try pulling it and saving to the cache rather # than building, resort back to building if the cache isn't populated and # pulling the image fails. - - name: Check if master image build is needed - id: is_master_image_build_needed + - name: Check if base image build is needed + id: is_base_image_build_needed continue-on-error: false run: | set -x - if [ -f ${CI_IMAGE_CACHE}${CI_IMAGE_MASTER_TAR}.gz ]; then - cp ${CI_IMAGE_CACHE}/${CI_IMAGE_MASTER_TAR}.gz ${CI_IMAGE_MASTER_TAR}.gz - gunzip ${CI_IMAGE_MASTER_TAR}.gz - echo "MASTER_IMAGE_RESTORED_FROM_CACHE=true" >> "$GITHUB_OUTPUT" - echo "MASTER_IMAGE_RESTORED=true" >> "$GITHUB_OUTPUT" + if [ -f ${CI_IMAGE_CACHE}${CI_IMAGE_BASE_TAR}.gz ]; then + cp ${CI_IMAGE_CACHE}/${CI_IMAGE_BASE_TAR}.gz ${CI_IMAGE_BASE_TAR}.gz + gunzip ${CI_IMAGE_BASE_TAR}.gz + echo "BASE_IMAGE_RESTORED_FROM_CACHE=true" >> "$GITHUB_OUTPUT" + echo "BASE_IMAGE_RESTORED=true" >> "$GITHUB_OUTPUT" exit 0 fi - if docker pull ghcr.io/ovn-kubernetes/ovn-kubernetes/ovn-kube-fedora:master; then - docker tag ghcr.io/ovn-kubernetes/ovn-kubernetes/ovn-kube-fedora:master ovn-daemonset-fedora:dev + if docker pull ghcr.io/ovn-kubernetes/ovn-kubernetes/ovn-kube-fedora:${{ github.base_ref || github.ref_name }}; then + docker tag ghcr.io/ovn-kubernetes/ovn-kubernetes/ovn-kube-fedora:${{ github.base_ref || github.ref_name }} ovn-daemonset-fedora:dev - echo "MASTER_IMAGE_RESTORED=true" >> "$GITHUB_OUTPUT" + echo "BASE_IMAGE_RESTORED=true" >> "$GITHUB_OUTPUT" exit 0 fi - # only run the following steps if the master image was not found in the cache - - name: Check out code into the Go module directory - from master branch - if: steps.is_master_image_build_needed.outputs.MASTER_IMAGE_RESTORED != 'true' && success() + # only run the following steps if the base image was not found in the cache + - name: Check out code into the Go module directory - from base branch + if: steps.is_base_image_build_needed.outputs.BASE_IMAGE_RESTORED != 'true' && success() uses: actions/checkout@v4 with: - ref: master + ref: ${{ github.base_ref || github.ref_name }} - name: Set up Go - if: steps.is_master_image_build_needed.outputs.MASTER_IMAGE_RESTORED != 'true' && success() + if: steps.is_base_image_build_needed.outputs.BASE_IMAGE_RESTORED != 'true' && success() uses: actions/setup-go@v5 with: go-version-file: 'go-controller/go.mod' @@ -118,8 +118,8 @@ jobs: cache: false id: go - - name: Build - from master branch - if: steps.is_master_image_build_needed.outputs.MASTER_IMAGE_RESTORED != 'true' && success() + - name: Build - from base branch + if: steps.is_base_image_build_needed.outputs.BASE_IMAGE_RESTORED != 'true' && success() run: | set -x pushd go-controller @@ -127,8 +127,8 @@ jobs: make windows popd - - name: Build docker image - from master branch - if: steps.is_master_image_build_needed.outputs.MASTER_IMAGE_RESTORED != 'true' && success() + - name: Build docker image - from base branch + if: steps.is_base_image_build_needed.outputs.BASE_IMAGE_RESTORED != 'true' && success() run: | make -C dist/images \ IMAGE=ovn-daemonset-fedora:dev \ @@ -136,27 +136,27 @@ jobs: OVN_GITREF=${{ env.OVN_GITREF }} \ fedora-image - - name: Cache master image - if: steps.is_master_image_build_needed.outputs.MASTER_IMAGE_RESTORED_FROM_CACHE != 'true' && success() + - name: Cache base image + if: steps.is_base_image_build_needed.outputs.BASE_IMAGE_RESTORED_FROM_CACHE != 'true' && success() continue-on-error: false run: | set -x - if [ -f ${CI_IMAGE_CACHE}${CI_IMAGE_MASTER_TAR} ]; then - rm -f ${CI_IMAGE_CACHE}${CI_IMAGE_MASTER_TAR} + if [ -f ${CI_IMAGE_CACHE}${CI_IMAGE_BASE_TAR} ]; then + rm -f ${CI_IMAGE_CACHE}${CI_IMAGE_BASE_TAR} fi - if [ -f ${CI_IMAGE_CACHE}${CI_IMAGE_MASTER_TAR}.gz ]; then - rm -f ${CI_IMAGE_CACHE}${CI_IMAGE_MASTER_TAR}.gz + if [ -f ${CI_IMAGE_CACHE}${CI_IMAGE_BASE_TAR}.gz ]; then + rm -f ${CI_IMAGE_CACHE}${CI_IMAGE_BASE_TAR}.gz fi - docker save ovn-daemonset-fedora:dev -o ${CI_IMAGE_MASTER_TAR} + docker save ovn-daemonset-fedora:dev -o ${CI_IMAGE_BASE_TAR} mkdir -p ${CI_IMAGE_CACHE} - cp ${CI_IMAGE_MASTER_TAR} ${CI_IMAGE_CACHE}${CI_IMAGE_MASTER_TAR} - gzip ${CI_IMAGE_CACHE}${CI_IMAGE_MASTER_TAR} + cp ${CI_IMAGE_BASE_TAR} ${CI_IMAGE_CACHE}${CI_IMAGE_BASE_TAR} + gzip ${CI_IMAGE_CACHE}${CI_IMAGE_BASE_TAR} # run the following always if none of the steps before failed - uses: actions/upload-artifact@v4 with: - name: test-image-master - path: ${{ env.CI_IMAGE_MASTER_TAR }} + name: test-image-base + path: ${{ env.CI_IMAGE_BASE_TAR }} build-pr: name: Build-PR @@ -273,12 +273,12 @@ jobs: path: ${{ env.CI_DIST_IMAGES_OUTPUT }}/${{ env.CI_IMAGE_PR_TAR }} ovn-upgrade-e2e: - name: Upgrade OVN from Master to PR branch based image + name: Upgrade OVN from Base to PR branch based image if: github.event_name != 'schedule' runs-on: ubuntu-24.04 timeout-minutes: 120 needs: - - build-master + - build-base - build-pr strategy: fail-fast: false @@ -296,10 +296,10 @@ jobs: OVN_MULTICAST_ENABLE: "false" steps: - - name: Check out code into the Go module directory - from Master branch + - name: Check out code into the Go module directory - from Base branch uses: actions/checkout@v4 with: - ref: master + ref: ${{ github.base_ref || github.ref_name }} - name: Set up Go uses: actions/setup-go@v5 @@ -324,10 +324,10 @@ jobs: - name: Free up disk space uses: ./.github/actions/free-disk-space - - name: Download test-image-master + - name: Download test-image-base uses: actions/download-artifact@v4 with: - name: test-image-master + name: test-image-base - name: Disable ufw # For IPv6 and Dualstack, ufw (Uncomplicated Firewall) should be disabled. @@ -337,7 +337,7 @@ jobs: - name: Load docker image run: | - docker load --input ${CI_IMAGE_MASTER_TAR} && rm -rf ${CI_IMAGE_MASTER_TAR} + docker load --input ${CI_IMAGE_BASE_TAR} && rm -rf ${CI_IMAGE_BASE_TAR} - name: kind setup run: | diff --git a/docs/okeps/okep-5094-layer2-transit-router.md b/docs/okeps/okep-5094-layer2-transit-router.md index 48aa64917e..d4c6460ab0 100644 --- a/docs/okeps/okep-5094-layer2-transit-router.md +++ b/docs/okeps/okep-5094-layer2-transit-router.md @@ -906,7 +906,14 @@ Nodes using the new topology must perform the following actions: - Create a tmp transit router port `trtos-layer2-switch-upgrade` with the GR MAC address - Add a dummy IP from the join subnet to the `trtos-layer2-switch-upgrade` port to enable pod on node1 -> remote GR traffic. - Add `trasit_router` routes to steer joinIP traffic for the ole nodes to the `trtos-layer2-switch-upgrade` port. - These routes look silly, but they work, like `100.65.0.3 100.65.0.3 dst-ip` for every joinIP of the old nodes. + These routes look weird, but they work, like `100.65.0.3 100.65.0.3 dst-ip` for every joinIP of the old nodes. We need this because + in OVN routes with dst-ip and src-ip policies are evaluated at the same time and selected based on longest-prefix-match, + and we have the following route for pod network `10.10.0.0/24`: + `10.10.0.0/24 10.10.0.2 src-ip` + Connected route for the join IP is based on the joinSubnet, which is always `/16`. + That means, that for traffic with src IP from the podSubnet dst IP from joinSubnet, + the winning route is joinIP if podSubnet mask is <= 16 and podSubnet otherwise. + That is not the desired behavior, that is why we add always-winning routes (aka /32 or /128) for joinIPs. We need to make sure joinSubnet works between upgraded and non-upgraded nodes, as this is the only network that both topologies understand: diff --git a/go-controller/pkg/config/config.go b/go-controller/pkg/config/config.go index 72a22defbd..56cdb4c293 100644 --- a/go-controller/pkg/config/config.go +++ b/go-controller/pkg/config/config.go @@ -13,6 +13,7 @@ import ( "strings" "time" + "github.com/openshift/api/config/v1" "github.com/urfave/cli/v2" gcfg "gopkg.in/gcfg.v1" lumberjack "gopkg.in/natefinch/lumberjack.v2" @@ -28,6 +29,28 @@ import ( "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" ) +// getSupportedPlatformTypes returns a list of all supported platform types +func getSupportedPlatformTypes() []string { + return []string{ + string(v1.AWSPlatformType), // "AWS" + string(v1.AzurePlatformType), // "Azure" + string(v1.BareMetalPlatformType), // "BareMetal" + string(v1.GCPPlatformType), // "GCP" + string(v1.LibvirtPlatformType), // "Libvirt" + string(v1.OpenStackPlatformType), // "OpenStack" + string(v1.NonePlatformType), // "None" + string(v1.VSpherePlatformType), // "VSphere" + string(v1.OvirtPlatformType), // "oVirt" + string(v1.IBMCloudPlatformType), // "IBMCloud" + string(v1.KubevirtPlatformType), // "KubeVirt" + string(v1.EquinixMetalPlatformType), // "EquinixMetal" + string(v1.PowerVSPlatformType), // "PowerVS" + string(v1.AlibabaCloudPlatformType), // "AlibabaCloud" + string(v1.NutanixPlatformType), // "Nutanix" + string(v1.ExternalPlatformType), // "External" + } +} + // DefaultEncapPort number used if not supplied const DefaultEncapPort = 6081 @@ -377,7 +400,6 @@ type KubernetesConfig struct { ServiceCIDRs []*net.IPNet OVNConfigNamespace string `gcfg:"ovn-config-namespace"` OVNEmptyLbEvents bool `gcfg:"ovn-empty-lb-events"` - PodIP string `gcfg:"pod-ip"` // UNUSED RawNoHostSubnetNodes string `gcfg:"no-hostsubnet-nodes"` NoHostSubnetNodes labels.Selector HostNetworkNamespace string `gcfg:"host-network-namespace"` @@ -945,7 +967,7 @@ var CommonFlags = []cli.Flag{ // Logfile rotation parameters &cli.IntFlag{ Name: "logfile-maxsize", - Usage: "Maximum size in bytes of the log file before it gets rolled", + Usage: "Maximum size in megabytes of the log file before it gets rolled", Destination: &cliConfig.Logging.LogFileMaxSize, Value: Logging.LogFileMaxSize, }, @@ -1277,7 +1299,7 @@ var K8sFlags = []cli.Flag{ &cli.StringFlag{ Name: "platform-type", Usage: "The cloud provider platform type ovn-kubernetes is deployed on. " + - "Valid values can be found in: https://github.com/ovn-org/ovn-kubernetes/blob/master/go-controller/vendor/github.com/openshift/api/config/v1/types_infrastructure.go#L130-L172", + "Valid values: " + strings.Join(getSupportedPlatformTypes(), ", "), Destination: &cliConfig.Kubernetes.PlatformType, Value: Kubernetes.PlatformType, }, diff --git a/go-controller/pkg/kube/kube.go b/go-controller/pkg/kube/kube.go index 7eccec3d7f..6bb79e2311 100644 --- a/go-controller/pkg/kube/kube.go +++ b/go-controller/pkg/kube/kube.go @@ -55,8 +55,6 @@ type Interface interface { SetAnnotationsOnService(namespace, serviceName string, annotations map[string]interface{}) error SetAnnotationsOnNode(nodeName string, annotations map[string]interface{}) error SetAnnotationsOnNamespace(namespaceName string, annotations map[string]interface{}) error - SetTaintOnNode(nodeName string, taint *corev1.Taint) error - RemoveTaintFromNode(nodeName string, taint *corev1.Taint) error SetLabelsOnNode(nodeName string, labels map[string]interface{}) error PatchNode(old, new *corev1.Node) error UpdateNodeStatus(node *corev1.Node) error @@ -197,68 +195,6 @@ func (k *Kube) SetAnnotationsOnService(namespace, name string, annotations map[s return err } -// SetTaintOnNode tries to add a new taint to the node. If the taint already exists, it doesn't do anything. -func (k *Kube) SetTaintOnNode(nodeName string, taint *corev1.Taint) error { - node, err := k.GetNodeForWindows(nodeName) - if err != nil { - klog.Errorf("Unable to retrieve node %s for tainting %s: %v", nodeName, taint.ToString(), err) - return err - } - newNode := node.DeepCopy() - nodeTaints := newNode.Spec.Taints - - var newTaints []corev1.Taint - for i := range nodeTaints { - if taint.MatchTaint(&nodeTaints[i]) { - klog.Infof("Taint %s already exists on Node %s", taint.ToString(), node.Name) - return nil - } - newTaints = append(newTaints, nodeTaints[i]) - } - - klog.Infof("Setting taint %s on Node %s", taint.ToString(), node.Name) - newTaints = append(newTaints, *taint) - newNode.Spec.Taints = newTaints - err = k.PatchNode(node, newNode) - if err != nil { - klog.Errorf("Unable to add taint %s on node %s: %v", taint.ToString(), node.Name, err) - return err - } - - klog.Infof("Added taint %s on node %s", taint.ToString(), node.Name) - return nil -} - -// RemoveTaintFromNode removes all the taints that have the same key and effect from the node. -// If the taint doesn't exist, it doesn't do anything. -func (k *Kube) RemoveTaintFromNode(nodeName string, taint *corev1.Taint) error { - node, err := k.GetNodeForWindows(nodeName) - if err != nil { - klog.Errorf("Unable to retrieve node %s for tainting %s: %v", nodeName, taint.ToString(), err) - return err - } - newNode := node.DeepCopy() - nodeTaints := newNode.Spec.Taints - - var newTaints []corev1.Taint - for i := range nodeTaints { - if taint.MatchTaint(&nodeTaints[i]) { - klog.Infof("Removing taint %s from Node %s", taint.ToString(), node.Name) - continue - } - newTaints = append(newTaints, nodeTaints[i]) - } - - newNode.Spec.Taints = newTaints - err = k.PatchNode(node, newNode) - if err != nil { - klog.Errorf("Unable to remove taint %s on node %s: %v", taint.ToString(), node.Name, err) - return err - } - klog.Infof("Removed taint %s on node %s", taint.ToString(), node.Name) - return nil -} - // SetLabelsOnNode takes the node name and map of key/value string pairs to set as labels func (k *Kube) SetLabelsOnNode(nodeName string, labels map[string]interface{}) error { patch := struct { diff --git a/go-controller/pkg/kube/kube_test.go b/go-controller/pkg/kube/kube_test.go index d93119ff3a..72d6dd5a46 100644 --- a/go-controller/pkg/kube/kube_test.go +++ b/go-controller/pkg/kube/kube_test.go @@ -16,7 +16,6 @@ var _ = Describe("Kube", func() { Describe("Taint node operations", func() { var kube Kube var existingNodeTaints []corev1.Taint - var taint corev1.Taint var node *corev1.Node BeforeEach(func() { @@ -24,7 +23,6 @@ var _ = Describe("Kube", func() { kube = Kube{ KClient: fakeClient, } - taint = corev1.Taint{Key: "my-taint-key", Value: "my-taint-value", Effect: corev1.TaintEffectNoSchedule} }) JustBeforeEach(func() { @@ -43,90 +41,6 @@ var _ = Describe("Kube", func() { Expect(err).ToNot(HaveOccurred()) Expect(node).NotTo(BeZero()) }) - - Context("with a node not having the taint", func() { - - BeforeEach(func() { - existingNodeTaints = make([]corev1.Taint, 0) - }) - - Context("SetTaintOnNode", func() { - It("should add the taint to the node", func() { - err := kube.SetTaintOnNode(node.Name, &taint) - Expect(err).ToNot(HaveOccurred()) - - loadedNode, err := kube.KClient.CoreV1().Nodes().Get(context.TODO(), node.Name, metav1.GetOptions{}) - Expect(err).ToNot(HaveOccurred()) - Expect(loadedNode.Spec.Taints).To(ContainElement(taint)) - }) - }) - - Context("RemoveTaintFromNode", func() { - It("should remove nothing from the node", func() { - err := kube.RemoveTaintFromNode(node.Name, &taint) - Expect(err).ToNot(HaveOccurred()) - - loadedNode, err := kube.KClient.CoreV1().Nodes().Get(context.TODO(), node.Name, metav1.GetOptions{}) - Expect(err).ToNot(HaveOccurred()) - Expect(loadedNode.Spec.Taints).To(HaveLen(len(node.Spec.Taints))) - }) - }) - }) - - Context("with a node having the same taint already", func() { - - BeforeEach(func() { - existingNodeTaints = []corev1.Taint{taint} - }) - - Context("SetTaintOnNode", func() { - It("should update the taint of the node if effect differs", func() { - updatedTaint := taint.DeepCopy() - updatedTaint.Effect = corev1.TaintEffectPreferNoSchedule - - err := kube.SetTaintOnNode(node.Name, updatedTaint) - Expect(err).ToNot(HaveOccurred()) - - loadedNode, err := kube.KClient.CoreV1().Nodes().Get(context.TODO(), node.Name, metav1.GetOptions{}) - Expect(err).ToNot(HaveOccurred()) - Expect(loadedNode.Spec.Taints).To(ContainElement(*updatedTaint)) - }) - - It("should update nothing if taint is the same", func() { - err := kube.SetTaintOnNode(node.Name, &taint) - Expect(err).ToNot(HaveOccurred()) - - updatedNode, err := kube.GetNodeForWindows(node.Name) - Expect(err).ToNot(HaveOccurred()) - Expect(updatedNode.Spec.Taints).To(Equal([]corev1.Taint{taint})) - }) - }) - - Context("RemoveTaintFromNode", func() { - It("should remove the taint from the node", func() { - err := kube.RemoveTaintFromNode(node.Name, &taint) - Expect(err).ToNot(HaveOccurred()) - - loadedNode, err := kube.KClient.CoreV1().Nodes().Get(context.TODO(), node.Name, metav1.GetOptions{}) - Expect(err).ToNot(HaveOccurred()) - Expect(loadedNode.Spec.Taints).NotTo(ContainElement(taint)) - Expect(loadedNode.Spec.Taints).To(HaveLen(len(node.Spec.Taints) - 1)) - }) - }) - }) - - Context("with references to a node which doesn't exist", func() { - - Context("SetTaintOnNode", func() { - - It("should return an error, if node doesn't exist", func() { - nodeName := "targaryen" - err := kube.SetTaintOnNode(nodeName, &taint) - Expect(err).To(HaveOccurred()) - Expect(err.Error()).To(Equal("nodes \"targaryen\" not found")) - }) - }) - }) }) Describe("SetAnnotationsOnPod", func() { diff --git a/go-controller/pkg/kube/mocks/Interface.go b/go-controller/pkg/kube/mocks/Interface.go index 594d33d699..2e68b7898f 100644 --- a/go-controller/pkg/kube/mocks/Interface.go +++ b/go-controller/pkg/kube/mocks/Interface.go @@ -145,24 +145,6 @@ func (_m *Interface) PatchNode(old *corev1.Node, new *corev1.Node) error { return r0 } -// RemoveTaintFromNode provides a mock function with given fields: nodeName, taint -func (_m *Interface) RemoveTaintFromNode(nodeName string, taint *corev1.Taint) error { - ret := _m.Called(nodeName, taint) - - if len(ret) == 0 { - panic("no return value specified for RemoveTaintFromNode") - } - - var r0 error - if rf, ok := ret.Get(0).(func(string, *corev1.Taint) error); ok { - r0 = rf(nodeName, taint) - } else { - r0 = ret.Error(0) - } - - return r0 -} - // SetAnnotationsOnNamespace provides a mock function with given fields: namespaceName, annotations func (_m *Interface) SetAnnotationsOnNamespace(namespaceName string, annotations map[string]interface{}) error { ret := _m.Called(namespaceName, annotations) @@ -253,24 +235,6 @@ func (_m *Interface) SetLabelsOnNode(nodeName string, labels map[string]interfac return r0 } -// SetTaintOnNode provides a mock function with given fields: nodeName, taint -func (_m *Interface) SetTaintOnNode(nodeName string, taint *corev1.Taint) error { - ret := _m.Called(nodeName, taint) - - if len(ret) == 0 { - panic("no return value specified for SetTaintOnNode") - } - - var r0 error - if rf, ok := ret.Get(0).(func(string, *corev1.Taint) error); ok { - r0 = rf(nodeName, taint) - } else { - r0 = ret.Error(0) - } - - return r0 -} - // UpdateNodeStatus provides a mock function with given fields: node func (_m *Interface) UpdateNodeStatus(node *corev1.Node) error { ret := _m.Called(node) diff --git a/go-controller/pkg/kube/mocks/InterfaceOVN.go b/go-controller/pkg/kube/mocks/InterfaceOVN.go index 18e93ed800..ef253ba2be 100644 --- a/go-controller/pkg/kube/mocks/InterfaceOVN.go +++ b/go-controller/pkg/kube/mocks/InterfaceOVN.go @@ -415,24 +415,6 @@ func (_m *InterfaceOVN) SetLabelsOnNode(nodeName string, labels map[string]inter return r0 } -// SetTaintOnNode provides a mock function with given fields: nodeName, taint -func (_m *InterfaceOVN) SetTaintOnNode(nodeName string, taint *apicorev1.Taint) error { - ret := _m.Called(nodeName, taint) - - if len(ret) == 0 { - panic("no return value specified for SetTaintOnNode") - } - - var r0 error - if rf, ok := ret.Get(0).(func(string, *apicorev1.Taint) error); ok { - r0 = rf(nodeName, taint) - } else { - r0 = ret.Error(0) - } - - return r0 -} - // UpdateCloudPrivateIPConfig provides a mock function with given fields: cloudPrivateIPConfig func (_m *InterfaceOVN) UpdateCloudPrivateIPConfig(cloudPrivateIPConfig *v1.CloudPrivateIPConfig) (*v1.CloudPrivateIPConfig, error) { ret := _m.Called(cloudPrivateIPConfig) diff --git a/go-controller/pkg/metrics/ovs.go b/go-controller/pkg/metrics/ovs.go index 718fa031e7..b4c95ec86f 100644 --- a/go-controller/pkg/metrics/ovs.go +++ b/go-controller/pkg/metrics/ovs.go @@ -907,7 +907,7 @@ func registerOvsMetrics(ovsDBClient libovsdbclient.Client, metricsScrapeInterval } // OVS datapath metrics updater - go ovsDatapathMetricsUpdater(util.RunOVSAppctl, metricsScrapeInterval, stopChan) + go ovsDatapathMetricsUpdater(util.RunOvsVswitchdAppCtl, metricsScrapeInterval, stopChan) // OVS bridge metrics updater go ovsBridgeMetricsUpdater(ovsDBClient, util.RunOVSOfctl, metricsScrapeInterval, stopChan) // OVS interface metrics updater diff --git a/go-controller/pkg/node/default_node_network_controller.go b/go-controller/pkg/node/default_node_network_controller.go index e3923fcc54..cd81267cc2 100644 --- a/go-controller/pkg/node/default_node_network_controller.go +++ b/go-controller/pkg/node/default_node_network_controller.go @@ -511,13 +511,7 @@ func setEncapPort(ctx context.Context) error { func isOVNControllerReady() (bool, error) { // check node's connection status - runDir := util.GetOvnRunDir() - pid, err := os.ReadFile(runDir + "ovn-controller.pid") - if err != nil { - return false, fmt.Errorf("unknown pid for ovn-controller process: %v", err) - } - ctlFile := runDir + fmt.Sprintf("ovn-controller.%s.ctl", strings.TrimSuffix(string(pid), "\n")) - ret, _, err := util.RunOVSAppctl("-t", ctlFile, "connection-status") + ret, _, err := util.RunOVNControllerAppCtl("connection-status") if err != nil { return false, fmt.Errorf("could not get connection status: %w", err) } diff --git a/go-controller/pkg/node/gateway_init_linux_test.go b/go-controller/pkg/node/gateway_init_linux_test.go index b46657c5c2..e23fe93729 100644 --- a/go-controller/pkg/node/gateway_init_linux_test.go +++ b/go-controller/pkg/node/gateway_init_linux_test.go @@ -19,6 +19,7 @@ import ( "github.com/containernetworking/plugins/pkg/testutils" nadfake "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/client/clientset/versioned/fake" "github.com/k8snetworkplumbingwg/sriovnet" + "github.com/spf13/afero" "github.com/stretchr/testify/mock" "github.com/urfave/cli/v2" "github.com/vishvananda/netlink" @@ -203,7 +204,7 @@ func shareGatewayInterfaceTest(app *cli.App, testNS ns.NetNS, Output: systemID, }) fexec.AddFakeCmd(&ovntest.ExpectedCmd{ - Cmd: "ovs-appctl --timeout=15 dpif/show-dp-features breth0", + Cmd: "ovs-appctl -t /var/run/openvswitch/ovs-vswitchd.1234.ctl dpif/show-dp-features breth0", Output: "Check pkt length action: Yes", }) fexec.AddFakeCmd(&ovntest.ExpectedCmd{ @@ -211,7 +212,7 @@ func shareGatewayInterfaceTest(app *cli.App, testNS ns.NetNS, Output: fmt.Sprintf("%t", hwOffload), }) fexec.AddFakeCmdsNoOutputNoError([]string{ - "ovs-appctl --timeout=15 fdb/add breth0 breth0 0 " + eth0MAC, + "ovs-appctl -t /var/run/openvswitch/ovs-vswitchd.1234.ctl fdb/add breth0 breth0 0 " + eth0MAC, }) fexec.AddFakeCmd(&ovntest.ExpectedCmd{ Cmd: "ovs-vsctl --timeout=15 get Interface patch-breth0_node1-to-br-int ofport", @@ -258,7 +259,13 @@ func shareGatewayInterfaceTest(app *cli.App, testNS ns.NetNS, }) // syncServices() - err := util.SetExec(fexec) + // Setup mock filesystem for ovs-vswitchd.pid file needed by ovs-appctl commands + err := util.AppFs.MkdirAll("/var/run/openvswitch/", 0o755) + Expect(err).NotTo(HaveOccurred()) + err = afero.WriteFile(util.AppFs, "/var/run/openvswitch/ovs-vswitchd.pid", []byte("1234"), 0o644) + Expect(err).NotTo(HaveOccurred()) + + err = util.SetExec(fexec) Expect(err).NotTo(HaveOccurred()) _, err = config.InitConfig(ctx, fexec, nil) @@ -653,7 +660,7 @@ func shareGatewayInterfaceDPUTest(app *cli.App, testNS ns.NetNS, }) // DetectCheckPktLengthSupport fexec.AddFakeCmd(&ovntest.ExpectedCmd{ - Cmd: "ovs-appctl --timeout=15 dpif/show-dp-features " + brphys, + Cmd: "ovs-appctl -t /var/run/openvswitch/ovs-vswitchd.1234.ctl dpif/show-dp-features " + brphys, Output: "Check pkt length action: Yes", }) // IsOvsHwOffloadEnabled @@ -662,7 +669,7 @@ func shareGatewayInterfaceDPUTest(app *cli.App, testNS ns.NetNS, Output: "false", }) fexec.AddFakeCmdsNoOutputNoError([]string{ - fmt.Sprintf("ovs-appctl --timeout=15 fdb/add %s %s 0 %s", brphys, brphys, hostMAC), + fmt.Sprintf("ovs-appctl -t /var/run/openvswitch/ovs-vswitchd.1234.ctl fdb/add %s %s 0 %s", brphys, brphys, hostMAC), }) // GetDPUHostInterface fexec.AddFakeCmd(&ovntest.ExpectedCmd{ @@ -714,7 +721,14 @@ func shareGatewayInterfaceDPUTest(app *cli.App, testNS ns.NetNS, Output: "7", }) // syncServices() - err := util.SetExec(fexec) + + // Setup mock filesystem for ovs-vswitchd.pid file needed by ovs-appctl commands + err := util.AppFs.MkdirAll("/var/run/openvswitch/", 0o755) + Expect(err).NotTo(HaveOccurred()) + err = afero.WriteFile(util.AppFs, "/var/run/openvswitch/ovs-vswitchd.pid", []byte("1234"), 0o644) + Expect(err).NotTo(HaveOccurred()) + + err = util.SetExec(fexec) Expect(err).NotTo(HaveOccurred()) _, err = config.InitConfig(ctx, fexec, nil) @@ -1118,7 +1132,7 @@ OFPT_GET_CONFIG_REPLY (xid=0x4): frags=normal miss_send_len=0` Output: systemID, }) fexec.AddFakeCmd(&ovntest.ExpectedCmd{ - Cmd: "ovs-appctl --timeout=15 dpif/show-dp-features breth0", + Cmd: "ovs-appctl -t /var/run/openvswitch/ovs-vswitchd.1234.ctl dpif/show-dp-features breth0", Output: "Check pkt length action: Yes", }) fexec.AddFakeCmd(&ovntest.ExpectedCmd{ @@ -1126,7 +1140,7 @@ OFPT_GET_CONFIG_REPLY (xid=0x4): frags=normal miss_send_len=0` Output: "false", }) fexec.AddFakeCmdsNoOutputNoError([]string{ - "ovs-appctl --timeout=15 fdb/add breth0 breth0 0 " + eth0MAC, + "ovs-appctl -t /var/run/openvswitch/ovs-vswitchd.1234.ctl fdb/add breth0 breth0 0 " + eth0MAC, }) fexec.AddFakeCmd(&ovntest.ExpectedCmd{ Cmd: "ovs-vsctl --timeout=15 get Interface patch-breth0_node1-to-br-int ofport", @@ -1178,7 +1192,13 @@ OFPT_GET_CONFIG_REPLY (xid=0x4): frags=normal miss_send_len=0` }) // syncServices() - err := util.SetExec(fexec) + // Setup mock filesystem for ovs-vswitchd.pid file needed by ovs-appctl commands + err := util.AppFs.MkdirAll("/var/run/openvswitch/", 0o755) + Expect(err).NotTo(HaveOccurred()) + err = afero.WriteFile(util.AppFs, "/var/run/openvswitch/ovs-vswitchd.pid", []byte("1234"), 0o644) + Expect(err).NotTo(HaveOccurred()) + + err = util.SetExec(fexec) Expect(err).NotTo(HaveOccurred()) _, err = config.InitConfig(ctx, fexec, nil) diff --git a/go-controller/pkg/node/gateway_iptables.go b/go-controller/pkg/node/gateway_iptables.go index 90bffbe91f..d68f770121 100644 --- a/go-controller/pkg/node/gateway_iptables.go +++ b/go-controller/pkg/node/gateway_iptables.go @@ -183,15 +183,30 @@ func computeProbability(n, i int) string { return fmt.Sprintf("%0.10f", 1.0/float64(n-i+1)) } -func generateIPTRulesForLoadBalancersWithoutNodePorts(svcPort corev1.ServicePort, externalIP string, localEndpoints []string) []nodeipt.Rule { - iptRules := make([]nodeipt.Rule, 0, len(localEndpoints)) +// generateIPTRulesForLoadBalancersWithoutNodePorts generates iptables DNAT rules for load balancer services +// without NodePort allocation. It performs statistical load balancing between endpoints via iptables. +func generateIPTRulesForLoadBalancersWithoutNodePorts(svcPort corev1.ServicePort, externalIP string, localEndpoints util.PortToLBEndpoints) []nodeipt.Rule { if len(localEndpoints) == 0 { // either its smart nic mode; etp&itp not implemented, OR // fetching endpointSlices error-ed out prior to reaching here so nothing to do - return iptRules + return []nodeipt.Rule{} } - numLocalEndpoints := len(localEndpoints) - for i, ip := range localEndpoints { + + // Get the endpoints for the port key. + // svcPortKey is of format e.g. "TCP/my-port-name" or "TCP/" if name is empty + // (is the case when only a single ServicePort is defined on this service). + svcPortKey := util.GetServicePortKey(svcPort.Protocol, svcPort.Name) + lbEndpoints := localEndpoints[svcPortKey] + + // Get IPv4 or IPv6 IPs, depending on the type of the service's external IP. + destinations := lbEndpoints.GetV4Destinations() + if utilnet.IsIPv6String(externalIP) { + destinations = lbEndpoints.GetV6Destinations() + } + + numLocalEndpoints := len(destinations) + iptRules := make([]nodeipt.Rule, 0, numLocalEndpoints) + for i, destination := range destinations { iptRules = append([]nodeipt.Rule{ { Table: "nat", @@ -201,7 +216,7 @@ func generateIPTRulesForLoadBalancersWithoutNodePorts(svcPort corev1.ServicePort "-d", externalIP, "--dport", fmt.Sprintf("%v", svcPort.Port), "-j", "DNAT", - "--to-destination", util.JoinHostPortInt32(ip, int32(svcPort.TargetPort.IntValue())), + "--to-destination", util.JoinHostPortInt32(destination.IP, destination.Port), "-m", "statistic", "--mode", "random", "--probability", computeProbability(numLocalEndpoints, i+1), @@ -476,7 +491,7 @@ func recreateIPTRules(table, chain string, keepIPTRules []nodeipt.Rule) error { // case3: if svcHasLocalHostNetEndPnt and svcTypeIsITPLocal, rule that redirects clusterIP traffic to host targetPort is added. // // if !svcHasLocalHostNetEndPnt and svcTypeIsITPLocal, rule that marks clusterIP traffic to steer it to ovn-k8s-mp0 is added. -func getGatewayIPTRules(service *corev1.Service, localEndpoints []string, svcHasLocalHostNetEndPnt bool) []nodeipt.Rule { +func getGatewayIPTRules(service *corev1.Service, localEndpoints util.PortToLBEndpoints, svcHasLocalHostNetEndPnt bool) []nodeipt.Rule { rules := make([]nodeipt.Rule, 0) clusterIPs := util.GetClusterIPs(service) svcTypeIsETPLocal := util.ServiceExternalTrafficPolicyLocal(service) diff --git a/go-controller/pkg/node/gateway_localnet_linux_test.go b/go-controller/pkg/node/gateway_localnet_linux_test.go index 49e4d1ee13..71d112b491 100644 --- a/go-controller/pkg/node/gateway_localnet_linux_test.go +++ b/go-controller/pkg/node/gateway_localnet_linux_test.go @@ -562,6 +562,7 @@ var _ = Describe("Node Operations", func() { config.Gateway.Mode = config.GatewayModeLocal epPortName := "https" epPortValue := int32(443) + epPortProtocol := corev1.ProtocolTCP service := *newService("service1", "namespace1", "10.129.0.2", []corev1.ServicePort{ { @@ -579,8 +580,9 @@ var _ = Describe("Node Operations", func() { Addresses: []string{"10.244.0.3"}, } epPort1 := discovery.EndpointPort{ - Name: &epPortName, - Port: &epPortValue, + Name: &epPortName, + Port: &epPortValue, + Protocol: &epPortProtocol, } // endpointSlice.Endpoints is ovn-networked so this will // come under !hasLocalHostNetEp case @@ -878,9 +880,11 @@ var _ = Describe("Node Operations", func() { Cmd: "ovs-ofctl show ", Err: fmt.Errorf("deliberate error to fall back to output:LOCAL"), }) + svcPortName := "http" service := *newServiceWithoutNodePortAllocation("service1", "namespace1", "10.129.0.2", []corev1.ServicePort{ { + Name: svcPortName, Protocol: corev1.ProtocolTCP, Port: int32(80), TargetPort: intstr.FromInt(int(int32(8080))), @@ -910,11 +914,12 @@ var _ = Describe("Node Operations", func() { Addresses: []string{"10.244.0.4"}, NodeName: &fakeNodeName, } - epPortName := "http" epPortValue := int32(8080) + epPortProtocol := corev1.ProtocolTCP epPort1 := discovery.EndpointPort{ - Name: &epPortName, - Port: &epPortValue, + Name: &svcPortName, + Port: &epPortValue, + Protocol: &epPortProtocol, } // endpointSlice.Endpoints is ovn-networked so this will // come under !hasLocalHostNetEp case @@ -994,6 +999,155 @@ var _ = Describe("Node Operations", func() { Expect(app.Run([]string{app.Name})).To(Succeed()) }) + It("inits iptables rules and openflows with named port and AllocateLoadBalancerNodePorts=False, ETP=local, LGW mode", func() { + app.Action = func(*cli.Context) error { + minNFakeCommands := nInitialFakeCommands + 1 + fExec.AddRepeatedFakeCmd(&ovntest.ExpectedCmd{ + Cmd: "ovs-ofctl show ", + }, minNFakeCommands) + + config.Gateway.Mode = config.GatewayModeLocal + svcPortName := "https-port" + svcPortValue := int32(8080) + svcProtocol := corev1.ProtocolTCP + svcTargetPortName := "https-target" + svcAllocateLoadBalancerNodePorts := false + svcStatusIP := "192.168.0.10" + svcStatusIPMode := corev1.LoadBalancerIPModeVIP + + epPortValue := int32(443) + epPortProtocol := corev1.ProtocolTCP + + nodeName := "node" + + service := *newService("service1", "namespace1", "10.129.0.2", + []corev1.ServicePort{ + { + Name: svcPortName, + Port: svcPortValue, + Protocol: svcProtocol, + TargetPort: intstr.FromString(svcTargetPortName), + }, + }, + corev1.ServiceTypeLoadBalancer, + nil, + corev1.ServiceStatus{ + LoadBalancer: corev1.LoadBalancerStatus{ + Ingress: []corev1.LoadBalancerIngress{ + { + IP: svcStatusIP, + IPMode: &svcStatusIPMode, + }, + }, + }, + }, + true, false, + ) + service.Spec.AllocateLoadBalancerNodePorts = &svcAllocateLoadBalancerNodePorts + ep1 := discovery.Endpoint{ + Addresses: []string{"10.244.0.3"}, + NodeName: &nodeName, + } + ep2 := discovery.Endpoint{ + Addresses: []string{"10.244.0.4"}, + NodeName: &nodeName, + } + epPort1 := discovery.EndpointPort{ + Name: &svcPortName, + Port: &epPortValue, + Protocol: &epPortProtocol, + } + // endpointSlice.Endpoints is ovn-networked so this will + // come under !hasLocalHostNetEp case + endpointSlice := *newEndpointSlice( + "service1", + "namespace1", + []discovery.Endpoint{ep1, ep2}, + []discovery.EndpointPort{epPort1}, + ) + + stopChan := make(chan struct{}) + fakeClient := util.GetOVNClientset(&service, &endpointSlice).GetNodeClientset() + wf, err := factory.NewNodeWatchFactory(fakeClient, nodeName) + Expect(err).ToNot(HaveOccurred()) + Expect(wf.Start()).To(Succeed()) + defer func() { + close(stopChan) + wf.Shutdown() + }() + + fNPW.watchFactory = wf + Expect(startNodePortWatcher(fNPW, fakeClient)).To(Succeed()) + + expectedTables := map[string]util.FakeTable{ + "nat": { + "PREROUTING": []string{ + "-j OVN-KUBE-ETP", + "-j OVN-KUBE-EXTERNALIP", + "-j OVN-KUBE-NODEPORT", + }, + "OUTPUT": []string{ + "-j OVN-KUBE-EXTERNALIP", + "-j OVN-KUBE-NODEPORT", + "-j OVN-KUBE-ITP", + }, + "OVN-KUBE-NODEPORT": []string{}, + "OVN-KUBE-EXTERNALIP": []string{ + fmt.Sprintf("-p %s -d %s --dport %d -j DNAT --to-destination %s:%v", + service.Spec.Ports[0].Protocol, + service.Status.LoadBalancer.Ingress[0].IP, + service.Spec.Ports[0].Port, + service.Spec.ClusterIP, + service.Spec.Ports[0].Port), + }, + "OVN-KUBE-ETP": []string{ + fmt.Sprintf("-p %s -d %s --dport %d -j DNAT --to-destination %s:%d -m statistic --mode random --probability 0.5000000000", + service.Spec.Ports[0].Protocol, + service.Status.LoadBalancer.Ingress[0].IP, + service.Spec.Ports[0].Port, + endpointSlice.Endpoints[0].Addresses[0], + *endpointSlice.Ports[0].Port), + fmt.Sprintf("-p %s -d %s --dport %d -j DNAT --to-destination %s:%d -m statistic --mode random --probability 1.0000000000", + service.Spec.Ports[0].Protocol, + service.Status.LoadBalancer.Ingress[0].IP, + service.Spec.Ports[0].Port, + endpointSlice.Endpoints[1].Addresses[0], + *endpointSlice.Ports[0].Port), + }, + "OVN-KUBE-ITP": []string{}, + }, + "filter": {}, + "mangle": { + "OUTPUT": []string{ + "-j OVN-KUBE-ITP", + }, + "OVN-KUBE-ITP": []string{}, + }, + } + + f4 := iptV4.(*util.FakeIPTables) + err = f4.MatchState(expectedTables, nil) + Expect(err).NotTo(HaveOccurred()) + + expectedNFT := getBaseNFTRules(types.K8sMgmtIntfName) + expectedNFT += fmt.Sprintf("add element inet ovn-kubernetes mgmtport-no-snat-services-v4 { %s . tcp . %v }\n"+ + "add element inet ovn-kubernetes mgmtport-no-snat-services-v4 { %s . tcp . %v }\n", + endpointSlice.Endpoints[1].Addresses[0], + *endpointSlice.Ports[0].Port, + endpointSlice.Endpoints[0].Addresses[0], + *endpointSlice.Ports[0].Port) + err = nodenft.MatchNFTRules(expectedNFT, nft.Dump()) + Expect(err).NotTo(HaveOccurred()) + + flows := fNPW.ofm.getFlowsByKey("NodePort_namespace1_service1_tcp_31111") + Expect(flows).To(BeNil()) + + return nil + } + err := app.Run([]string{app.Name}) + Expect(err).NotTo(HaveOccurred()) + }) + It("inits iptables rules and openflows with LoadBalancer where ETP=cluster, LGW mode", func() { app.Action = func(*cli.Context) error { externalIP := "1.1.1.1" @@ -2092,6 +2246,7 @@ var _ = Describe("Node Operations", func() { config.Gateway.Mode = config.GatewayModeLocal epPortName := "https" epPortValue := int32(443) + epPortProtocol := corev1.ProtocolTCP service := *newService("service1", "namespace1", "10.129.0.2", []corev1.ServicePort{ { @@ -2109,8 +2264,9 @@ var _ = Describe("Node Operations", func() { Addresses: []string{"10.244.0.3"}, } epPort1 := discovery.EndpointPort{ - Name: &epPortName, - Port: &epPortValue, + Name: &epPortName, + Port: &epPortValue, + Protocol: &epPortProtocol, } // endpointSlice.Endpoints is ovn-networked so this will come // under !hasLocalHostNetEp case @@ -2231,6 +2387,7 @@ var _ = Describe("Node Operations", func() { config.Gateway.Mode = config.GatewayModeShared epPortName := "https" epPortValue := int32(443) + epPortProtocol := corev1.ProtocolTCP service := *newService("service1", "namespace1", "10.129.0.2", []corev1.ServicePort{ { @@ -2249,8 +2406,9 @@ var _ = Describe("Node Operations", func() { Addresses: []string{"10.244.0.3"}, } epPort1 := discovery.EndpointPort{ - Name: &epPortName, - Port: &epPortValue, + Name: &epPortName, + Port: &epPortValue, + Protocol: &epPortProtocol, } // endpointSlice.Endpoints is ovn-networked so this will come // under !hasLocalHostNetEp case @@ -2376,9 +2534,11 @@ var _ = Describe("Node Operations", func() { outport := int32(443) epPortName := "https" epPortValue := int32(443) + epPortProtocol := corev1.ProtocolTCP service := *newService("service1", "namespace1", "10.129.0.2", []corev1.ServicePort{ { + Name: epPortName, NodePort: int32(31111), Protocol: corev1.ProtocolTCP, Port: int32(8080), @@ -2396,8 +2556,9 @@ var _ = Describe("Node Operations", func() { NodeName: &fakeNodeName, } epPort1 := discovery.EndpointPort{ - Name: &epPortName, - Port: &epPortValue, + Name: &epPortName, + Port: &epPortValue, + Protocol: &epPortProtocol, } // endpointSlice.Endpoints is ovn-networked so this will // come under !hasLocalHostNetEp case @@ -2522,6 +2683,7 @@ var _ = Describe("Node Operations", func() { config.Gateway.Mode = config.GatewayModeShared epPortName := "https" epPortValue := int32(443) + epPortProtocol := corev1.ProtocolTCP service := *newService("service1", "namespace1", "10.129.0.2", []corev1.ServicePort{ { @@ -2539,8 +2701,9 @@ var _ = Describe("Node Operations", func() { Addresses: []string{"10.244.0.3"}, } epPort1 := discovery.EndpointPort{ - Name: &epPortName, - Port: &epPortValue, + Name: &epPortName, + Port: &epPortValue, + Protocol: &epPortProtocol, } // endpointSlice.Endpoints is ovn-networked so this will // come under !hasLocalHostNetEp case @@ -2665,9 +2828,11 @@ var _ = Describe("Node Operations", func() { config.Gateway.Mode = config.GatewayModeLocal epPortName := "https" outport := int32(443) + epPortProtocol := corev1.ProtocolTCP service := *newService("service1", "namespace1", "10.129.0.2", []corev1.ServicePort{ { + Name: epPortName, NodePort: int32(31111), Protocol: corev1.ProtocolTCP, Port: int32(8080), @@ -2684,8 +2849,9 @@ var _ = Describe("Node Operations", func() { NodeName: &fakeNodeName, } epPort1 := discovery.EndpointPort{ - Name: &epPortName, - Port: &outport, + Name: &epPortName, + Port: &outport, + Protocol: &epPortProtocol, } // endpointSlice.Endpoints is host-networked so this will // come under hasLocalHostNetEp case diff --git a/go-controller/pkg/node/gateway_nftables.go b/go-controller/pkg/node/gateway_nftables.go index b38f2baebb..eb776c1f5f 100644 --- a/go-controller/pkg/node/gateway_nftables.go +++ b/go-controller/pkg/node/gateway_nftables.go @@ -55,23 +55,34 @@ func getNoSNATNodePortRules(svcPort corev1.ServicePort) []*knftables.Element { // "mgmtport-no-snat-services-v4" and "mgmtport-no-snat-services-v6" sets to prevent SNAT // of sourceIP when passing through the management port, for an `externalTrafficPolicy: // Local` service *without* NodePorts. -func getNoSNATLoadBalancerIPRules(svcPort corev1.ServicePort, localEndpoints []string) []*knftables.Element { +func getNoSNATLoadBalancerIPRules(svcPort corev1.ServicePort, localEndpoints util.PortToLBEndpoints) []*knftables.Element { var nftRules []*knftables.Element protocol := strings.ToLower(string(svcPort.Protocol)) - port := fmt.Sprintf("%v", svcPort.TargetPort.IntValue()) - for _, ip := range localEndpoints { - setName := types.NFTMgmtPortNoSNATServicesV4 - if utilnet.IsIPv6String(ip) { - setName = types.NFTMgmtPortNoSNATServicesV6 - } + // Get the endpoints for the port key. + // svcPortKey is of format e.g. "TCP/my-port-name" or "TCP/" if name is empty + // (is the case when only a single ServicePort is defined on this service). + svcPortKey := util.GetServicePortKey(svcPort.Protocol, svcPort.Name) + lbEndpoints := localEndpoints[svcPortKey] + + for _, destination := range lbEndpoints.GetV4Destinations() { nftRules = append(nftRules, &knftables.Element{ - Set: setName, - Key: []string{ip, protocol, port}, + Set: types.NFTMgmtPortNoSNATServicesV4, + Key: []string{destination.IP, protocol, fmt.Sprintf("%d", destination.Port)}, }, ) } + + for _, destination := range lbEndpoints.GetV6Destinations() { + nftRules = append(nftRules, + &knftables.Element{ + Set: types.NFTMgmtPortNoSNATServicesV6, + Key: []string{destination.IP, protocol, fmt.Sprintf("%d", destination.Port)}, + }, + ) + } + return nftRules } @@ -164,7 +175,7 @@ func recreateNFTMap(mapName string, keepNFTElems []*knftables.Element) error { // getGatewayNFTRules returns nftables rules for service. This must be used in conjunction // with getGatewayIPTRules. -func getGatewayNFTRules(service *corev1.Service, localEndpoints []string, svcHasLocalHostNetEndPnt bool) []*knftables.Element { +func getGatewayNFTRules(service *corev1.Service, localEndpoints util.PortToLBEndpoints, svcHasLocalHostNetEndPnt bool) []*knftables.Element { rules := make([]*knftables.Element, 0) svcTypeIsETPLocal := util.ServiceExternalTrafficPolicyLocal(service) for _, svcPort := range service.Spec.Ports { diff --git a/go-controller/pkg/node/gateway_shared_intf.go b/go-controller/pkg/node/gateway_shared_intf.go index 773671b0ff..85141b1ff5 100644 --- a/go-controller/pkg/node/gateway_shared_intf.go +++ b/go-controller/pkg/node/gateway_shared_intf.go @@ -208,7 +208,7 @@ type serviceConfig struct { // hasLocalHostNetworkEp will be true for a service if it has at least one endpoint which is "hostnetworked&local-to-this-node". hasLocalHostNetworkEp bool // localEndpoints stores all the local non-host-networked endpoints for this service - localEndpoints sets.Set[string] + localEndpoints util.PortToLBEndpoints } type cidrAndFlags struct { @@ -616,7 +616,7 @@ func (npw *nodePortWatcher) getServiceInfo(index ktypes.NamespacedName) (out *se } // getAndSetServiceInfo creates and sets the serviceConfig, returns if it existed and whatever was there -func (npw *nodePortWatcher) getAndSetServiceInfo(index ktypes.NamespacedName, service *corev1.Service, hasLocalHostNetworkEp bool, localEndpoints sets.Set[string]) (old *serviceConfig, exists bool) { +func (npw *nodePortWatcher) getAndSetServiceInfo(index ktypes.NamespacedName, service *corev1.Service, hasLocalHostNetworkEp bool, localEndpoints util.PortToLBEndpoints) (old *serviceConfig, exists bool) { npw.serviceInfoLock.Lock() defer npw.serviceInfoLock.Unlock() @@ -630,7 +630,7 @@ func (npw *nodePortWatcher) getAndSetServiceInfo(index ktypes.NamespacedName, se } // addOrSetServiceInfo creates and sets the serviceConfig if it doesn't exist -func (npw *nodePortWatcher) addOrSetServiceInfo(index ktypes.NamespacedName, service *corev1.Service, hasLocalHostNetworkEp bool, localEndpoints sets.Set[string]) (exists bool) { +func (npw *nodePortWatcher) addOrSetServiceInfo(index ktypes.NamespacedName, service *corev1.Service, hasLocalHostNetworkEp bool, localEndpoints util.PortToLBEndpoints) (exists bool) { npw.serviceInfoLock.Lock() defer npw.serviceInfoLock.Unlock() @@ -645,7 +645,7 @@ func (npw *nodePortWatcher) addOrSetServiceInfo(index ktypes.NamespacedName, ser // updateServiceInfo sets the serviceConfig for a service and returns the existing serviceConfig, if inputs are nil // do not update those fields, if it does not exist return nil. -func (npw *nodePortWatcher) updateServiceInfo(index ktypes.NamespacedName, service *corev1.Service, hasLocalHostNetworkEp *bool, localEndpoints sets.Set[string]) (old *serviceConfig, exists bool) { +func (npw *nodePortWatcher) updateServiceInfo(index ktypes.NamespacedName, service *corev1.Service, hasLocalHostNetworkEp *bool, localEndpoints util.PortToLBEndpoints) (old *serviceConfig, exists bool) { npw.serviceInfoLock.Lock() defer npw.serviceInfoLock.Unlock() @@ -672,7 +672,7 @@ func (npw *nodePortWatcher) updateServiceInfo(index ktypes.NamespacedName, servi // addServiceRules ensures the correct iptables rules and OpenFlow physical // flows are programmed for a given service and endpoint configuration -func addServiceRules(service *corev1.Service, netInfo util.NetInfo, localEndpoints []string, svcHasLocalHostNetEndPnt bool, npw *nodePortWatcher) error { +func addServiceRules(service *corev1.Service, netInfo util.NetInfo, localEndpoints util.PortToLBEndpoints, svcHasLocalHostNetEndPnt bool, npw *nodePortWatcher) error { // For dpu or Full mode var err error var errors []error @@ -716,7 +716,7 @@ func addServiceRules(service *corev1.Service, netInfo util.NetInfo, localEndpoin // delServiceRules deletes all possible iptables rules and OpenFlow physical // flows for a service -func delServiceRules(service *corev1.Service, localEndpoints []string, npw *nodePortWatcher) error { +func delServiceRules(service *corev1.Service, localEndpoints util.PortToLBEndpoints, npw *nodePortWatcher) error { var err error var errors []error // full mode || dpu mode @@ -820,7 +820,7 @@ func serviceUpdateNotNeeded(old, new *corev1.Service) bool { // AddService handles configuring shared gateway bridge flows to steer External IP, Node Port, Ingress LB traffic into OVN func (npw *nodePortWatcher) AddService(service *corev1.Service) error { - var localEndpoints sets.Set[string] + var localEndpoints util.PortToLBEndpoints var hasLocalHostNetworkEp bool if !util.ServiceTypeHasClusterIP(service) || !util.IsClusterIPSet(service) { return nil @@ -853,9 +853,9 @@ func (npw *nodePortWatcher) AddService(service *corev1.Service) error { if exists := npw.addOrSetServiceInfo(name, service, hasLocalHostNetworkEp, localEndpoints); !exists { klog.V(5).Infof("Service Add %s event in namespace %s came before endpoint event setting svcConfig", service.Name, service.Namespace) - if err := addServiceRules(service, netInfo, sets.List(localEndpoints), hasLocalHostNetworkEp, npw); err != nil { + if err := addServiceRules(service, netInfo, localEndpoints, hasLocalHostNetworkEp, npw); err != nil { npw.getAndDeleteServiceInfo(name) - return fmt.Errorf("AddService failed for nodePortWatcher: %w, trying delete: %v", err, delServiceRules(service, sets.List(localEndpoints), npw)) + return fmt.Errorf("AddService failed for nodePortWatcher: %w, trying delete: %v", err, delServiceRules(service, localEndpoints, npw)) } } else { // Need to update flows here in case an attribute of the gateway has changed, such as MAC address @@ -892,7 +892,7 @@ func (npw *nodePortWatcher) UpdateService(old, new *corev1.Service) error { // so that we don't miss any endpoint update events here klog.V(5).Infof("Deleting old service rules for: %v", old) - if err = delServiceRules(old, sets.List(svcConfig.localEndpoints), npw); err != nil { + if err = delServiceRules(old, svcConfig.localEndpoints, npw); err != nil { errors = append(errors, err) } } @@ -905,7 +905,7 @@ func (npw *nodePortWatcher) UpdateService(old, new *corev1.Service) error { return fmt.Errorf("error getting active network for service %s in namespace %s: %w", new.Name, new.Namespace, err) } - if err = addServiceRules(new, netInfo, sets.List(svcConfig.localEndpoints), svcConfig.hasLocalHostNetworkEp, npw); err != nil { + if err = addServiceRules(new, netInfo, svcConfig.localEndpoints, svcConfig.hasLocalHostNetworkEp, npw); err != nil { errors = append(errors, err) } } @@ -968,7 +968,7 @@ func (npw *nodePortWatcher) DeleteService(service *corev1.Service) error { klog.V(5).Infof("Deleting service %s in namespace %s", service.Name, service.Namespace) name := ktypes.NamespacedName{Namespace: service.Namespace, Name: service.Name} if svcConfig, exists := npw.getAndDeleteServiceInfo(name); exists { - if err = delServiceRules(svcConfig.service, sets.List(svcConfig.localEndpoints), npw); err != nil { + if err = delServiceRules(svcConfig.service, svcConfig.localEndpoints, npw); err != nil { errors = append(errors, err) } } else { @@ -1040,9 +1040,8 @@ func (npw *nodePortWatcher) SyncServices(services []interface{}) error { } // Add correct netfilter rules only for Full mode if !npw.dpuMode { - localEndpointsArray := sets.List(localEndpoints) - keepIPTRules = append(keepIPTRules, getGatewayIPTRules(service, localEndpointsArray, hasLocalHostNetworkEp)...) - keepNFTSetElems = append(keepNFTSetElems, getGatewayNFTRules(service, localEndpointsArray, hasLocalHostNetworkEp)...) + keepIPTRules = append(keepIPTRules, getGatewayIPTRules(service, localEndpoints, hasLocalHostNetworkEp)...) + keepNFTSetElems = append(keepNFTSetElems, getGatewayNFTRules(service, localEndpoints, hasLocalHostNetworkEp)...) if util.IsNetworkSegmentationSupportEnabled() && netInfo.IsPrimaryNetwork() { netConfig := npw.ofm.getActiveNetwork(netInfo) if netConfig == nil { @@ -1140,7 +1139,7 @@ func (npw *nodePortWatcher) AddEndpointSlice(epSlice *discovery.EndpointSlice) e out, exists := npw.getServiceInfo(*svcNamespacedName) if !exists { klog.V(5).Infof("Endpointslice %s ADD event in namespace %s is creating rules", epSlice.Name, epSlice.Namespace) - if err = addServiceRules(svc, netInfo, sets.List(localEndpoints), hasLocalHostNetworkEp, npw); err != nil { + if err = addServiceRules(svc, netInfo, localEndpoints, hasLocalHostNetworkEp, npw); err != nil { return err } npw.addOrSetServiceInfo(*svcNamespacedName, svc, hasLocalHostNetworkEp, localEndpoints) @@ -1150,10 +1149,10 @@ func (npw *nodePortWatcher) AddEndpointSlice(epSlice *discovery.EndpointSlice) e if out.hasLocalHostNetworkEp != hasLocalHostNetworkEp || (!util.LoadBalancerServiceHasNodePortAllocation(svc) && !reflect.DeepEqual(out.localEndpoints, localEndpoints)) { klog.V(5).Infof("Endpointslice %s ADD event in namespace %s is updating rules", epSlice.Name, epSlice.Namespace) - if err = delServiceRules(svc, sets.List(out.localEndpoints), npw); err != nil { + if err = delServiceRules(svc, out.localEndpoints, npw); err != nil { errors = append(errors, err) } - if err = addServiceRules(svc, netInfo, sets.List(localEndpoints), hasLocalHostNetworkEp, npw); err != nil { + if err = addServiceRules(svc, netInfo, localEndpoints, hasLocalHostNetworkEp, npw); err != nil { errors = append(errors, err) } else { npw.updateServiceInfo(*svcNamespacedName, svc, &hasLocalHostNetworkEp, localEndpoints) @@ -1211,10 +1210,10 @@ func (npw *nodePortWatcher) DeleteEndpointSlice(epSlice *discovery.EndpointSlice npw.serviceInfoLock.Lock() defer npw.serviceInfoLock.Unlock() - if err = delServiceRules(svcConfig.service, sets.List(svcConfig.localEndpoints), npw); err != nil { + if err = delServiceRules(svcConfig.service, svcConfig.localEndpoints, npw); err != nil { errors = append(errors, err) } - if err = addServiceRules(svcConfig.service, netInfo, sets.List(localEndpoints), hasLocalHostNetworkEp, npw); err != nil { + if err = addServiceRules(svcConfig.service, netInfo, localEndpoints, hasLocalHostNetworkEp, npw); err != nil { errors = append(errors, err) } return utilerrors.Join(errors...) @@ -1222,9 +1221,24 @@ func (npw *nodePortWatcher) DeleteEndpointSlice(epSlice *discovery.EndpointSlice return nil } -// GetLocalEndpointAddresses returns a list of eligible endpoints that are local to the node -func (npw *nodePortWatcher) GetLocalEligibleEndpointAddresses(endpointSlices []*discovery.EndpointSlice, service *corev1.Service) sets.Set[string] { - return util.GetLocalEligibleEndpointAddressesFromSlices(endpointSlices, service, npw.nodeIPManager.nodeName) +// GetLocalEligibleEndpointAddresses returns eligible endpoints that are local to the node. +// This method uses util.GetEndpointsForService, the same as the services Controller via buildServiceLBConfigs, +// meaning that the nodePortWatcher and the services Controller now use common logic to build their service endpoints. +func (npw *nodePortWatcher) GetLocalEligibleEndpointAddresses(endpointSlices []*discovery.EndpointSlice, + service *corev1.Service) util.PortToLBEndpoints { + s := sets.Set[string]{} + s.Insert(npw.nodeIPManager.nodeName) + _, portToNodeToLBEndpoints, err := util.GetEndpointsForService(endpointSlices, service, s, false, true) + if err != nil { + if service != nil { + klog.Warningf("Failed to get local endpoints for service %s/%s on node %s: %v", + service.Namespace, service.Name, npw.nodeIPManager.nodeName, err) + } else { + klog.Warningf("Failed to get local endpoints on node %s: %v", npw.nodeIPManager.nodeName, err) + } + } + + return portToNodeToLBEndpoints.GetNode(npw.nodeIPManager.nodeName) } func (npw *nodePortWatcher) UpdateEndpointSlice(oldEpSlice, newEpSlice *discovery.EndpointSlice) error { diff --git a/go-controller/pkg/node/gateway_udn_test.go b/go-controller/pkg/node/gateway_udn_test.go index 984a666195..cf6ee7e1dc 100644 --- a/go-controller/pkg/node/gateway_udn_test.go +++ b/go-controller/pkg/node/gateway_udn_test.go @@ -14,6 +14,7 @@ import ( "github.com/containernetworking/plugins/pkg/testutils" nadapi "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/apis/k8s.cni.cncf.io/v1" nadfake "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/client/clientset/versioned/fake" + "github.com/spf13/afero" "github.com/stretchr/testify/mock" "github.com/vishvananda/netlink" "golang.org/x/sys/unix" @@ -170,7 +171,7 @@ func setUpGatewayFakeOVSCommands(fexec *ovntest.FakeExec) { Output: "cb9ec8fa-b409-4ef3-9f42-d9283c47aac6", }) fexec.AddFakeCmd(&ovntest.ExpectedCmd{ - Cmd: "ovs-appctl --timeout=15 dpif/show-dp-features breth0", + Cmd: "ovs-appctl -t /var/run/openvswitch/ovs-vswitchd.1234.ctl dpif/show-dp-features breth0", Output: "Check pkt length action: Yes", }) fexec.AddFakeCmd(&ovntest.ExpectedCmd{ @@ -178,7 +179,7 @@ func setUpGatewayFakeOVSCommands(fexec *ovntest.FakeExec) { Output: "false", }) fexec.AddFakeCmdsNoOutputNoError([]string{ - "ovs-appctl --timeout=15 fdb/add breth0 breth0 0 00:00:00:55:66:99", + "ovs-appctl -t /var/run/openvswitch/ovs-vswitchd.1234.ctl fdb/add breth0 breth0 0 00:00:00:55:66:99", }) fexec.AddFakeCmd(&ovntest.ExpectedCmd{ Cmd: "ovs-vsctl --timeout=15 get Interface patch-breth0_worker1-to-br-int ofport", @@ -293,6 +294,9 @@ var _ = Describe("UserDefinedNetworkGateway", func() { config.Gateway.V4MasqueradeSubnet = "169.254.0.0/17" // Set up a fake vsctl command mock interface fexec = ovntest.NewFakeExec() + // Setup mock filesystem for ovs-vswitchd.pid file needed by ovs-appctl commands + Expect(util.AppFs.MkdirAll("/var/run/openvswitch/", 0o755)).To(Succeed()) + Expect(afero.WriteFile(util.AppFs, "/var/run/openvswitch/ovs-vswitchd.pid", []byte("1234"), 0o644)).To(Succeed()) Expect(util.SetExec(fexec)).To(Succeed()) // Set up a fake k8sMgmt interface testNS, err = testutils.NewNS() diff --git a/go-controller/pkg/ovn/base_network_controller_pods.go b/go-controller/pkg/ovn/base_network_controller_pods.go index 8ac4974c00..5a318bd859 100644 --- a/go-controller/pkg/ovn/base_network_controller_pods.go +++ b/go-controller/pkg/ovn/base_network_controller_pods.go @@ -1045,6 +1045,12 @@ func (bnc *BaseNetworkController) shouldReleaseDeletedPod(pod *corev1.Pod, switc } } + // this pod is being deleted before completion so there is no risk (and no + // need to check) that its IPs are being used by other pod + if !util.PodCompleted(pod) { + return true, nil + } + if bnc.wasPodReleasedBeforeStartup(string(pod.UID), nad) { klog.Infof("Completed pod %s/%s was already released for nad %s before startup", pod.Namespace, diff --git a/go-controller/pkg/ovn/base_network_controller_pods_test.go b/go-controller/pkg/ovn/base_network_controller_pods_test.go index 4d1a6393c4..2987a1ebe2 100644 --- a/go-controller/pkg/ovn/base_network_controller_pods_test.go +++ b/go-controller/pkg/ovn/base_network_controller_pods_test.go @@ -234,3 +234,41 @@ func TestBaseNetworkController_trackPodsReleasedBeforeStartup(t *testing.T) { }) } } + +func TestBaseNetworkController_shouldReleaseDeletedPod(t *testing.T) { + tests := []struct { + name string // description of this test case + // Named input parameters for target function. + pod *corev1.Pod + switchName string + nad string + podIfAddrs []*net.IPNet + want bool + wantErr bool + }{ + { + name: "should release a running pod", + pod: &corev1.Pod{Status: corev1.PodStatus{Phase: corev1.PodRunning}}, + want: true, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + var bnc BaseNetworkController + bnc.ReconcilableNetInfo = &util.DefaultNetInfo{} + got, gotErr := bnc.shouldReleaseDeletedPod(tt.pod, tt.switchName, tt.nad, tt.podIfAddrs) + if gotErr != nil { + if !tt.wantErr { + t.Errorf("shouldReleaseDeletedPod() failed: %v", gotErr) + } + return + } + if tt.wantErr { + t.Fatal("shouldReleaseDeletedPod() succeeded unexpectedly") + } + if got != tt.want { + t.Errorf("shouldReleaseDeletedPod() = %v, want %v", got, tt.want) + } + }) + } +} diff --git a/go-controller/pkg/ovn/controller/services/lb_config.go b/go-controller/pkg/ovn/controller/services/lb_config.go index b6bbd833ba..a06059e31a 100644 --- a/go-controller/pkg/ovn/controller/services/lb_config.go +++ b/go-controller/pkg/ovn/controller/services/lb_config.go @@ -30,8 +30,8 @@ type lbConfig struct { protocol corev1.Protocol // TCP, UDP, or SCTP inport int32 // the incoming (virtual) port number - clusterEndpoints lbEndpoints // addresses of cluster-wide endpoints - nodeEndpoints map[string]lbEndpoints // node -> addresses of local endpoints + clusterEndpoints util.LBEndpoints // addresses of cluster-wide endpoints + nodeEndpoints map[string]util.LBEndpoints // node -> addresses of local endpoints // if true, then vips added on the router are in "local" mode // that means, skipSNAT, and remove any non-local endpoints. @@ -44,12 +44,6 @@ type lbConfig struct { hasNodePort bool } -type lbEndpoints struct { - Port int32 - V4IPs []string - V6IPs []string -} - func makeNodeSwitchTargetIPs(service *corev1.Service, node string, c *lbConfig) (targetIPsV4, targetIPsV6 []string, v4Changed, v6Changed bool) { targetIPsV4 = c.clusterEndpoints.V4IPs targetIPsV6 = c.clusterEndpoints.V6IPs @@ -166,7 +160,7 @@ var protos = []corev1.Protocol{ // - services with NodePort set but *without* ExternalTrafficPolicy=Local or // affinity timeout set. func buildServiceLBConfigs(service *corev1.Service, endpointSlices []*discovery.EndpointSlice, nodeInfos []nodeInfo, - useLBGroup, useTemplates bool, networkName string) (perNodeConfigs, templateConfigs, clusterConfigs []lbConfig) { + useLBGroup, useTemplates bool) (perNodeConfigs, templateConfigs, clusterConfigs []lbConfig) { needsAffinityTimeout := hasSessionAffinityTimeOut(service) @@ -175,13 +169,21 @@ func buildServiceLBConfigs(service *corev1.Service, endpointSlices []*discovery. nodes.Insert(n.name) } // get all the endpoints classified by port and by port,node - portToClusterEndpoints, portToNodeToEndpoints := getEndpointsForService(endpointSlices, service, nodes, networkName) + needsLocalEndpoints := util.ServiceExternalTrafficPolicyLocal(service) || util.ServiceInternalTrafficPolicyLocal(service) + portToClusterEndpoints, portToNodeToEndpoints, err := util.GetEndpointsForService(endpointSlices, service, nodes, true, needsLocalEndpoints) + if err != nil { + if service != nil { + klog.Warningf("Failed to get endpoints for service %s/%s during LB config build: %v", service.Namespace, service.Name, err) + } else { + klog.Warningf("Failed to get endpoints for service during LB config build: %v", err) + } + } for _, svcPort := range service.Spec.Ports { - svcPortKey := getServicePortKey(svcPort.Protocol, svcPort.Name) + svcPortKey := util.GetServicePortKey(svcPort.Protocol, svcPort.Name) clusterEndpoints := portToClusterEndpoints[svcPortKey] nodeEndpoints := portToNodeToEndpoints[svcPortKey] if nodeEndpoints == nil { - nodeEndpoints = make(map[string]lbEndpoints) + nodeEndpoints = make(map[string]util.LBEndpoints) } // if ExternalTrafficPolicy or InternalTrafficPolicy is local, then we need to do things a bit differently externalTrafficLocal := util.ServiceExternalTrafficPolicyLocal(service) @@ -944,121 +946,3 @@ func joinHostsPort(ips []string, port int32) []Addr { } return out } - -func getServicePortKey(protocol corev1.Protocol, name string) string { - return fmt.Sprintf("%s/%s", protocol, name) -} - -// GetEndpointsForService takes a service, all its slices and the list of nodes in the OVN zone -// and returns two maps that hold all the endpoint addresses for the service: -// one classified by port, one classified by port,node. This second map is only filled in -// when the service needs local (per-node) endpoints, that is when ETP=local or ITP=local. -// The node list helps to keep the resulting map small, since we're only interested in local endpoints. -func getEndpointsForService(slices []*discovery.EndpointSlice, service *corev1.Service, nodes sets.Set[string], - networkName string) (map[string]lbEndpoints, map[string]map[string]lbEndpoints) { - - // classify endpoints - ports := map[string]int32{} - portToEndpoints := map[string][]discovery.Endpoint{} - portToNodeToEndpoints := map[string]map[string][]discovery.Endpoint{} - requiresLocalEndpoints := util.ServiceExternalTrafficPolicyLocal(service) || util.ServiceInternalTrafficPolicyLocal(service) - - for _, port := range service.Spec.Ports { - name := getServicePortKey(port.Protocol, port.Name) - ports[name] = 0 - } - - for _, slice := range slices { - - if slice.AddressType == discovery.AddressTypeFQDN { - continue // consider only v4 and v6, discard FQDN - } - - slicePorts := make([]string, 0, len(slice.Ports)) - - for _, port := range slice.Ports { - // check if there's a service port matching the slice protocol/name - slicePortName := "" - if port.Name != nil { - slicePortName = *port.Name - } - name := getServicePortKey(*port.Protocol, slicePortName) - if _, hasPort := ports[name]; hasPort { - slicePorts = append(slicePorts, name) - ports[name] = *port.Port - continue - } - // service port name might be empty: check against slice protocol/"" - noName := getServicePortKey(*port.Protocol, "") - if _, hasPort := ports[noName]; hasPort { - slicePorts = append(slicePorts, name) - ports[noName] = *port.Port - } - } - for _, endpoint := range slice.Endpoints { - for _, port := range slicePorts { - - portToEndpoints[port] = append(portToEndpoints[port], endpoint) - - // won't add items to portToNodeToEndpoints if the service doesn't need it, - // the endpoint is not assigned to a node yet or the endpoint is not local to the OVN zone - if !requiresLocalEndpoints || endpoint.NodeName == nil || !nodes.Has(*endpoint.NodeName) { - continue - } - if portToNodeToEndpoints[port] == nil { - portToNodeToEndpoints[port] = make(map[string][]discovery.Endpoint, len(nodes)) - } - - if portToNodeToEndpoints[port][*endpoint.NodeName] == nil { - portToNodeToEndpoints[port][*endpoint.NodeName] = []discovery.Endpoint{} - } - portToNodeToEndpoints[port][*endpoint.NodeName] = append(portToNodeToEndpoints[port][*endpoint.NodeName], endpoint) - } - } - } - - // get eligible endpoint addresses - portToLBEndpoints := make(map[string]lbEndpoints, len(portToEndpoints)) - portToNodeToLBEndpoints := make(map[string]map[string]lbEndpoints, len(portToEndpoints)) - - for port, endpoints := range portToEndpoints { - addresses := util.GetEligibleEndpointAddresses(endpoints, service) - v4IPs, _ := util.MatchAllIPStringFamily(false, addresses) - v6IPs, _ := util.MatchAllIPStringFamily(true, addresses) - if len(v4IPs) > 0 || len(v6IPs) > 0 { - portToLBEndpoints[port] = lbEndpoints{ - V4IPs: v4IPs, - V6IPs: v6IPs, - Port: ports[port], - } - } - } - klog.V(5).Infof("Cluster endpoints for %s/%s for network=%s are: %v", - service.Namespace, service.Name, networkName, portToLBEndpoints) - - for port, nodeToEndpoints := range portToNodeToEndpoints { - for node, endpoints := range nodeToEndpoints { - addresses := util.GetEligibleEndpointAddresses(endpoints, service) - v4IPs, _ := util.MatchAllIPStringFamily(false, addresses) - v6IPs, _ := util.MatchAllIPStringFamily(true, addresses) - if len(v4IPs) > 0 || len(v6IPs) > 0 { - if portToNodeToLBEndpoints[port] == nil { - portToNodeToLBEndpoints[port] = make(map[string]lbEndpoints, len(nodes)) - } - - portToNodeToLBEndpoints[port][node] = lbEndpoints{ - V4IPs: v4IPs, - V6IPs: v6IPs, - Port: ports[port], - } - } - } - } - - if requiresLocalEndpoints { - klog.V(5).Infof("Local endpoints for %s/%s for network=%s are: %v", - service.Namespace, service.Name, networkName, portToNodeToLBEndpoints) - } - - return portToLBEndpoints, portToNodeToLBEndpoints -} diff --git a/go-controller/pkg/ovn/controller/services/lb_config_ocphack_test.go b/go-controller/pkg/ovn/controller/services/lb_config_ocphack_test.go index 48975cd65a..bc51c4f218 100644 --- a/go-controller/pkg/ovn/controller/services/lb_config_ocphack_test.go +++ b/go-controller/pkg/ovn/controller/services/lb_config_ocphack_test.go @@ -90,7 +90,7 @@ func Test_buildPerNodeLBs_OCPHackForDNS(t *testing.T) { vips: []string{"192.168.1.1"}, protocol: corev1.ProtocolTCP, inport: 80, - clusterEndpoints: lbEndpoints{ + clusterEndpoints: util.LBEndpoints{ V4IPs: []string{"10.128.0.2", "10.128.1.2"}, Port: 8080, }, @@ -263,11 +263,11 @@ func Test_buildPerNodeLBs_OCPHackForLocalWithFallback(t *testing.T) { inport: 5, // node port externalTrafficLocal: true, hasNodePort: true, - clusterEndpoints: lbEndpoints{ + clusterEndpoints: util.LBEndpoints{ V4IPs: []string{"10.128.0.2", "10.128.1.2"}, Port: outport, }, - nodeEndpoints: map[string]lbEndpoints{ + nodeEndpoints: map[string]util.LBEndpoints{ nodeA: {V4IPs: []string{"10.128.0.2"}, Port: outport}, nodeB: {V4IPs: []string{"10.128.1.2"}, Port: outport}, }, @@ -277,11 +277,11 @@ func Test_buildPerNodeLBs_OCPHackForLocalWithFallback(t *testing.T) { protocol: corev1.ProtocolTCP, inport: inport, externalTrafficLocal: true, - clusterEndpoints: lbEndpoints{ + clusterEndpoints: util.LBEndpoints{ V4IPs: []string{"10.128.0.2", "10.128.1.2"}, Port: outport, }, - nodeEndpoints: map[string]lbEndpoints{ + nodeEndpoints: map[string]util.LBEndpoints{ nodeA: {V4IPs: []string{"10.128.0.2"}, Port: outport}, nodeB: {V4IPs: []string{"10.128.1.2"}, Port: outport}, }, @@ -362,11 +362,11 @@ func Test_buildPerNodeLBs_OCPHackForLocalWithFallback(t *testing.T) { inport: 5, // node port externalTrafficLocal: true, hasNodePort: true, - clusterEndpoints: lbEndpoints{ + clusterEndpoints: util.LBEndpoints{ V4IPs: []string{"10.128.1.2"}, // only endpoint on node-b is running Port: outport, }, - nodeEndpoints: map[string]lbEndpoints{ + nodeEndpoints: map[string]util.LBEndpoints{ nodeB: {V4IPs: []string{"10.128.1.2"}, Port: outport}, }, }, @@ -375,11 +375,11 @@ func Test_buildPerNodeLBs_OCPHackForLocalWithFallback(t *testing.T) { protocol: corev1.ProtocolTCP, inport: inport, externalTrafficLocal: true, - clusterEndpoints: lbEndpoints{ + clusterEndpoints: util.LBEndpoints{ V4IPs: []string{"10.128.1.2"}, Port: outport, }, - nodeEndpoints: map[string]lbEndpoints{ + nodeEndpoints: map[string]util.LBEndpoints{ nodeB: {V4IPs: []string{"10.128.1.2"}, Port: outport}, }, }, diff --git a/go-controller/pkg/ovn/controller/services/lb_config_test.go b/go-controller/pkg/ovn/controller/services/lb_config_test.go index 15e653fec1..3b22e1e2bf 100644 --- a/go-controller/pkg/ovn/controller/services/lb_config_test.go +++ b/go-controller/pkg/ovn/controller/services/lb_config_test.go @@ -60,10 +60,10 @@ func getSampleService(publishNotReadyAddresses bool) *corev1.Service { } } -func getServicePort(name string, _ int32, protocol corev1.Protocol) corev1.ServicePort { +func getServicePort(name string, port int32, protocol corev1.Protocol) corev1.ServicePort { return corev1.ServicePort{ Name: name, - TargetPort: intstr.FromInt(int(httpPortValue)), + TargetPort: intstr.FromInt(int(port)), Protocol: protocol, } } @@ -247,8 +247,8 @@ func Test_buildServiceLBConfigs(t *testing.T) { vips: []string{"192.168.1.1"}, protocol: corev1.ProtocolTCP, inport: 80, - clusterEndpoints: lbEndpoints{}, - nodeEndpoints: map[string]lbEndpoints{}, + clusterEndpoints: util.LBEndpoints{}, + nodeEndpoints: util.PortToLBEndpoints{}, }}, resultsSame: true, }, @@ -275,11 +275,11 @@ func Test_buildServiceLBConfigs(t *testing.T) { vips: []string{"192.168.1.1"}, protocol: corev1.ProtocolTCP, inport: inport, - clusterEndpoints: lbEndpoints{ + clusterEndpoints: util.LBEndpoints{ V4IPs: []string{"10.128.0.2"}, Port: outport, }, - nodeEndpoints: map[string]lbEndpoints{}, // service is not ETP=local or ITP=local, so nodeEndpoints is not filled out + nodeEndpoints: util.PortToLBEndpoints{}, // service is not ETP=local or ITP=local, so nodeEndpoints is not filled out }}, resultsSame: true, }, @@ -307,11 +307,11 @@ func Test_buildServiceLBConfigs(t *testing.T) { vips: []string{"192.168.1.1"}, protocol: corev1.ProtocolTCP, inport: inport, - clusterEndpoints: lbEndpoints{ + clusterEndpoints: util.LBEndpoints{ V4IPs: []string{"10.128.0.2"}, Port: outport, }, - nodeEndpoints: map[string]lbEndpoints{ // service is ETP=local, so nodeEndpoints is filled out + nodeEndpoints: util.PortToLBEndpoints{ // service is ETP=local, so nodeEndpoints is filled out nodeA: { V4IPs: []string{"10.128.0.2"}, Port: outport, @@ -374,21 +374,21 @@ func Test_buildServiceLBConfigs(t *testing.T) { vips: []string{"192.168.1.1"}, protocol: corev1.ProtocolTCP, inport: inport, - clusterEndpoints: lbEndpoints{ + clusterEndpoints: util.LBEndpoints{ V4IPs: []string{"10.128.0.2", "10.128.1.2"}, Port: outport, }, - nodeEndpoints: map[string]lbEndpoints{}, + nodeEndpoints: util.PortToLBEndpoints{}, }, { vips: []string{"192.168.1.1"}, protocol: corev1.ProtocolTCP, inport: inport1, - clusterEndpoints: lbEndpoints{ + clusterEndpoints: util.LBEndpoints{ V4IPs: []string{"10.128.0.2", "10.128.1.2"}, Port: outport1, }, - nodeEndpoints: map[string]lbEndpoints{}, + nodeEndpoints: util.PortToLBEndpoints{}, }, }, }, @@ -446,21 +446,21 @@ func Test_buildServiceLBConfigs(t *testing.T) { vips: []string{"192.168.1.1"}, protocol: corev1.ProtocolTCP, inport: inport, - clusterEndpoints: lbEndpoints{ + clusterEndpoints: util.LBEndpoints{ V4IPs: []string{"10.128.0.2", "10.128.1.2"}, Port: outport, }, - nodeEndpoints: map[string]lbEndpoints{}, + nodeEndpoints: util.PortToLBEndpoints{}, }, { vips: []string{"192.168.1.1"}, protocol: corev1.ProtocolUDP, inport: inport, - clusterEndpoints: lbEndpoints{ + clusterEndpoints: util.LBEndpoints{ V4IPs: []string{"10.128.0.2", "10.128.1.2"}, Port: outport, }, - nodeEndpoints: map[string]lbEndpoints{}, + nodeEndpoints: util.PortToLBEndpoints{}, }, }, }, @@ -488,12 +488,12 @@ func Test_buildServiceLBConfigs(t *testing.T) { vips: []string{"192.168.1.1", "2002::1"}, protocol: corev1.ProtocolTCP, inport: inport, - clusterEndpoints: lbEndpoints{ + clusterEndpoints: util.LBEndpoints{ V4IPs: []string{"10.128.0.2"}, V6IPs: []string{"fe00::1:1"}, Port: outport, }, - nodeEndpoints: map[string]lbEndpoints{}, + nodeEndpoints: util.PortToLBEndpoints{}, }}, }, { @@ -528,12 +528,12 @@ func Test_buildServiceLBConfigs(t *testing.T) { vips: []string{"192.168.1.1", "2002::1", "4.2.2.2", "42::42", "5.5.5.5"}, protocol: corev1.ProtocolTCP, inport: inport, - clusterEndpoints: lbEndpoints{ + clusterEndpoints: util.LBEndpoints{ V4IPs: []string{"10.128.0.2"}, V6IPs: []string{"fe00::1:1"}, Port: outport, }, - nodeEndpoints: map[string]lbEndpoints{}, // ETP=cluster (default), so nodeEndpoints is not filled out + nodeEndpoints: util.PortToLBEndpoints{}, // ETP=cluster (default), so nodeEndpoints is not filled out }}, }, { @@ -570,12 +570,12 @@ func Test_buildServiceLBConfigs(t *testing.T) { vips: []string{"192.168.1.1", "2002::1"}, protocol: corev1.ProtocolTCP, inport: inport, - clusterEndpoints: lbEndpoints{ + clusterEndpoints: util.LBEndpoints{ V4IPs: []string{"10.128.0.2"}, V6IPs: []string{"fe00::1:1"}, Port: outport, }, - nodeEndpoints: map[string]lbEndpoints{ + nodeEndpoints: util.PortToLBEndpoints{ nodeA: { V4IPs: []string{"10.128.0.2"}, V6IPs: []string{"fe00::1:1"}, @@ -590,12 +590,12 @@ func Test_buildServiceLBConfigs(t *testing.T) { protocol: corev1.ProtocolTCP, inport: inport, externalTrafficLocal: true, - clusterEndpoints: lbEndpoints{ + clusterEndpoints: util.LBEndpoints{ V4IPs: []string{"10.128.0.2"}, V6IPs: []string{"fe00::1:1"}, Port: outport, }, - nodeEndpoints: map[string]lbEndpoints{ + nodeEndpoints: util.PortToLBEndpoints{ nodeA: { V4IPs: []string{"10.128.0.2"}, V6IPs: []string{"fe00::1:1"}, @@ -630,23 +630,23 @@ func Test_buildServiceLBConfigs(t *testing.T) { vips: []string{"192.168.1.1", "2002::1"}, protocol: corev1.ProtocolTCP, inport: inport, - clusterEndpoints: lbEndpoints{ + clusterEndpoints: util.LBEndpoints{ V4IPs: []string{"10.128.0.2"}, V6IPs: []string{"fe00::1:1"}, Port: outport, }, - nodeEndpoints: map[string]lbEndpoints{}, + nodeEndpoints: util.PortToLBEndpoints{}, }}, resultSharedGatewayTemplate: []lbConfig{{ vips: []string{"node"}, protocol: corev1.ProtocolTCP, inport: 5, - clusterEndpoints: lbEndpoints{ + clusterEndpoints: util.LBEndpoints{ V4IPs: []string{"10.128.0.2"}, V6IPs: []string{"fe00::1:1"}, Port: outport, }, - nodeEndpoints: map[string]lbEndpoints{}, + nodeEndpoints: util.PortToLBEndpoints{}, hasNodePort: true, }}, }, @@ -677,12 +677,12 @@ func Test_buildServiceLBConfigs(t *testing.T) { vips: []string{"192.168.1.1", "2002::1"}, protocol: corev1.ProtocolTCP, inport: inport, - clusterEndpoints: lbEndpoints{ + clusterEndpoints: util.LBEndpoints{ V4IPs: []string{"192.168.0.1"}, V6IPs: []string{"2001::1"}, Port: outport, }, - nodeEndpoints: map[string]lbEndpoints{}, + nodeEndpoints: util.PortToLBEndpoints{}, }, }, resultSharedGatewayTemplate: []lbConfig{ @@ -690,12 +690,12 @@ func Test_buildServiceLBConfigs(t *testing.T) { vips: []string{"node"}, protocol: corev1.ProtocolTCP, inport: 5, - clusterEndpoints: lbEndpoints{ + clusterEndpoints: util.LBEndpoints{ V4IPs: []string{"192.168.0.1"}, V6IPs: []string{"2001::1"}, Port: outport, }, - nodeEndpoints: map[string]lbEndpoints{}, + nodeEndpoints: util.PortToLBEndpoints{}, hasNodePort: true, }, }, @@ -705,12 +705,12 @@ func Test_buildServiceLBConfigs(t *testing.T) { vips: []string{"192.168.1.1", "2002::1"}, protocol: corev1.ProtocolTCP, inport: inport, - clusterEndpoints: lbEndpoints{ + clusterEndpoints: util.LBEndpoints{ V4IPs: []string{"192.168.0.1"}, V6IPs: []string{"2001::1"}, Port: outport, }, - nodeEndpoints: map[string]lbEndpoints{}, + nodeEndpoints: util.PortToLBEndpoints{}, }, }, resultLocalGatewayTemplate: []lbConfig{ @@ -718,12 +718,12 @@ func Test_buildServiceLBConfigs(t *testing.T) { vips: []string{"node"}, protocol: corev1.ProtocolTCP, inport: 5, - clusterEndpoints: lbEndpoints{ + clusterEndpoints: util.LBEndpoints{ V4IPs: []string{"192.168.0.1"}, V6IPs: []string{"2001::1"}, Port: outport, }, - nodeEndpoints: map[string]lbEndpoints{}, + nodeEndpoints: util.PortToLBEndpoints{}, hasNodePort: true, }, }, @@ -756,12 +756,12 @@ func Test_buildServiceLBConfigs(t *testing.T) { vips: []string{"node"}, protocol: corev1.ProtocolTCP, inport: 5, - clusterEndpoints: lbEndpoints{ + clusterEndpoints: util.LBEndpoints{ V4IPs: []string{"192.168.0.1"}, V6IPs: []string{"2001::1"}, Port: outport, }, - nodeEndpoints: map[string]lbEndpoints{ + nodeEndpoints: util.PortToLBEndpoints{ nodeA: { V4IPs: []string{"192.168.0.1"}, V6IPs: []string{"2001::1"}, @@ -775,12 +775,12 @@ func Test_buildServiceLBConfigs(t *testing.T) { vips: []string{"192.168.1.1", "2002::1"}, protocol: corev1.ProtocolTCP, inport: inport, - clusterEndpoints: lbEndpoints{ + clusterEndpoints: util.LBEndpoints{ V4IPs: []string{"192.168.0.1"}, V6IPs: []string{"2001::1"}, Port: outport, }, - nodeEndpoints: map[string]lbEndpoints{ + nodeEndpoints: util.PortToLBEndpoints{ nodeA: { V4IPs: []string{"192.168.0.1"}, V6IPs: []string{"2001::1"}, @@ -794,12 +794,12 @@ func Test_buildServiceLBConfigs(t *testing.T) { vips: []string{"node"}, protocol: corev1.ProtocolTCP, inport: 5, - clusterEndpoints: lbEndpoints{ + clusterEndpoints: util.LBEndpoints{ V4IPs: []string{"192.168.0.1"}, V6IPs: []string{"2001::1"}, Port: outport, }, - nodeEndpoints: map[string]lbEndpoints{ + nodeEndpoints: util.PortToLBEndpoints{ nodeA: { V4IPs: []string{"192.168.0.1"}, V6IPs: []string{"2001::1"}, @@ -813,12 +813,12 @@ func Test_buildServiceLBConfigs(t *testing.T) { vips: []string{"192.168.1.1", "2002::1"}, protocol: corev1.ProtocolTCP, inport: inport, - clusterEndpoints: lbEndpoints{ + clusterEndpoints: util.LBEndpoints{ V4IPs: []string{"192.168.0.1"}, V6IPs: []string{"2001::1"}, Port: outport, }, - nodeEndpoints: map[string]lbEndpoints{ + nodeEndpoints: util.PortToLBEndpoints{ nodeA: { V4IPs: []string{"192.168.0.1"}, V6IPs: []string{"2001::1"}, @@ -854,12 +854,12 @@ func Test_buildServiceLBConfigs(t *testing.T) { vips: []string{"192.168.1.1", "2002::1"}, protocol: corev1.ProtocolTCP, inport: inport, - clusterEndpoints: lbEndpoints{ + clusterEndpoints: util.LBEndpoints{ V4IPs: []string{"192.168.0.1"}, V6IPs: []string{"2001::1"}, Port: outport, }, - nodeEndpoints: map[string]lbEndpoints{}, + nodeEndpoints: util.PortToLBEndpoints{}, }, }, resultLocalGatewayNode: []lbConfig{ @@ -867,12 +867,12 @@ func Test_buildServiceLBConfigs(t *testing.T) { vips: []string{"192.168.1.1", "2002::1"}, protocol: corev1.ProtocolTCP, inport: inport, - clusterEndpoints: lbEndpoints{ + clusterEndpoints: util.LBEndpoints{ V4IPs: []string{"192.168.0.1"}, V6IPs: []string{"2001::1"}, Port: outport, }, - nodeEndpoints: map[string]lbEndpoints{}, + nodeEndpoints: util.PortToLBEndpoints{}, }, }, }, @@ -915,11 +915,11 @@ func Test_buildServiceLBConfigs(t *testing.T) { vips: []string{"192.168.1.1"}, protocol: corev1.ProtocolTCP, inport: inport, - clusterEndpoints: lbEndpoints{ + clusterEndpoints: util.LBEndpoints{ V4IPs: []string{"10.128.0.2", "10.128.1.2"}, Port: outport, }, - nodeEndpoints: map[string]lbEndpoints{ + nodeEndpoints: util.PortToLBEndpoints{ nodeA: { V4IPs: []string{"10.128.0.2"}, Port: outport, @@ -938,11 +938,11 @@ func Test_buildServiceLBConfigs(t *testing.T) { inport: 5, hasNodePort: true, externalTrafficLocal: true, - clusterEndpoints: lbEndpoints{ + clusterEndpoints: util.LBEndpoints{ V4IPs: []string{"10.128.0.2", "10.128.1.2"}, Port: outport, }, - nodeEndpoints: map[string]lbEndpoints{ + nodeEndpoints: util.PortToLBEndpoints{ nodeA: { V4IPs: []string{"10.128.0.2"}, Port: outport, @@ -959,11 +959,11 @@ func Test_buildServiceLBConfigs(t *testing.T) { inport: inport, hasNodePort: false, externalTrafficLocal: true, - clusterEndpoints: lbEndpoints{ + clusterEndpoints: util.LBEndpoints{ V4IPs: []string{"10.128.0.2", "10.128.1.2"}, Port: outport, }, - nodeEndpoints: map[string]lbEndpoints{ + nodeEndpoints: util.PortToLBEndpoints{ nodeA: { V4IPs: []string{"10.128.0.2"}, Port: outport, @@ -1016,11 +1016,11 @@ func Test_buildServiceLBConfigs(t *testing.T) { vips: []string{"192.168.1.1"}, protocol: corev1.ProtocolTCP, inport: inport, - clusterEndpoints: lbEndpoints{ + clusterEndpoints: util.LBEndpoints{ V4IPs: []string{"10.128.0.2"}, Port: outport, }, - nodeEndpoints: map[string]lbEndpoints{ + nodeEndpoints: util.PortToLBEndpoints{ nodeA: { V4IPs: []string{"10.128.0.2"}, Port: outport, @@ -1039,11 +1039,11 @@ func Test_buildServiceLBConfigs(t *testing.T) { inport: 5, hasNodePort: true, externalTrafficLocal: true, - clusterEndpoints: lbEndpoints{ + clusterEndpoints: util.LBEndpoints{ V4IPs: []string{"10.128.0.2"}, Port: outport, }, - nodeEndpoints: map[string]lbEndpoints{ + nodeEndpoints: util.PortToLBEndpoints{ nodeA: { V4IPs: []string{"10.128.0.2"}, Port: outport, @@ -1060,11 +1060,11 @@ func Test_buildServiceLBConfigs(t *testing.T) { inport: inport, hasNodePort: false, externalTrafficLocal: true, - clusterEndpoints: lbEndpoints{ + clusterEndpoints: util.LBEndpoints{ V4IPs: []string{"10.128.0.2"}, Port: outport, }, - nodeEndpoints: map[string]lbEndpoints{ + nodeEndpoints: util.PortToLBEndpoints{ nodeA: { V4IPs: []string{"10.128.0.2"}, Port: outport, @@ -1115,11 +1115,11 @@ func Test_buildServiceLBConfigs(t *testing.T) { vips: []string{"192.168.1.1"}, protocol: corev1.ProtocolTCP, inport: inport, - clusterEndpoints: lbEndpoints{ + clusterEndpoints: util.LBEndpoints{ V4IPs: []string{"10.128.0.2"}, Port: outport, }, - nodeEndpoints: map[string]lbEndpoints{ + nodeEndpoints: util.PortToLBEndpoints{ nodeA: { V4IPs: []string{"10.128.0.2"}, Port: outport, @@ -1134,11 +1134,11 @@ func Test_buildServiceLBConfigs(t *testing.T) { inport: 5, hasNodePort: true, externalTrafficLocal: true, - clusterEndpoints: lbEndpoints{ + clusterEndpoints: util.LBEndpoints{ V4IPs: []string{"10.128.0.2"}, Port: outport, }, - nodeEndpoints: map[string]lbEndpoints{ + nodeEndpoints: util.PortToLBEndpoints{ nodeA: { V4IPs: []string{"10.128.0.2"}, Port: outport, @@ -1151,11 +1151,11 @@ func Test_buildServiceLBConfigs(t *testing.T) { inport: inport, hasNodePort: false, externalTrafficLocal: true, - clusterEndpoints: lbEndpoints{ + clusterEndpoints: util.LBEndpoints{ V4IPs: []string{"10.128.0.2"}, Port: outport, }, - nodeEndpoints: map[string]lbEndpoints{ + nodeEndpoints: util.PortToLBEndpoints{ nodeA: { V4IPs: []string{"10.128.0.2"}, Port: outport, @@ -1170,7 +1170,7 @@ func Test_buildServiceLBConfigs(t *testing.T) { t.Run(fmt.Sprintf("%d_%s", i, tt.name), func(t *testing.T) { // shared gateway mode globalconfig.Gateway.Mode = globalconfig.GatewayModeShared - perNode, template, clusterWide := buildServiceLBConfigs(tt.args.service, tt.args.slices, defaultNodes, true, true, types.DefaultNetworkName) + perNode, template, clusterWide := buildServiceLBConfigs(tt.args.service, tt.args.slices, defaultNodes, true, true) assert.EqualValues(t, tt.resultSharedGatewayNode, perNode, "SGW per-node configs should be equal") assert.EqualValues(t, tt.resultSharedGatewayTemplate, template, "SGW template configs should be equal") @@ -1179,7 +1179,7 @@ func Test_buildServiceLBConfigs(t *testing.T) { // local gateway mode globalconfig.Gateway.Mode = globalconfig.GatewayModeLocal - perNode, template, clusterWide = buildServiceLBConfigs(tt.args.service, tt.args.slices, defaultNodes, true, true, types.DefaultNetworkName) + perNode, template, clusterWide = buildServiceLBConfigs(tt.args.service, tt.args.slices, defaultNodes, true, true) if tt.resultsSame { assert.EqualValues(t, tt.resultSharedGatewayNode, perNode, "LGW per-node configs should be equal") assert.EqualValues(t, tt.resultSharedGatewayTemplate, template, "LGW template configs should be equal") @@ -1239,11 +1239,11 @@ func Test_buildClusterLBs(t *testing.T) { vips: []string{"1.2.3.4"}, protocol: corev1.ProtocolTCP, inport: 80, - clusterEndpoints: lbEndpoints{ + clusterEndpoints: util.LBEndpoints{ V4IPs: []string{"192.168.0.1", "192.168.0.2"}, Port: 8080, }, - nodeEndpoints: map[string]lbEndpoints{ + nodeEndpoints: util.PortToLBEndpoints{ nodeA: { V4IPs: []string{"192.168.0.1", "192.168.0.2"}, Port: 8080, @@ -1254,11 +1254,11 @@ func Test_buildClusterLBs(t *testing.T) { vips: []string{"1.2.3.4"}, protocol: corev1.ProtocolTCP, inport: 443, - clusterEndpoints: lbEndpoints{ + clusterEndpoints: util.LBEndpoints{ V4IPs: []string{"192.168.0.1"}, Port: 8043, }, - nodeEndpoints: map[string]lbEndpoints{ + nodeEndpoints: util.PortToLBEndpoints{ nodeA: { V4IPs: []string{"192.168.0.1"}, Port: 8043, @@ -1298,11 +1298,11 @@ func Test_buildClusterLBs(t *testing.T) { vips: []string{"1.2.3.4"}, protocol: corev1.ProtocolTCP, inport: 80, - clusterEndpoints: lbEndpoints{ + clusterEndpoints: util.LBEndpoints{ V4IPs: []string{"192.168.0.1", "192.168.0.2"}, Port: 8080, }, - nodeEndpoints: map[string]lbEndpoints{ + nodeEndpoints: util.PortToLBEndpoints{ nodeA: { V4IPs: []string{"192.168.0.1", "192.168.0.2"}, Port: 8080, @@ -1313,11 +1313,11 @@ func Test_buildClusterLBs(t *testing.T) { vips: []string{"1.2.3.4"}, protocol: corev1.ProtocolUDP, inport: 443, - clusterEndpoints: lbEndpoints{ + clusterEndpoints: util.LBEndpoints{ V4IPs: []string{"192.168.0.1"}, Port: 8043, }, - nodeEndpoints: map[string]lbEndpoints{ + nodeEndpoints: util.PortToLBEndpoints{ nodeA: { V4IPs: []string{"192.168.0.1"}, Port: 8043, @@ -1369,13 +1369,13 @@ func Test_buildClusterLBs(t *testing.T) { vips: []string{"1.2.3.4", "fe80::1"}, protocol: corev1.ProtocolTCP, inport: 80, - clusterEndpoints: lbEndpoints{ + clusterEndpoints: util.LBEndpoints{ V4IPs: []string{"192.168.0.1", "192.168.0.2"}, V6IPs: []string{"fe90::1", "fe91::1"}, Port: 8080, }, - nodeEndpoints: map[string]lbEndpoints{ + nodeEndpoints: util.PortToLBEndpoints{ nodeA: { V4IPs: []string{"192.168.0.1", "192.168.0.2"}, V6IPs: []string{"fe90::1", "fe91::1"}, @@ -1387,13 +1387,13 @@ func Test_buildClusterLBs(t *testing.T) { vips: []string{"1.2.3.4", "fe80::1"}, protocol: corev1.ProtocolTCP, inport: 443, - clusterEndpoints: lbEndpoints{ + clusterEndpoints: util.LBEndpoints{ V4IPs: []string{"192.168.0.1"}, V6IPs: []string{"fe90::1"}, Port: 8043, }, - nodeEndpoints: map[string]lbEndpoints{ + nodeEndpoints: util.PortToLBEndpoints{ nodeA: { V4IPs: []string{"192.168.0.1"}, V6IPs: []string{"fe90::1"}, @@ -1554,11 +1554,11 @@ func Test_buildPerNodeLBs(t *testing.T) { vips: []string{"1.2.3.4"}, protocol: corev1.ProtocolTCP, inport: 80, - clusterEndpoints: lbEndpoints{ + clusterEndpoints: util.LBEndpoints{ V4IPs: []string{"10.0.0.1"}, Port: 8080, }, - nodeEndpoints: map[string]lbEndpoints{ + nodeEndpoints: util.PortToLBEndpoints{ nodeA: { V4IPs: []string{"10.0.0.1"}, Port: 8080, @@ -1604,11 +1604,11 @@ func Test_buildPerNodeLBs(t *testing.T) { vips: []string{"node"}, protocol: corev1.ProtocolTCP, inport: 80, - clusterEndpoints: lbEndpoints{ + clusterEndpoints: util.LBEndpoints{ V4IPs: []string{"10.128.0.2"}, Port: 8080, }, - nodeEndpoints: map[string]lbEndpoints{ + nodeEndpoints: util.PortToLBEndpoints{ nodeA: {V4IPs: []string{"10.128.0.2"}, Port: 8080}, }, }, @@ -1690,11 +1690,11 @@ func Test_buildPerNodeLBs(t *testing.T) { vips: []string{"192.168.0.1"}, protocol: corev1.ProtocolTCP, inport: 80, - clusterEndpoints: lbEndpoints{ + clusterEndpoints: util.LBEndpoints{ V4IPs: []string{"10.0.0.1"}, Port: 8080, }, - nodeEndpoints: map[string]lbEndpoints{ + nodeEndpoints: util.PortToLBEndpoints{ nodeA: { V4IPs: []string{"10.0.0.1"}, Port: 8080, @@ -1705,11 +1705,11 @@ func Test_buildPerNodeLBs(t *testing.T) { vips: []string{"node"}, protocol: corev1.ProtocolTCP, inport: 80, - clusterEndpoints: lbEndpoints{ + clusterEndpoints: util.LBEndpoints{ V4IPs: []string{"10.0.0.1"}, Port: 8080, }, - nodeEndpoints: map[string]lbEndpoints{ + nodeEndpoints: util.PortToLBEndpoints{ nodeA: { V4IPs: []string{"10.0.0.1"}, Port: 8080, @@ -1851,11 +1851,11 @@ func Test_buildPerNodeLBs(t *testing.T) { vips: []string{"192.168.0.1"}, protocol: corev1.ProtocolTCP, inport: 80, - clusterEndpoints: lbEndpoints{ + clusterEndpoints: util.LBEndpoints{ V4IPs: []string{"10.0.0.1"}, Port: 8080, }, - nodeEndpoints: map[string]lbEndpoints{ + nodeEndpoints: util.PortToLBEndpoints{ nodeA: { V4IPs: []string{"10.0.0.1"}, Port: 8080, @@ -1868,11 +1868,11 @@ func Test_buildPerNodeLBs(t *testing.T) { inport: 80, externalTrafficLocal: true, hasNodePort: true, - clusterEndpoints: lbEndpoints{ + clusterEndpoints: util.LBEndpoints{ V4IPs: []string{"10.0.0.1"}, Port: 8080, }, - nodeEndpoints: map[string]lbEndpoints{ + nodeEndpoints: util.PortToLBEndpoints{ nodeA: { V4IPs: []string{"10.0.0.1"}, Port: 8080, @@ -1998,11 +1998,11 @@ func Test_buildPerNodeLBs(t *testing.T) { protocol: corev1.ProtocolTCP, inport: 80, internalTrafficLocal: true, - clusterEndpoints: lbEndpoints{ + clusterEndpoints: util.LBEndpoints{ V4IPs: []string{"10.128.0.1", "10.128.1.1"}, Port: 8080, }, - nodeEndpoints: map[string]lbEndpoints{ + nodeEndpoints: util.PortToLBEndpoints{ nodeA: { V4IPs: []string{"10.128.0.1"}, Port: 8080, @@ -2017,11 +2017,11 @@ func Test_buildPerNodeLBs(t *testing.T) { vips: []string{"1.2.3.4"}, // externalIP config protocol: corev1.ProtocolTCP, inport: 80, - clusterEndpoints: lbEndpoints{ + clusterEndpoints: util.LBEndpoints{ V4IPs: []string{"10.128.0.1", "10.128.1.1"}, Port: 8080, }, - nodeEndpoints: map[string]lbEndpoints{ + nodeEndpoints: util.PortToLBEndpoints{ nodeA: { V4IPs: []string{"10.128.0.1"}, Port: 8080, @@ -2149,11 +2149,11 @@ func Test_buildPerNodeLBs(t *testing.T) { protocol: corev1.ProtocolTCP, inport: 80, internalTrafficLocal: true, - clusterEndpoints: lbEndpoints{ + clusterEndpoints: util.LBEndpoints{ V4IPs: []string{"10.0.0.1", "10.0.0.2"}, Port: 8080, }, - nodeEndpoints: map[string]lbEndpoints{ + nodeEndpoints: util.PortToLBEndpoints{ nodeA: { V4IPs: []string{"10.0.0.1"}, Port: 8080, @@ -2168,11 +2168,11 @@ func Test_buildPerNodeLBs(t *testing.T) { vips: []string{"1.2.3.4"}, // externalIP config protocol: corev1.ProtocolTCP, inport: 80, - clusterEndpoints: lbEndpoints{ + clusterEndpoints: util.LBEndpoints{ V4IPs: []string{"10.0.0.1", "10.0.0.2"}, Port: 8080, }, - nodeEndpoints: map[string]lbEndpoints{ + nodeEndpoints: util.PortToLBEndpoints{ nodeA: { V4IPs: []string{"10.0.0.1"}, Port: 8080, @@ -2336,11 +2336,11 @@ func Test_buildPerNodeLBs(t *testing.T) { inport: 80, internalTrafficLocal: true, externalTrafficLocal: false, // ETP is applicable only to nodePorts and LBs - clusterEndpoints: lbEndpoints{ + clusterEndpoints: util.LBEndpoints{ V4IPs: []string{"10.0.0.1"}, Port: 8080, }, - nodeEndpoints: map[string]lbEndpoints{ + nodeEndpoints: util.PortToLBEndpoints{ nodeA: { V4IPs: []string{"10.0.0.1"}, // only one ep on node-a Port: 8080, @@ -2354,11 +2354,11 @@ func Test_buildPerNodeLBs(t *testing.T) { externalTrafficLocal: true, internalTrafficLocal: false, // ITP is applicable only to clusterIPs hasNodePort: true, - clusterEndpoints: lbEndpoints{ + clusterEndpoints: util.LBEndpoints{ V4IPs: []string{"10.0.0.1"}, Port: 8080, }, - nodeEndpoints: map[string]lbEndpoints{ + nodeEndpoints: util.PortToLBEndpoints{ nodeA: { V4IPs: []string{"10.0.0.1"}, // only one ep on node-a Port: 8080, @@ -2576,11 +2576,11 @@ func Test_buildPerNodeLBs(t *testing.T) { inport: 5, // nodePort hasNodePort: true, externalTrafficLocal: true, - clusterEndpoints: lbEndpoints{ + clusterEndpoints: util.LBEndpoints{ V4IPs: []string{"10.128.0.2", "10.128.1.2"}, Port: 8080, }, - nodeEndpoints: map[string]lbEndpoints{ + nodeEndpoints: util.PortToLBEndpoints{ nodeA: { V4IPs: []string{"10.128.0.2"}, Port: 8080, @@ -2597,11 +2597,11 @@ func Test_buildPerNodeLBs(t *testing.T) { inport: 80, hasNodePort: false, externalTrafficLocal: true, - clusterEndpoints: lbEndpoints{ + clusterEndpoints: util.LBEndpoints{ V4IPs: []string{"10.128.0.2", "10.128.1.2"}, Port: 8080, }, - nodeEndpoints: map[string]lbEndpoints{ + nodeEndpoints: util.PortToLBEndpoints{ nodeA: { V4IPs: []string{"10.128.0.2"}, Port: 8080, @@ -2731,11 +2731,11 @@ func Test_buildPerNodeLBs(t *testing.T) { inport: 5, // nodePort hasNodePort: true, externalTrafficLocal: true, - clusterEndpoints: lbEndpoints{ + clusterEndpoints: util.LBEndpoints{ V4IPs: []string{"10.128.0.2"}, Port: 8080, }, - nodeEndpoints: map[string]lbEndpoints{ + nodeEndpoints: util.PortToLBEndpoints{ nodeA: {V4IPs: []string{"10.128.0.2"}, Port: 8080}, nodeB: {V4IPs: []string{"10.128.1.2"}, Port: 8080}, }, @@ -2746,11 +2746,11 @@ func Test_buildPerNodeLBs(t *testing.T) { inport: 80, hasNodePort: false, externalTrafficLocal: true, - clusterEndpoints: lbEndpoints{ + clusterEndpoints: util.LBEndpoints{ V4IPs: []string{"10.128.0.2"}, Port: 8080, }, - nodeEndpoints: map[string]lbEndpoints{ + nodeEndpoints: util.PortToLBEndpoints{ nodeA: {V4IPs: []string{"10.128.0.2"}, Port: 8080}, nodeB: {V4IPs: []string{"10.128.1.2"}, Port: 8080}, }, @@ -2883,11 +2883,11 @@ func Test_buildPerNodeLBs(t *testing.T) { vips: []string{"node"}, protocol: corev1.ProtocolTCP, inport: 80, - clusterEndpoints: lbEndpoints{ + clusterEndpoints: util.LBEndpoints{ V6IPs: []string{"fe00:0:0:0:1::2"}, Port: 8080, }, - nodeEndpoints: map[string]lbEndpoints{ + nodeEndpoints: util.PortToLBEndpoints{ nodeA: {V6IPs: []string{"fe00:0:0:0:1::2"}, Port: 8080}, }, }, @@ -2972,11 +2972,11 @@ func Test_buildPerNodeLBs(t *testing.T) { inport: 5, // nodePort hasNodePort: true, externalTrafficLocal: true, - clusterEndpoints: lbEndpoints{ + clusterEndpoints: util.LBEndpoints{ V6IPs: []string{"fe00:0:0:0:1::2"}, Port: 8080, }, - nodeEndpoints: map[string]lbEndpoints{ + nodeEndpoints: util.PortToLBEndpoints{ nodeA: {V6IPs: []string{"fe00:0:0:0:1::2"}, Port: 8080}, nodeB: {V6IPs: []string{"fe00:0:0:0:2::2"}, Port: 8080}, }, @@ -2987,11 +2987,11 @@ func Test_buildPerNodeLBs(t *testing.T) { inport: 80, hasNodePort: false, externalTrafficLocal: true, - clusterEndpoints: lbEndpoints{ + clusterEndpoints: util.LBEndpoints{ V6IPs: []string{"fe00:0:0:0:1::2"}, Port: 8080, }, - nodeEndpoints: map[string]lbEndpoints{ + nodeEndpoints: util.PortToLBEndpoints{ nodeA: {V6IPs: []string{"fe00:0:0:0:1::2"}, Port: 8080}, nodeB: {V6IPs: []string{"fe00:0:0:0:2::2"}, Port: 8080}, }, @@ -3274,7 +3274,7 @@ func Test_idledServices(t *testing.T) { } } -func Test_getEndpointsForService(t *testing.T) { +func Test_GetEndpointsForService(t *testing.T) { type args struct { slices []*discovery.EndpointSlice svc *corev1.Service @@ -3284,8 +3284,9 @@ func Test_getEndpointsForService(t *testing.T) { tests := []struct { name string args args - wantClusterEndpoints map[string]lbEndpoints - wantNodeEndpoints map[string]map[string]lbEndpoints + wantClusterEndpoints util.PortToLBEndpoints + wantNodeEndpoints util.PortToNodeToLBEndpoints + wantError error }{ { name: "empty slices", @@ -3293,8 +3294,8 @@ func Test_getEndpointsForService(t *testing.T) { slices: []*discovery.EndpointSlice{}, svc: getSampleServiceWithOnePort(httpPortName, httpPortValue, tcp), }, - wantClusterEndpoints: map[string]lbEndpoints{}, // no cluster-wide endpoints - wantNodeEndpoints: map[string]map[string]lbEndpoints{}, // no local endpoints + wantClusterEndpoints: util.PortToLBEndpoints{}, // no cluster-wide endpoints + wantNodeEndpoints: util.PortToNodeToLBEndpoints{}, // no local endpoints }, { name: "slice with one local endpoint", @@ -3320,9 +3321,9 @@ func Test_getEndpointsForService(t *testing.T) { svc: getSampleServiceWithOnePort("tcp-example", 80, tcp), nodes: sets.New(nodeA), // one-node zone }, - wantClusterEndpoints: map[string]lbEndpoints{ - getServicePortKey(tcp, "tcp-example"): {V4IPs: []string{"10.0.0.2"}, Port: 80}}, // one cluster-wide endpoint - wantNodeEndpoints: map[string]map[string]lbEndpoints{}, // no need for local endpoints, service is not ETP or ITP local + wantClusterEndpoints: util.PortToLBEndpoints{ + util.GetServicePortKey(tcp, "tcp-example"): {V4IPs: []string{"10.0.0.2"}, Port: 80}}, // one cluster-wide endpoint + wantNodeEndpoints: util.PortToNodeToLBEndpoints{}, // no need for local endpoints, service is not ETP or ITP local }, { name: "slice with one local endpoint, ETP=local", @@ -3348,10 +3349,10 @@ func Test_getEndpointsForService(t *testing.T) { svc: getSampleServiceWithOnePortAndETPLocal("tcp-example", 80, tcp), nodes: sets.New(nodeA), // one-node zone }, - wantClusterEndpoints: map[string]lbEndpoints{ - getServicePortKey(tcp, "tcp-example"): {V4IPs: []string{"10.0.0.2"}, Port: 80}}, // one cluster-wide endpoint - wantNodeEndpoints: map[string]map[string]lbEndpoints{ - getServicePortKey(tcp, "tcp-example"): {nodeA: lbEndpoints{V4IPs: []string{"10.0.0.2"}, Port: 80}}}, // ETP=local, one local endpoint + wantClusterEndpoints: util.PortToLBEndpoints{ + util.GetServicePortKey(tcp, "tcp-example"): {V4IPs: []string{"10.0.0.2"}, Port: 80}}, // one cluster-wide endpoint + wantNodeEndpoints: util.PortToNodeToLBEndpoints{ + util.GetServicePortKey(tcp, "tcp-example"): {nodeA: util.LBEndpoints{V4IPs: []string{"10.0.0.2"}, Port: 80}}}, // ETP=local, one local endpoint }, { name: "slice with one non-local endpoint, ETP=local", @@ -3377,9 +3378,9 @@ func Test_getEndpointsForService(t *testing.T) { svc: getSampleServiceWithOnePortAndETPLocal("tcp-example", 80, tcp), nodes: sets.New(nodeA), // one-node zone }, - wantClusterEndpoints: map[string]lbEndpoints{ - getServicePortKey(tcp, "tcp-example"): {V4IPs: []string{"10.0.0.2"}, Port: 80}}, // one cluster-wide endpoint - wantNodeEndpoints: map[string]map[string]lbEndpoints{}, // ETP=local but no local endpoint + wantClusterEndpoints: util.PortToLBEndpoints{ + util.GetServicePortKey(tcp, "tcp-example"): {V4IPs: []string{"10.0.0.2"}, Port: 80}}, // one cluster-wide endpoint + wantNodeEndpoints: util.PortToNodeToLBEndpoints{}, // ETP=local but no local endpoint }, { name: "slice of address type FQDN", @@ -3405,8 +3406,8 @@ func Test_getEndpointsForService(t *testing.T) { svc: getSampleServiceWithOnePort("tcp-example", 80, tcp), nodes: sets.New(nodeA), // one-node zone }, - wantClusterEndpoints: map[string]lbEndpoints{}, // no endpoints - wantNodeEndpoints: map[string]map[string]lbEndpoints{}, // no local endpoint + wantClusterEndpoints: util.PortToLBEndpoints{}, // no endpoints + wantNodeEndpoints: util.PortToNodeToLBEndpoints{}, // no local endpoint }, { name: "slice with one endpoint, OVN zone with two nodes, ETP=local", @@ -3432,10 +3433,10 @@ func Test_getEndpointsForService(t *testing.T) { svc: getSampleServiceWithOnePortAndETPLocal("tcp-example", 80, tcp), nodes: sets.New(nodeA, nodeB), // one-node zone }, - wantClusterEndpoints: map[string]lbEndpoints{ - getServicePortKey(tcp, "tcp-example"): {V4IPs: []string{"10.0.0.2"}, Port: 80}}, // one cluster-wide endpoint - wantNodeEndpoints: map[string]map[string]lbEndpoints{ - getServicePortKey(tcp, "tcp-example"): {nodeB: lbEndpoints{V4IPs: []string{"10.0.0.2"}, Port: 80}}}, // endpoint on nodeB + wantClusterEndpoints: util.PortToLBEndpoints{ + util.GetServicePortKey(tcp, "tcp-example"): {V4IPs: []string{"10.0.0.2"}, Port: 80}}, // one cluster-wide endpoint + wantNodeEndpoints: util.PortToNodeToLBEndpoints{ + util.GetServicePortKey(tcp, "tcp-example"): {nodeB: util.LBEndpoints{V4IPs: []string{"10.0.0.2"}, Port: 80}}}, // endpoint on nodeB }, { name: "slice with different port name than the service", @@ -3461,8 +3462,8 @@ func Test_getEndpointsForService(t *testing.T) { svc: getSampleServiceWithOnePort("tcp-example", 80, tcp), nodes: sets.New(nodeA), // one-node zone }, - wantClusterEndpoints: map[string]lbEndpoints{}, // no cluster-wide endpoints - wantNodeEndpoints: map[string]map[string]lbEndpoints{}, // no local endpoints + wantClusterEndpoints: util.PortToLBEndpoints{}, // no cluster-wide endpoints + wantNodeEndpoints: util.PortToNodeToLBEndpoints{}, // no local endpoints }, { name: "slice and service without a port name, ETP=local", @@ -3488,10 +3489,10 @@ func Test_getEndpointsForService(t *testing.T) { nodes: sets.New(nodeA), // one-node zone }, - wantClusterEndpoints: map[string]lbEndpoints{ - getServicePortKey(tcp, ""): {V4IPs: []string{"10.0.0.2"}, Port: 8080}}, // one cluster-wide endpoint - wantNodeEndpoints: map[string]map[string]lbEndpoints{ - getServicePortKey(tcp, ""): {nodeA: lbEndpoints{V4IPs: []string{"10.0.0.2"}, Port: 8080}}}, // one local endpoint + wantClusterEndpoints: util.PortToLBEndpoints{ + util.GetServicePortKey(tcp, ""): {V4IPs: []string{"10.0.0.2"}, Port: 8080}}, // one cluster-wide endpoint + wantNodeEndpoints: util.PortToNodeToLBEndpoints{ + util.GetServicePortKey(tcp, ""): {nodeA: util.LBEndpoints{V4IPs: []string{"10.0.0.2"}, Port: 8080}}}, // one local endpoint }, { name: "slice with an IPv6 endpoint", @@ -3517,9 +3518,9 @@ func Test_getEndpointsForService(t *testing.T) { svc: getSampleServiceWithOnePort("tcp-example", 80, tcp), nodes: sets.New(nodeA), // one-node zone }, - wantClusterEndpoints: map[string]lbEndpoints{ - getServicePortKey(tcp, "tcp-example"): {V6IPs: []string{"2001:db2::2"}, Port: 80}}, // one cluster-wide endpoint - wantNodeEndpoints: map[string]map[string]lbEndpoints{}, // local endpoints not filled in, since service is not ETP or ITP local + wantClusterEndpoints: util.PortToLBEndpoints{ + util.GetServicePortKey(tcp, "tcp-example"): {V6IPs: []string{"2001:db2::2"}, Port: 80}}, // one cluster-wide endpoint + wantNodeEndpoints: util.PortToNodeToLBEndpoints{}, // local endpoints not filled in, since service is not ETP or ITP local }, { name: "a slice with an IPv4 endpoint and a slice with an IPv6 endpoint (dualstack cluster), ETP=local", @@ -3561,10 +3562,10 @@ func Test_getEndpointsForService(t *testing.T) { svc: getSampleServiceWithOnePortAndETPLocal("tcp-example", 80, tcp), nodes: sets.New(nodeA), // one-node zone }, - wantClusterEndpoints: map[string]lbEndpoints{ - getServicePortKey(tcp, "tcp-example"): {V4IPs: []string{"10.0.0.2"}, V6IPs: []string{"2001:db2::2"}, Port: 80}}, - wantNodeEndpoints: map[string]map[string]lbEndpoints{ - getServicePortKey(tcp, "tcp-example"): {nodeA: lbEndpoints{V4IPs: []string{"10.0.0.2"}, V6IPs: []string{"2001:db2::2"}, Port: 80}}}, + wantClusterEndpoints: util.PortToLBEndpoints{ + util.GetServicePortKey(tcp, "tcp-example"): {V4IPs: []string{"10.0.0.2"}, V6IPs: []string{"2001:db2::2"}, Port: 80}}, + wantNodeEndpoints: util.PortToNodeToLBEndpoints{ + util.GetServicePortKey(tcp, "tcp-example"): {nodeA: util.LBEndpoints{V4IPs: []string{"10.0.0.2"}, V6IPs: []string{"2001:db2::2"}, Port: 80}}}, }, { name: "one slice with a duplicate address in the same endpoint", @@ -3590,9 +3591,9 @@ func Test_getEndpointsForService(t *testing.T) { svc: getSampleServiceWithOnePort("tcp-example", 80, tcp), nodes: sets.New(nodeA), // one-node zone }, - wantClusterEndpoints: map[string]lbEndpoints{ - getServicePortKey(tcp, "tcp-example"): {V4IPs: []string{"10.0.0.2"}, Port: 80}}, - wantNodeEndpoints: map[string]map[string]lbEndpoints{}, // local endpoints not filled in, since service is not ETP or ITP local + wantClusterEndpoints: util.PortToLBEndpoints{ + util.GetServicePortKey(tcp, "tcp-example"): {V4IPs: []string{"10.0.0.2"}, Port: 80}}, + wantNodeEndpoints: util.PortToNodeToLBEndpoints{}, // local endpoints not filled in, since service is not ETP or ITP local }, { name: "one slice with a duplicate address across two endpoints", @@ -3618,9 +3619,9 @@ func Test_getEndpointsForService(t *testing.T) { svc: getSampleServiceWithOnePort("tcp-example", 80, tcp), nodes: sets.New(nodeA), // one-node zone }, - wantClusterEndpoints: map[string]lbEndpoints{ - getServicePortKey(tcp, "tcp-example"): {V4IPs: []string{"10.0.0.2"}, Port: 80}}, - wantNodeEndpoints: map[string]map[string]lbEndpoints{}, // local endpoints not filled in, since service is not ETP or ITP local + wantClusterEndpoints: util.PortToLBEndpoints{ + util.GetServicePortKey(tcp, "tcp-example"): {V4IPs: []string{"10.0.0.2"}, Port: 80}}, + wantNodeEndpoints: util.PortToNodeToLBEndpoints{}, // local endpoints not filled in, since service is not ETP or ITP local }, { name: "multiples slices with a duplicate address, with both address being ready", @@ -3662,9 +3663,9 @@ func Test_getEndpointsForService(t *testing.T) { svc: getSampleServiceWithOnePort("tcp-example", 80, tcp), nodes: sets.New(nodeA), // one-node zone }, - wantClusterEndpoints: map[string]lbEndpoints{ - getServicePortKey(tcp, "tcp-example"): {V4IPs: []string{"10.0.0.2", "10.1.1.2", "10.2.2.2"}, Port: 80}}, - wantNodeEndpoints: map[string]map[string]lbEndpoints{}, // local endpoints not filled in, since service is not ETP or ITP local + wantClusterEndpoints: util.PortToLBEndpoints{ + util.GetServicePortKey(tcp, "tcp-example"): {V4IPs: []string{"10.0.0.2", "10.1.1.2", "10.2.2.2"}, Port: 80}}, + wantNodeEndpoints: util.PortToNodeToLBEndpoints{}, // local endpoints not filled in, since service is not ETP or ITP local }, { name: "multiples slices with different ports, ETP=local", @@ -3706,12 +3707,12 @@ func Test_getEndpointsForService(t *testing.T) { svc: getSampleServiceWithTwoPortsAndETPLocal("tcp-example", "other-port", 80, 8080, tcp, tcp), nodes: sets.New(nodeA), // one-node zone }, - wantClusterEndpoints: map[string]lbEndpoints{ - getServicePortKey(tcp, "tcp-example"): {V4IPs: []string{"10.0.0.2", "10.1.1.2"}, Port: 80}, - getServicePortKey(tcp, "other-port"): {V4IPs: []string{"10.0.0.3", "10.2.2.3"}, Port: 8080}}, - wantNodeEndpoints: map[string]map[string]lbEndpoints{ - getServicePortKey(tcp, "tcp-example"): {nodeA: lbEndpoints{V4IPs: []string{"10.0.0.2", "10.1.1.2"}, Port: 80}}, - getServicePortKey(tcp, "other-port"): {nodeA: lbEndpoints{V4IPs: []string{"10.0.0.3", "10.2.2.3"}, Port: 8080}}}, + wantClusterEndpoints: util.PortToLBEndpoints{ + util.GetServicePortKey(tcp, "tcp-example"): {V4IPs: []string{"10.0.0.2", "10.1.1.2"}, Port: 80}, + util.GetServicePortKey(tcp, "other-port"): {V4IPs: []string{"10.0.0.3", "10.2.2.3"}, Port: 8080}}, + wantNodeEndpoints: util.PortToNodeToLBEndpoints{ + util.GetServicePortKey(tcp, "tcp-example"): {nodeA: util.LBEndpoints{V4IPs: []string{"10.0.0.2", "10.1.1.2"}, Port: 80}}, + util.GetServicePortKey(tcp, "other-port"): {nodeA: util.LBEndpoints{V4IPs: []string{"10.0.0.3", "10.2.2.3"}, Port: 8080}}}, }, { name: "multiples slices with different ports, OVN zone with two nodes, ETP=local", @@ -3753,12 +3754,12 @@ func Test_getEndpointsForService(t *testing.T) { svc: getSampleServiceWithTwoPortsAndETPLocal("tcp-example", "other-port", 80, 8080, tcp, tcp), nodes: sets.New(nodeA, nodeB), // zone with two nodes }, - wantClusterEndpoints: map[string]lbEndpoints{ - getServicePortKey(tcp, "tcp-example"): {V4IPs: []string{"10.0.0.2", "10.1.1.2"}, Port: 80}, - getServicePortKey(tcp, "other-port"): {V4IPs: []string{"10.0.0.3", "10.2.2.3"}, Port: 8080}}, - wantNodeEndpoints: map[string]map[string]lbEndpoints{ - getServicePortKey(tcp, "tcp-example"): {nodeA: lbEndpoints{V4IPs: []string{"10.0.0.2", "10.1.1.2"}, Port: 80}}, - getServicePortKey(tcp, "other-port"): {nodeB: lbEndpoints{V4IPs: []string{"10.0.0.3", "10.2.2.3"}, Port: 8080}}}, + wantClusterEndpoints: util.PortToLBEndpoints{ + util.GetServicePortKey(tcp, "tcp-example"): {V4IPs: []string{"10.0.0.2", "10.1.1.2"}, Port: 80}, + util.GetServicePortKey(tcp, "other-port"): {V4IPs: []string{"10.0.0.3", "10.2.2.3"}, Port: 8080}}, + wantNodeEndpoints: util.PortToNodeToLBEndpoints{ + util.GetServicePortKey(tcp, "tcp-example"): {nodeA: util.LBEndpoints{V4IPs: []string{"10.0.0.2", "10.1.1.2"}, Port: 80}}, + util.GetServicePortKey(tcp, "other-port"): {nodeB: util.LBEndpoints{V4IPs: []string{"10.0.0.3", "10.2.2.3"}, Port: 8080}}}, }, { name: "slice with a mix of ready and terminating (serving and non-serving) endpoints", @@ -3791,9 +3792,9 @@ func Test_getEndpointsForService(t *testing.T) { nodes: sets.New(nodeA), // one-node zone }, - wantClusterEndpoints: map[string]lbEndpoints{ - getServicePortKey(tcp, "tcp-example"): {V6IPs: []string{"2001:db2::2", "2001:db2::3"}, Port: 80}}, - wantNodeEndpoints: map[string]map[string]lbEndpoints{}, // local endpoints not filled in, since service is not ETP or ITP local + wantClusterEndpoints: util.PortToLBEndpoints{ + util.GetServicePortKey(tcp, "tcp-example"): {V6IPs: []string{"2001:db2::2", "2001:db2::3"}, Port: 80}}, + wantNodeEndpoints: util.PortToNodeToLBEndpoints{}, // local endpoints not filled in, since service is not ETP or ITP local }, { name: "slice with a mix of terminating (serving and non-serving) endpoints and no ready endpoints", @@ -3824,9 +3825,9 @@ func Test_getEndpointsForService(t *testing.T) { svc: getSampleServiceWithOnePort("tcp-example", 80, tcp), nodes: sets.New(nodeA), // one-node zone }, - wantClusterEndpoints: map[string]lbEndpoints{ - getServicePortKey(tcp, "tcp-example"): {V6IPs: []string{"2001:db2::4", "2001:db2::5"}, Port: 80}}, - wantNodeEndpoints: map[string]map[string]lbEndpoints{}, // local endpoints not filled in, since service is not ETP or ITP local + wantClusterEndpoints: util.PortToLBEndpoints{ + util.GetServicePortKey(tcp, "tcp-example"): {V6IPs: []string{"2001:db2::4", "2001:db2::5"}, Port: 80}}, + wantNodeEndpoints: util.PortToNodeToLBEndpoints{}, // local endpoints not filled in, since service is not ETP or ITP local }, { name: "slice with only terminating non-serving endpoints", @@ -3855,8 +3856,8 @@ func Test_getEndpointsForService(t *testing.T) { svc: getSampleServiceWithOnePort("tcp-example", 80, tcp), nodes: sets.New(nodeA), // one-node zone }, - wantClusterEndpoints: map[string]lbEndpoints{}, // no cluster-wide endpoints - wantNodeEndpoints: map[string]map[string]lbEndpoints{}, // local endpoints not filled in, since service is not ETP or ITP local + wantClusterEndpoints: util.PortToLBEndpoints{}, // no cluster-wide endpoints + wantNodeEndpoints: util.PortToNodeToLBEndpoints{}, // local endpoints not filled in, since service is not ETP or ITP local }, { @@ -3905,9 +3906,9 @@ func Test_getEndpointsForService(t *testing.T) { svc: getSampleServiceWithOnePort("tcp-example", 80, tcp), nodes: sets.New(nodeA), // one-node zone }, - wantClusterEndpoints: map[string]lbEndpoints{ - getServicePortKey(tcp, "tcp-example"): {V6IPs: []string{"2001:db2::3", "2001:db2::4"}, Port: 80}}, - wantNodeEndpoints: map[string]map[string]lbEndpoints{}, // local endpoints not filled in, since service is not ETP or ITP local + wantClusterEndpoints: util.PortToLBEndpoints{ + util.GetServicePortKey(tcp, "tcp-example"): {V6IPs: []string{"2001:db2::3", "2001:db2::4"}, Port: 80}}, + wantNodeEndpoints: util.PortToNodeToLBEndpoints{}, // local endpoints not filled in, since service is not ETP or ITP local }, { name: "multiple slices with only terminating non-serving endpoints", @@ -3953,8 +3954,8 @@ func Test_getEndpointsForService(t *testing.T) { svc: getSampleServiceWithOnePort("tcp-example", 80, tcp), nodes: sets.New(nodeA), // one-node zone }, - wantClusterEndpoints: map[string]lbEndpoints{}, // no cluster-wide endpoints - wantNodeEndpoints: map[string]map[string]lbEndpoints{}, // no local endpoints + wantClusterEndpoints: util.PortToLBEndpoints{}, // no cluster-wide endpoints + wantNodeEndpoints: util.PortToNodeToLBEndpoints{}, // no local endpoints }, { @@ -4005,9 +4006,9 @@ func Test_getEndpointsForService(t *testing.T) { svc: getSampleServiceWithOnePort("tcp-example", 80, tcp), nodes: sets.New(nodeA), // one-node zone }, - wantClusterEndpoints: map[string]lbEndpoints{ - getServicePortKey(tcp, "tcp-example"): {V4IPs: []string{"10.0.0.2"}, V6IPs: []string{"2001:db2::2"}, Port: 80}}, - wantNodeEndpoints: map[string]map[string]lbEndpoints{}, // local endpoints not filled in, since service is not ETP or ITP local + wantClusterEndpoints: util.PortToLBEndpoints{ + util.GetServicePortKey(tcp, "tcp-example"): {V4IPs: []string{"10.0.0.2"}, V6IPs: []string{"2001:db2::2"}, Port: 80}}, + wantNodeEndpoints: util.PortToNodeToLBEndpoints{}, // local endpoints not filled in, since service is not ETP or ITP local }, { name: "multiple slices with a mix of IPv4 and IPv6 terminating (serving and non-serving) endpoints and no ready endpoints (dualstack cluster)", @@ -4055,9 +4056,9 @@ func Test_getEndpointsForService(t *testing.T) { svc: getSampleServiceWithOnePort("tcp-example", 80, tcp), nodes: sets.New(nodeA), // one-node zone }, - wantClusterEndpoints: map[string]lbEndpoints{ - getServicePortKey(tcp, "tcp-example"): {V4IPs: []string{"10.0.0.3"}, V6IPs: []string{"2001:db2::3"}, Port: 80}}, - wantNodeEndpoints: map[string]map[string]lbEndpoints{}, // local endpoints not filled in, since service is not ETP or ITP local + wantClusterEndpoints: util.PortToLBEndpoints{ + util.GetServicePortKey(tcp, "tcp-example"): {V4IPs: []string{"10.0.0.3"}, V6IPs: []string{"2001:db2::3"}, Port: 80}}, + wantNodeEndpoints: util.PortToNodeToLBEndpoints{}, // local endpoints not filled in, since service is not ETP or ITP local }, { name: "multiple slices with a mix of IPv4 and IPv6 terminating non-serving endpoints (dualstack cluster)", @@ -4104,8 +4105,8 @@ func Test_getEndpointsForService(t *testing.T) { nodes: sets.New(nodeA), // one-node zone }, - wantClusterEndpoints: map[string]lbEndpoints{}, // no endpoints - wantNodeEndpoints: map[string]map[string]lbEndpoints{}, // no endpoints + wantClusterEndpoints: util.PortToLBEndpoints{}, // no endpoints + wantNodeEndpoints: util.PortToNodeToLBEndpoints{}, // no endpoints }, { name: "multiple slices with a mix of IPv4 and IPv6 ready and terminating (serving and non-serving) endpoints (dualstack cluster) and service.PublishNotReadyAddresses=true", @@ -4155,21 +4156,300 @@ func Test_getEndpointsForService(t *testing.T) { svc: getSampleServiceWithOnePortAndPublishNotReadyAddresses("tcp-example", 80, tcp), // <-- publishNotReadyAddresses=true nodes: sets.New(nodeA), // one-node zone }, - wantClusterEndpoints: map[string]lbEndpoints{ - getServicePortKey(tcp, "tcp-example"): { + wantClusterEndpoints: util.PortToLBEndpoints{ + util.GetServicePortKey(tcp, "tcp-example"): { V4IPs: []string{"10.0.0.2", "10.0.0.3", "10.0.0.4"}, V6IPs: []string{"2001:db2::2", "2001:db2::3", "2001:db2::4"}, Port: 80}}, - wantNodeEndpoints: map[string]map[string]lbEndpoints{}, // local endpoints not filled in, since service is not ETP or ITP local + wantNodeEndpoints: util.PortToNodeToLBEndpoints{}, // local endpoints not filled in, since service is not ETP or ITP local + }, + // According to https://kubernetes.io/docs/concepts/services-networking/service/#field-spec-ports, the following + // should be supported. However, in OVNK, this was never implemented. + { + name: "multiple slices with same port name, different ports, ETP=local", + args: args{ + slices: []*discovery.EndpointSlice{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "svc-ab23", + Namespace: "ns", + Labels: map[string]string{discovery.LabelServiceName: "svc"}, + }, + Ports: []discovery.EndpointPort{ + { + Name: ptr.To("tcp-example"), + Protocol: ptr.To(corev1.ProtocolTCP), + Port: ptr.To(int32(80)), + }, + }, + AddressType: discovery.AddressTypeIPv4, + Endpoints: kubetest.MakeReadyEndpointList(nodeA, "10.0.0.2", "10.1.1.2"), + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: "svc-ab24", + Namespace: "ns", + Labels: map[string]string{discovery.LabelServiceName: "svc"}, + }, + Ports: []discovery.EndpointPort{ + { + Name: ptr.To("tcp-example"), + Protocol: ptr.To(corev1.ProtocolTCP), + Port: ptr.To(int32(8080)), + }, + }, + AddressType: discovery.AddressTypeIPv4, + Endpoints: kubetest.MakeReadyEndpointList(nodeA, "10.0.0.3", "10.2.2.3"), + }, + }, + svc: getSampleServiceWithOnePortAndETPLocal("tcp-example", 80, tcp), + nodes: sets.New(nodeA), // one-node zone + }, + wantClusterEndpoints: util.PortToLBEndpoints{ + util.GetServicePortKey(tcp, "tcp-example"): {V4IPs: []string{"10.0.0.2", "10.1.1.2"}, Port: 80}}, + wantNodeEndpoints: util.PortToNodeToLBEndpoints{ + util.GetServicePortKey(tcp, "tcp-example"): {nodeA: util.LBEndpoints{V4IPs: []string{"10.0.0.2", "10.1.1.2"}, Port: 80}}}, + wantError: fmt.Errorf("OVN Kubernetes does not support more than one target port per service port for " + + "service \"test/service-test\": servicePortKey \"TCP/tcp-example\" portNumbers [80 8080]"), + }, + // The following is not supported by Kubernetes - OVNK will just ignore this and look up the matching + // protocol (TCP) only. + { + name: "multiple slices with same port name, different protocols, ETP=local", + args: args{ + slices: []*discovery.EndpointSlice{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "svc-ab23", + Namespace: "ns", + Labels: map[string]string{discovery.LabelServiceName: "svc"}, + }, + Ports: []discovery.EndpointPort{ + { + Name: ptr.To("tcp-example"), + Protocol: ptr.To(corev1.ProtocolTCP), + Port: ptr.To(int32(80)), + }, + }, + AddressType: discovery.AddressTypeIPv4, + Endpoints: kubetest.MakeReadyEndpointList(nodeA, "10.0.0.2", "10.1.1.2"), + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: "svc-ab24", + Namespace: "ns", + Labels: map[string]string{discovery.LabelServiceName: "svc"}, + }, + Ports: []discovery.EndpointPort{ + { + Name: ptr.To("tcp-example"), + Protocol: ptr.To(corev1.ProtocolUDP), + Port: ptr.To(int32(80)), + }, + }, + AddressType: discovery.AddressTypeIPv4, + Endpoints: kubetest.MakeReadyEndpointList(nodeA, "10.0.0.3", "10.2.2.3"), + }, + }, + svc: getSampleServiceWithOnePortAndETPLocal("tcp-example", 80, tcp), + nodes: sets.New(nodeA), // one-node zone + }, + wantClusterEndpoints: util.PortToLBEndpoints{ + util.GetServicePortKey(tcp, "tcp-example"): {V4IPs: []string{"10.0.0.2", "10.1.1.2"}, Port: 80}}, + wantNodeEndpoints: util.PortToNodeToLBEndpoints{ + util.GetServicePortKey(tcp, "tcp-example"): {nodeA: util.LBEndpoints{V4IPs: []string{"10.0.0.2", "10.1.1.2"}, Port: 80}}}, + }, + // The following should never happen in k8s - endpoints should not be empty. + { + name: "multiple slices with same port name, empty endpoints, invalid ports, ETP=local", + args: args{ + slices: []*discovery.EndpointSlice{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "svc-ab23", + Namespace: "ns", + Labels: map[string]string{discovery.LabelServiceName: "svc"}, + }, + Ports: []discovery.EndpointPort{ + { + Name: ptr.To("tcp-example"), + Protocol: ptr.To(corev1.ProtocolTCP), + Port: ptr.To(int32(80)), + }, + }, + AddressType: discovery.AddressTypeIPv4, + Endpoints: []discovery.Endpoint{kubetest.MakeUnassignedEndpoint("10.1.1.2")}, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: "svc-ab23", + Namespace: "ns", + Labels: map[string]string{discovery.LabelServiceName: "svc"}, + }, + Ports: []discovery.EndpointPort{ + { + Name: ptr.To("tcp-example"), + Protocol: ptr.To(corev1.ProtocolTCP), + Port: ptr.To(int32(80)), + }, + }, + AddressType: discovery.AddressTypeIPv4, + Endpoints: []discovery.Endpoint{}, + }, + }, + svc: getSampleServiceWithOnePortAndETPLocal("tcp-example", 80, tcp), + nodes: sets.New(nodeA), // one-node zone + }, + wantClusterEndpoints: util.PortToLBEndpoints{"TCP/tcp-example": util.LBEndpoints{Port: 80, V4IPs: []string{"10.1.1.2"}, V6IPs: []string(nil)}}, + wantNodeEndpoints: util.PortToNodeToLBEndpoints{}, + }, + // The following should never happen in k8s - invalid port number. + { + name: "single slices, invalid port number, ETP=local", + args: args{ + slices: []*discovery.EndpointSlice{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "svc-ab23", + Namespace: "ns", + Labels: map[string]string{discovery.LabelServiceName: "svc"}, + }, + Ports: []discovery.EndpointPort{ + { + Name: ptr.To("tcp-example"), + Protocol: ptr.To(corev1.ProtocolTCP), + Port: ptr.To(int32(-2)), + }, + }, + AddressType: discovery.AddressTypeIPv4, + Endpoints: []discovery.Endpoint{kubetest.MakeUnassignedEndpoint("10.1.1.2")}, + }, + }, + svc: getSampleServiceWithOnePortAndETPLocal("tcp-example", 80, tcp), + nodes: sets.New(nodeA), // one-node zone + }, + wantClusterEndpoints: util.PortToLBEndpoints{}, + wantNodeEndpoints: util.PortToNodeToLBEndpoints{}, + }, + { + name: "nil service without endpoints, ETP=local", + args: args{ + slices: []*discovery.EndpointSlice{}, + svc: nil, + nodes: sets.New(nodeA), + }, + wantClusterEndpoints: util.PortToLBEndpoints{}, + wantNodeEndpoints: util.PortToNodeToLBEndpoints{}, + }, + { + name: "multiple slices, nil service with endpoints, ETP=local", + args: args{ + slices: []*discovery.EndpointSlice{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "svc-ab23", + Namespace: "ns", + Labels: map[string]string{discovery.LabelServiceName: "svc"}, + }, + Ports: []discovery.EndpointPort{ + { + Name: ptr.To("tcp-example"), + Protocol: ptr.To(corev1.ProtocolTCP), + Port: ptr.To(int32(80)), + }, + }, + AddressType: discovery.AddressTypeIPv4, + Endpoints: kubetest.MakeReadyEndpointList(nodeA, "10.1.1.2"), + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: "svc-ab23", + Namespace: "ns", + Labels: map[string]string{discovery.LabelServiceName: "svc"}, + }, + Ports: []discovery.EndpointPort{ + { + Name: ptr.To("tcp-example2"), + Protocol: ptr.To(corev1.ProtocolTCP), + Port: ptr.To(int32(80)), + }, + }, + AddressType: discovery.AddressTypeIPv4, + Endpoints: kubetest.MakeReadyEndpointList(nodeA, "10.1.1.3"), + }, + }, + svc: nil, + nodes: sets.New(nodeA), + }, + wantClusterEndpoints: util.PortToLBEndpoints{ + "TCP/tcp-example": util.LBEndpoints{Port: 80, V4IPs: []string{"10.1.1.2"}, V6IPs: []string(nil)}, + "TCP/tcp-example2": util.LBEndpoints{Port: 80, V4IPs: []string{"10.1.1.3"}, V6IPs: []string(nil)}, + }, + wantNodeEndpoints: util.PortToNodeToLBEndpoints{ + "TCP/tcp-example": map[string]util.LBEndpoints{"node-a": {Port: 80, V4IPs: []string{"10.1.1.2"}, V6IPs: []string(nil)}}, + "TCP/tcp-example2": map[string]util.LBEndpoints{"node-a": {Port: 80, V4IPs: []string{"10.1.1.3"}, V6IPs: []string(nil)}}, + }, + }, + { + name: "multiple slices, service selects correct slice, ETP=local", + args: args{ + slices: []*discovery.EndpointSlice{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "svc-ab23", + Namespace: "ns", + Labels: map[string]string{discovery.LabelServiceName: "svc"}, + }, + Ports: []discovery.EndpointPort{ + { + Name: ptr.To("tcp-example"), + Protocol: ptr.To(corev1.ProtocolTCP), + Port: ptr.To(int32(80)), + }, + }, + AddressType: discovery.AddressTypeIPv4, + Endpoints: kubetest.MakeReadyEndpointList(nodeA, "10.1.1.2"), + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: "svc-ab23", + Namespace: "ns", + Labels: map[string]string{discovery.LabelServiceName: "svc"}, + }, + Ports: []discovery.EndpointPort{ + { + Name: ptr.To("tcp-example2"), + Protocol: ptr.To(corev1.ProtocolTCP), + Port: ptr.To(int32(80)), + }, + }, + AddressType: discovery.AddressTypeIPv4, + Endpoints: kubetest.MakeReadyEndpointList(nodeA, "10.1.1.3"), + }, + }, + svc: getSampleServiceWithOnePortAndETPLocal("tcp-example2", 80, tcp), + nodes: sets.New(nodeA), + }, + wantClusterEndpoints: util.PortToLBEndpoints{ + "TCP/tcp-example2": util.LBEndpoints{Port: 80, V4IPs: []string{"10.1.1.3"}, V6IPs: []string(nil)}, + }, + wantNodeEndpoints: util.PortToNodeToLBEndpoints{ + "TCP/tcp-example2": map[string]util.LBEndpoints{"node-a": {Port: 80, V4IPs: []string{"10.1.1.3"}, V6IPs: []string(nil)}}, + }, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - portToClusterEndpoints, portToNodeToEndpoints := getEndpointsForService( - tt.args.slices, tt.args.svc, tt.args.nodes, types.DefaultNetworkName) + needsLocalEndpoints := tt.args.svc == nil || util.ServiceExternalTrafficPolicyLocal(tt.args.svc) || util.ServiceInternalTrafficPolicyLocal(tt.args.svc) + portToClusterEndpoints, portToNodeToEndpoints, err := util.GetEndpointsForService( + tt.args.slices, tt.args.svc, tt.args.nodes, true, needsLocalEndpoints) assert.Equal(t, tt.wantClusterEndpoints, portToClusterEndpoints) assert.Equal(t, tt.wantNodeEndpoints, portToNodeToEndpoints) - + if tt.wantError == nil { + assert.NoError(t, err) + } else { + assert.ErrorContains(t, err, tt.wantError.Error()) + } }) } } @@ -4209,12 +4489,12 @@ func Test_makeNodeSwitchTargetIPs(t *testing.T) { vips: []string{"1.2.3.4", "fe10::1"}, protocol: corev1.ProtocolTCP, inport: 80, - clusterEndpoints: lbEndpoints{ + clusterEndpoints: util.LBEndpoints{ V4IPs: []string{"192.168.0.1"}, V6IPs: []string{"fe00:0:0:0:1::2"}, Port: 8080, }, - nodeEndpoints: map[string]lbEndpoints{ + nodeEndpoints: util.PortToLBEndpoints{ nodeA: { V4IPs: []string{"192.168.0.1"}, V6IPs: []string{"fe00:0:0:0:1::2"}, @@ -4235,13 +4515,13 @@ func Test_makeNodeSwitchTargetIPs(t *testing.T) { vips: []string{"1.2.3.4", "fe10::1"}, protocol: corev1.ProtocolTCP, inport: 80, - clusterEndpoints: lbEndpoints{ + clusterEndpoints: util.LBEndpoints{ V4IPs: []string{"192.168.0.1", "192.168.1.1"}, V6IPs: []string{"fe00:0:0:0:1::2", "fe00:0:0:0:2::2"}, Port: 8080, }, - nodeEndpoints: map[string]lbEndpoints{ + nodeEndpoints: util.PortToLBEndpoints{ nodeA: { V4IPs: []string{"192.168.0.1"}, V6IPs: []string{"fe00:0:0:0:1::2"}, @@ -4263,13 +4543,13 @@ func Test_makeNodeSwitchTargetIPs(t *testing.T) { vips: []string{"1.2.3.4", "fe10::1"}, protocol: corev1.ProtocolTCP, inport: 80, - clusterEndpoints: lbEndpoints{ + clusterEndpoints: util.LBEndpoints{ V4IPs: []string{"192.168.0.1"}, V6IPs: []string{"fe00:0:0:0:1::2"}, Port: 8080, }, - nodeEndpoints: map[string]lbEndpoints{ + nodeEndpoints: util.PortToLBEndpoints{ nodeA: { V4IPs: []string{"192.168.0.1"}, V6IPs: []string{"fe00:0:0:0:1::2"}, @@ -4290,7 +4570,7 @@ func Test_makeNodeSwitchTargetIPs(t *testing.T) { vips: []string{"1.2.3.4", "fe10::1"}, protocol: corev1.ProtocolTCP, inport: 80, - clusterEndpoints: lbEndpoints{ + clusterEndpoints: util.LBEndpoints{ V4IPs: []string{"192.168.1.1"}, // on nodeB V6IPs: []string{"fe00:0:0:0:2::2"}, // on nodeB Port: 8080, @@ -4312,7 +4592,7 @@ func Test_makeNodeSwitchTargetIPs(t *testing.T) { vips: []string{"1.2.3.4", "fe10::1"}, protocol: corev1.ProtocolTCP, inport: 80, - clusterEndpoints: lbEndpoints{ + clusterEndpoints: util.LBEndpoints{ V4IPs: []string{"192.168.1.1"}, // on nodeB V6IPs: []string{"fe00:0:0:0:2::2"}, // on nodeB Port: 8080, diff --git a/go-controller/pkg/ovn/controller/services/services_controller.go b/go-controller/pkg/ovn/controller/services/services_controller.go index 3f4275e028..21bae7fc9b 100644 --- a/go-controller/pkg/ovn/controller/services/services_controller.go +++ b/go-controller/pkg/ovn/controller/services/services_controller.go @@ -464,7 +464,7 @@ func (c *Controller) syncService(key string) error { } // Build the abstract LB configs for this service - perNodeConfigs, templateConfigs, clusterConfigs := buildServiceLBConfigs(service, endpointSlices, c.nodeInfos, c.useLBGroups, c.useTemplates, c.netInfo.GetNetworkName()) + perNodeConfigs, templateConfigs, clusterConfigs := buildServiceLBConfigs(service, endpointSlices, c.nodeInfos, c.useLBGroups, c.useTemplates) klog.V(5).Infof("Built service %s LB cluster-wide configs for network=%s: %#v", key, c.netInfo.GetNetworkName(), clusterConfigs) klog.V(5).Infof("Built service %s LB per-node configs for network=%s: %#v", key, c.netInfo.GetNetworkName(), perNodeConfigs) klog.V(5).Infof("Built service %s LB template configs for network=%s: %#v", key, c.netInfo.GetNetworkName(), templateConfigs) diff --git a/go-controller/pkg/ovn/egressip.go b/go-controller/pkg/ovn/egressip.go index 31a48d0c99..709e06b3f9 100644 --- a/go-controller/pkg/ovn/egressip.go +++ b/go-controller/pkg/ovn/egressip.go @@ -288,6 +288,7 @@ func (e *EgressIPController) reconcileEgressIP(old, new *egressipv1.EgressIP) (e } } // CASE 3: EIP object update + var errs []error if old != nil && new != nil { oldEIP := old newEIP := new @@ -374,7 +375,7 @@ func (e *EgressIPController) reconcileEgressIP(old, new *egressipv1.EgressIP) (e return fmt.Errorf("failed to get active network for namespace %s: %v", namespace.Name, err) } if err := e.addNamespaceEgressIPAssignments(ni, newEIP.Name, newEIP.Status.Items, mark, namespace, newEIP.Spec.PodSelector); err != nil { - return fmt.Errorf("network %s: failed to add namespace %s egress IP config: %v", ni.GetNetworkName(), namespace.Name, err) + errs = append(errs, fmt.Errorf("network %s: failed to add namespace %s egress IP config: %v", ni.GetNetworkName(), namespace.Name, err)) } } } @@ -403,16 +404,13 @@ func (e *EgressIPController) reconcileEgressIP(old, new *egressipv1.EgressIP) (e return fmt.Errorf("network %s: failed to delete pod %s/%s egress IP config: %v", ni.GetNetworkName(), pod.Namespace, pod.Name, err) } } - if util.PodCompleted(pod) { - continue - } if newPodSelector.Matches(podLabels) && !oldPodSelector.Matches(podLabels) { ni, err := e.networkManager.GetActiveNetworkForNamespace(namespace.Name) if err != nil { return fmt.Errorf("failed to get active network for namespace %s: %v", namespace.Name, err) } if err := e.addPodEgressIPAssignmentsWithLock(ni, newEIP.Name, newEIP.Status.Items, mark, pod); err != nil { - return fmt.Errorf("network %s: failed to add pod %s/%s egress IP config: %v", ni.GetNetworkName(), pod.Namespace, pod.Name, err) + errs = append(errs, fmt.Errorf("network %s: failed to add pod %s/%s egress IP config: %v", ni.GetNetworkName(), pod.Namespace, pod.Name, err)) } } } @@ -452,7 +450,7 @@ func (e *EgressIPController) reconcileEgressIP(old, new *egressipv1.EgressIP) (e podLabels := labels.Set(pod.Labels) if newPodSelector.Matches(podLabels) { if err := e.addPodEgressIPAssignmentsWithLock(ni, newEIP.Name, newEIP.Status.Items, mark, pod); err != nil { - return fmt.Errorf("network %s: failed to add pod %s/%s egress IP config: %v", ni.GetNetworkName(), pod.Namespace, pod.Name, err) + errs = append(errs, fmt.Errorf("network %s: failed to add pod %s/%s egress IP config: %v", ni.GetNetworkName(), pod.Namespace, pod.Name, err)) } } } @@ -471,12 +469,9 @@ func (e *EgressIPController) reconcileEgressIP(old, new *egressipv1.EgressIP) (e return fmt.Errorf("network %s: failed to delete pod %s/%s egress IP config: %v", ni.GetNetworkName(), pod.Namespace, pod.Name, err) } } - if util.PodCompleted(pod) { - continue - } if newPodSelector.Matches(podLabels) && !oldPodSelector.Matches(podLabels) { if err := e.addPodEgressIPAssignmentsWithLock(ni, newEIP.Name, newEIP.Status.Items, mark, pod); err != nil { - return fmt.Errorf("network %s: failed to add pod %s/%s egress IP config: %v", ni.GetNetworkName(), pod.Namespace, pod.Name, err) + errs = append(errs, fmt.Errorf("network %s: failed to add pod %s/%s egress IP config: %v", ni.GetNetworkName(), pod.Namespace, pod.Name, err)) } } } @@ -484,7 +479,7 @@ func (e *EgressIPController) reconcileEgressIP(old, new *egressipv1.EgressIP) (e } } } - return nil + return utilerrors.Join(errs...) } // reconcileEgressIPNamespace reconciles the database configuration setup in nbdb @@ -522,6 +517,7 @@ func (e *EgressIPController) reconcileEgressIPNamespace(old, new *corev1.Namespa if err != nil { return err } + var errs []error for _, egressIP := range egressIPs { if err := e.egressIPCache.DoWithLock(egressIP.Name, func(key string) error { // get latest egressIP object after we get cache lock to serialize egress ip operations @@ -563,10 +559,10 @@ func (e *EgressIPController) reconcileEgressIPNamespace(old, new *corev1.Namespa return nil }); err != nil { - return err + errs = append(errs, err) } } - return nil + return utilerrors.Join(errs...) } // reconcileEgressIPPod reconciles the database configuration setup in nbdb @@ -598,6 +594,7 @@ func (e *EgressIPController) reconcileEgressIPPod(old, new *corev1.Pod) (err err if err != nil { return err } + var errs []error for _, egressIP := range egressIPs { if err := e.egressIPCache.DoWithLock(egressIP.Name, func(key string) error { // get latest egressIP object after we get cache lock to serialize egress ip operations @@ -712,10 +709,10 @@ func (e *EgressIPController) reconcileEgressIPPod(old, new *corev1.Pod) (err err return nil }); err != nil { - return err + errs = append(errs, err) } } - return nil + return utilerrors.Join(errs...) } // main reconcile functions end here and local zone controller functions begin @@ -725,16 +722,17 @@ func (e *EgressIPController) addEgressIPAssignments(name string, statusAssignmen if err != nil { return err } + var errs []error for _, namespace := range namespaces { ni, err := e.networkManager.GetActiveNetworkForNamespace(namespace.Name) if err != nil { return fmt.Errorf("failed to get active network for namespace %s: %v", namespace.Name, err) } if err := e.addNamespaceEgressIPAssignments(ni, name, statusAssignments, mark, namespace, podSelector); err != nil { - return err + errs = append(errs, err) } } - return nil + return utilerrors.Join(errs...) } func (e *EgressIPController) addNamespaceEgressIPAssignments(ni util.NetInfo, name string, statusAssignments []egressipv1.EgressIPStatusItem, mark util.EgressIPMark, @@ -756,12 +754,13 @@ func (e *EgressIPController) addNamespaceEgressIPAssignments(ni util.NetInfo, na return err } } + var errs []error for _, pod := range pods { if err := e.addPodEgressIPAssignmentsWithLock(ni, name, statusAssignments, mark, pod); err != nil { - return err + errs = append(errs, err) } } - return nil + return utilerrors.Join(errs...) } func (e *EgressIPController) addPodEgressIPAssignmentsWithLock(ni util.NetInfo, name string, statusAssignments []egressipv1.EgressIPStatusItem, mark util.EgressIPMark, pod *corev1.Pod) error { @@ -778,9 +777,9 @@ func (e *EgressIPController) addPodEgressIPAssignmentsWithLock(ni util.NetInfo, // requires holding the podAssignmentMutex lock func (e *EgressIPController) addPodEgressIPAssignments(ni util.NetInfo, name string, statusAssignments []egressipv1.EgressIPStatusItem, mark util.EgressIPMark, pod *corev1.Pod) error { podKey := getPodKey(pod) - // If pod is already in succeeded or failed state, return it without proceeding further. - if util.PodCompleted(pod) { - klog.Infof("Pod %s is already in completed state, skipping egress ip assignment", podKey) + // Ignore completed pods, host networked pods, pods not scheduled + if !util.PodNeedsSNAT(pod) { + klog.Infof("Pod %s is not in desired state, skipping egress ip assignment", podKey) return nil } // If statusAssignments is empty just return, not doing this will delete the @@ -2011,7 +2010,7 @@ func (e *EgressIPController) generateCacheForEgressIP() (egressIPCache, error) { nadName = nadNames[0] // there should only be one active network } for _, pod := range pods { - if util.PodCompleted(pod) || !util.PodScheduled(pod) || util.PodWantsHostNetwork(pod) { + if !util.PodNeedsSNAT(pod) { continue } if egressLocalNodesCache.Len() == 0 && !e.isPodScheduledinLocalZone(pod) { diff --git a/go-controller/pkg/ovn/egressip_test.go b/go-controller/pkg/ovn/egressip_test.go index 7455d9ca40..131c63d354 100644 --- a/go-controller/pkg/ovn/egressip_test.go +++ b/go-controller/pkg/ovn/egressip_test.go @@ -11154,6 +11154,507 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations cluster default network" ), ) + ginkgo.DescribeTable( + "should ensure EgressIP skips host-network and pending pods from namespace", + func(includeUnscheduled, includeScheduledButNoIP, includeHostNetwork, interconnect, isPodRemote bool) { + app.Action = func(*cli.Context) error { + config.Gateway.DisableSNATMultipleGWs = true + config.OVNKubernetesFeature.EnableInterconnect = interconnect + + egressIP1 := "192.168.126.101" + node1IPv4 := "192.168.126.12" + node1IPv4CIDR := node1IPv4 + "/24" + node2IPv4 := "192.168.126.51" + node2IPv4CIDR := node2IPv4 + "/24" + + egressPod3 := *newPodWithLabels(eipNamespace, "egress-pod3", node1Name, podV4IP, egressPodLabel) + egressPod4 := *newPodWithLabels(eipNamespace, "egress-pod4", node1Name, podV4IP2, egressPodLabel) + egressNamespace := newNamespace(eipNamespace) + annotations := map[string]string{ + "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\"}", node1IPv4CIDR), + "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4Node1Subnet), + "k8s.ovn.org/l3-gateway-config": `{"default":{"mode":"local","mac-address":"7e:57:f8:f0:3c:49", "ip-address":"192.168.126.12/24", "next-hop":"192.168.126.1"}}`, + "k8s.ovn.org/node-chassis-id": "79fdcfc4-6fe6-4cd3-8242-c0f85a4668ec", + util.OVNNodeHostCIDRs: fmt.Sprintf("[\"%s\"]", node1IPv4CIDR), + "k8s.ovn.org/zone-name": "global", + } + node1 := getNodeObj(node1Name, annotations, map[string]string{}) + annotations = map[string]string{ + "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\"}", node2IPv4CIDR), + "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4Node2Subnet), + "k8s.ovn.org/l3-gateway-config": `{"default":{"mode":"local","mac-address":"7e:57:f8:f0:3c:49", "ip-address":"192.168.126.51/24", "next-hop":"192.168.126.1"}}`, + "k8s.ovn.org/node-chassis-id": "89fdcfc4-6fe6-4cd3-8242-c0f85a4668ec", + util.OVNNodeHostCIDRs: fmt.Sprintf("[\"%s\"]", node2IPv4CIDR), + "k8s.ovn.org/zone-name": "global", + } + if isPodRemote { + annotations["k8s.ovn.org/zone-name"] = "remote" + annotations["k8s.ovn.org/remote-zone-migrated"] = "remote" + } + node2 := getNodeObj(node2Name, annotations, map[string]string{}) + + eIP := egressipv1.EgressIP{ + ObjectMeta: newEgressIPMeta(egressIPName), + Spec: egressipv1.EgressIPSpec{ + EgressIPs: []string{egressIP1}, + NamespaceSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{ + "name": egressNamespace.Name, + }, + }, + }, + Status: egressipv1.EgressIPStatus{ + Items: []egressipv1.EgressIPStatusItem{}, + }, + } + node1Switch := &nbdb.LogicalSwitch{ + UUID: node1.Name + "-UUID", + Name: node1.Name, + } + node2Switch := &nbdb.LogicalSwitch{ + UUID: node2.Name + "-UUID", + Name: node2.Name, + } + + pods := []corev1.Pod{egressPod3, egressPod4} + if includeUnscheduled { + unScheduledPod := corev1.Pod{ + ObjectMeta: newPodMeta(eipNamespace, "egress-pod", egressPodLabel), + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Name: "containerName", + Image: "containerImage", + }, + }, + }, + Status: corev1.PodStatus{ + Phase: corev1.PodPending, + }, + } + pods = append(pods, unScheduledPod) + } + var pendingScheduledPodWithNoIP *corev1.Pod + if includeScheduledButNoIP { + pendingScheduledPodWithNoIP = newPodWithLabels(eipNamespace, "egress-pod1", node2Name, "", egressPodLabel) + pendingScheduledPodWithNoIP.Status.Phase = corev1.PodPending + pods = append(pods, *pendingScheduledPodWithNoIP) + } + if includeHostNetwork { + hnPod := *newPodWithLabels(eipNamespace, "egress-pod2", node2Name, node2IPv4, egressPodLabel) + hnPod.Spec.HostNetwork = true + pods = append(pods, hnPod) + } + + dynamicNeighRouters := "true" + if config.OVNKubernetesFeature.EnableInterconnect { + dynamicNeighRouters = "false" + } + logicalRouterOptions := map[string]string{ + "dynamic_neigh_routers": dynamicNeighRouters, + } + expectedNatLogicalPort1 := "k8s-node1" + fakeOvn.startWithDBSetup( + libovsdbtest.TestSetup{ + NBData: []libovsdbtest.TestData{ + &nbdb.LogicalRouter{ + Name: types.OVNClusterRouter, + UUID: types.OVNClusterRouter + "-UUID", + // stale LRP reference to be removed by EIP controller sync function. + Policies: []string{"hostnet-pod-reroute-UUID1"}, + }, + &nbdb.LogicalRouter{ + Name: types.GWRouterPrefix + node1.Name, + UUID: types.GWRouterPrefix + node1.Name + "-UUID", + Ports: []string{types.GWRouterToJoinSwitchPrefix + types.GWRouterPrefix + node1.Name + "-UUID"}, + // stale NAT reference to be removed by EIP controller sync function. + Nat: []string{"hostnet-pod-egressip-nat-UUID1"}, + Options: logicalRouterOptions, + }, + &nbdb.LogicalRouter{ + Name: types.GWRouterPrefix + node2.Name, + UUID: types.GWRouterPrefix + node2.Name + "-UUID", + Ports: []string{types.GWRouterToJoinSwitchPrefix + types.GWRouterPrefix + node2.Name + "-UUID"}, + Options: logicalRouterOptions, + }, + // stale LRP and SNAT entries (for a host networked pod) to be removed by EIP controller sync function. + &nbdb.LogicalRouterPolicy{ + Priority: types.EgressIPReroutePriority, + Match: fmt.Sprintf("ip4.src == %s", node2IPv4), + Action: nbdb.LogicalRouterPolicyActionReroute, + Nexthops: []string{"100.64.0.2"}, + ExternalIDs: getEgressIPLRPReRouteDbIDs(egressIPName, eipNamespace, "egress-pod2", IPFamilyValueV4, types.DefaultNetworkName, DefaultNetworkControllerName).GetExternalIDs(), + UUID: "hostnet-pod-reroute-UUID1", + }, + &nbdb.NAT{ + UUID: "hostnet-pod-egressip-nat-UUID1", + LogicalIP: node2IPv4, + ExternalIP: egressIP1, + ExternalIDs: getEgressIPNATDbIDs(egressIPName, eipNamespace, "egress-pod2", IPFamilyValueV4, DefaultNetworkControllerName).GetExternalIDs(), + Type: nbdb.NATTypeSNAT, + LogicalPort: &expectedNatLogicalPort1, + Options: map[string]string{ + "stateless": "false", + }, + }, + &nbdb.LogicalRouterPort{ + UUID: types.GWRouterToJoinSwitchPrefix + types.GWRouterPrefix + node1.Name + "-UUID", + Name: types.GWRouterToJoinSwitchPrefix + types.GWRouterPrefix + node1.Name, + Networks: []string{nodeLogicalRouterIfAddrV4}, + }, + &nbdb.LogicalRouterPort{ + UUID: types.GWRouterToJoinSwitchPrefix + types.GWRouterPrefix + node2.Name + "-UUID", + Name: types.GWRouterToJoinSwitchPrefix + types.GWRouterPrefix + node2.Name, + Networks: []string{node2LogicalRouterIfAddrV4}, + }, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "-UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, + Type: "router", + Options: map[string]string{ + libovsdbops.RouterPort: types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, + "nat-addresses": "router", + "exclude-lb-vips-from-garp": "true", + }, + }, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "-UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, + Type: "router", + Options: map[string]string{ + libovsdbops.RouterPort: types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, + "nat-addresses": "router", + "exclude-lb-vips-from-garp": "true", + }, + }, + &nbdb.LogicalSwitch{ + UUID: types.ExternalSwitchPrefix + node1Name + "-UUID", + Name: types.ExternalSwitchPrefix + node1Name, + Ports: []string{types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "-UUID"}, + }, + &nbdb.LogicalSwitch{ + UUID: types.ExternalSwitchPrefix + node2Name + "-UUID", + Name: types.ExternalSwitchPrefix + node2Name, + Ports: []string{types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "-UUID"}, + }, + node1Switch, + node2Switch, + }, + }, + &egressipv1.EgressIPList{ + Items: []egressipv1.EgressIP{eIP}, + }, + &corev1.NodeList{ + Items: []corev1.Node{node1, node2}, + }, + &corev1.NamespaceList{ + Items: []corev1.Namespace{*egressNamespace}, + }, + &corev1.PodList{ + Items: pods, + }, + ) + + i, n, _ := net.ParseCIDR(podV4IP + "/23") + n.IP = i + fakeOvn.controller.logicalPortCache.add(&egressPod3, "", types.DefaultNetworkName, "", nil, []*net.IPNet{n}) + if pendingScheduledPodWithNoIP != nil { + fakeOvn.controller.logicalPortCache.add(pendingScheduledPodWithNoIP, "", types.DefaultNetworkName, "", nil, []*net.IPNet{}) + } + i, n, _ = net.ParseCIDR(podV4IP2 + "/23") + n.IP = i + fakeOvn.controller.logicalPortCache.add(&egressPod4, "", types.DefaultNetworkName, "", nil, []*net.IPNet{n}) + fakeOvn.controller.eIPC.nodeZoneState.Store(node1Name, true) + fakeOvn.controller.eIPC.nodeZoneState.Store(node2Name, true) + if isPodRemote { + fakeOvn.controller.eIPC.nodeZoneState.Store(node2Name, false) + } + + err := fakeOvn.controller.WatchEgressIPNamespaces() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchEgressIPPods() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchEgressNodes() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchEgressIP() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + egressSVCServedPodsASv4, _ := buildEgressServiceAddressSets(nil) + egressIPServedPodsASv4, _ := buildEgressIPServedPodsAddressSets([]string{podV4IP, podV4IP2}, types.DefaultNetworkName, fakeOvn.controller.eIPC.controllerName) + egressNodeIPsASv4, _ := buildEgressIPNodeAddressSets([]string{node1IPv4, node2IPv4}) + + node1Switch.QOSRules = []string{"default-QoS-UUID"} + if !isPodRemote { + node2Switch.QOSRules = []string{"default-QoS-UUID"} + } + expectedDatabaseState := []libovsdbtest.TestData{ + &nbdb.LogicalRouterPolicy{ + Priority: types.DefaultNoRereoutePriority, + Match: fmt.Sprintf("(ip4.src == $%s || ip4.src == $%s) && ip4.dst == $%s", + egressIPServedPodsASv4.Name, egressSVCServedPodsASv4.Name, egressNodeIPsASv4.Name), + Action: nbdb.LogicalRouterPolicyActionAllow, + UUID: "default-no-reroute-node-UUID", + Options: map[string]string{"pkt_mark": types.EgressIPNodeConnectionMark}, + ExternalIDs: getEgressIPLRPNoReRoutePodToNodeDbIDs(IPFamilyValueV4, types.DefaultNetworkName, fakeOvn.controller.eIPC.controllerName).GetExternalIDs(), + }, + getNoReRouteReplyTrafficPolicy(types.DefaultNetworkName, fakeOvn.controller.eIPC.controllerName), + &nbdb.LogicalRouterPolicy{ + Priority: types.DefaultNoRereoutePriority, + Match: "ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14", + Action: nbdb.LogicalRouterPolicyActionAllow, + UUID: "no-reroute-UUID", + ExternalIDs: getEgressIPLRPNoReRoutePodToPodDbIDs(IPFamilyValueV4, types.DefaultNetworkName, fakeOvn.controller.eIPC.controllerName).GetExternalIDs(), + }, + &nbdb.LogicalRouterPolicy{ + Priority: types.DefaultNoRereoutePriority, + Match: fmt.Sprintf("ip4.src == 10.128.0.0/14 && ip4.dst == %s", config.Gateway.V4JoinSubnet), + Action: nbdb.LogicalRouterPolicyActionAllow, + UUID: "no-reroute-service-UUID", + ExternalIDs: getEgressIPLRPNoReRoutePodToJoinDbIDs(IPFamilyValueV4, types.DefaultNetworkName, fakeOvn.controller.eIPC.controllerName).GetExternalIDs(), + }, + &nbdb.LogicalRouter{ + Name: types.OVNClusterRouter, + UUID: types.OVNClusterRouter + "-UUID", + Policies: []string{"no-reroute-UUID", "no-reroute-service-UUID", "default-no-reroute-node-UUID", "default-no-reroute-reply-traffic"}, + }, + &nbdb.LogicalRouter{ + Name: types.GWRouterPrefix + node1.Name, + UUID: types.GWRouterPrefix + node1.Name + "-UUID", + Ports: []string{types.GWRouterToJoinSwitchPrefix + types.GWRouterPrefix + node1.Name + "-UUID"}, + Options: logicalRouterOptions, + }, + &nbdb.LogicalRouter{ + Name: types.GWRouterPrefix + node2.Name, + UUID: types.GWRouterPrefix + node2.Name + "-UUID", + Ports: []string{types.GWRouterToJoinSwitchPrefix + types.GWRouterPrefix + node2.Name + "-UUID"}, + Options: logicalRouterOptions, + }, + &nbdb.LogicalRouterPort{ + UUID: types.GWRouterToJoinSwitchPrefix + types.GWRouterPrefix + node2.Name + "-UUID", + Name: types.GWRouterToJoinSwitchPrefix + types.GWRouterPrefix + node2.Name, + Networks: []string{node2LogicalRouterIfAddrV4}, + }, + &nbdb.LogicalRouterPort{ + UUID: types.GWRouterToJoinSwitchPrefix + types.GWRouterPrefix + node1.Name + "-UUID", + Name: types.GWRouterToJoinSwitchPrefix + types.GWRouterPrefix + node1.Name, + Networks: []string{nodeLogicalRouterIfAddrV4}, + }, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "-UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, + Type: "router", + Options: map[string]string{ + libovsdbops.RouterPort: types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, + "nat-addresses": "router", + "exclude-lb-vips-from-garp": "true", + }, + }, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "-UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, + Type: "router", + Options: map[string]string{ + libovsdbops.RouterPort: types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, + "nat-addresses": "router", + "exclude-lb-vips-from-garp": "true", + }, + }, + &nbdb.LogicalSwitch{ + UUID: types.ExternalSwitchPrefix + node1Name + "-UUID", + Name: types.ExternalSwitchPrefix + node1Name, + Ports: []string{types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "-UUID"}, + }, + &nbdb.LogicalSwitch{ + UUID: types.ExternalSwitchPrefix + node2Name + "-UUID", + Name: types.ExternalSwitchPrefix + node2Name, + Ports: []string{types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "-UUID"}, + }, + node1Switch, + node2Switch, + getDefaultQoSRule(false, types.DefaultNetworkName, fakeOvn.controller.eIPC.controllerName), + egressSVCServedPodsASv4, egressIPServedPodsASv4, egressNodeIPsASv4, + } + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) + + for i := 0; i < 10; i++ { + gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(0)) + node1.Labels = map[string]string{ + "k8s.ovn.org/egress-assignable": "", + } + _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Nodes().Update(context.TODO(), &node1, metav1.UpdateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + fakeOvn.patchEgressIPObj(node1Name, egressIPName, egressIP1) + gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) + eips, nodes := getEgressIPStatus(egressIPName) + gomega.Expect(nodes[0]).To(gomega.Equal(node1.Name)) + + expectedDatabaseState = []libovsdbtest.TestData{ + &nbdb.LogicalRouterPolicy{ + Priority: types.DefaultNoRereoutePriority, + Match: fmt.Sprintf("(ip4.src == $%s || ip4.src == $%s) && ip4.dst == $%s", + egressIPServedPodsASv4.Name, egressSVCServedPodsASv4.Name, egressNodeIPsASv4.Name), + Action: nbdb.LogicalRouterPolicyActionAllow, + UUID: "default-no-reroute-node-UUID", + Options: map[string]string{"pkt_mark": types.EgressIPNodeConnectionMark}, + ExternalIDs: getEgressIPLRPNoReRoutePodToNodeDbIDs(IPFamilyValueV4, types.DefaultNetworkName, fakeOvn.controller.eIPC.controllerName).GetExternalIDs(), + }, + getNoReRouteReplyTrafficPolicy(types.DefaultNetworkName, fakeOvn.controller.eIPC.controllerName), + &nbdb.LogicalRouterPolicy{ + Priority: types.DefaultNoRereoutePriority, + Match: "ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14", + Action: nbdb.LogicalRouterPolicyActionAllow, + UUID: "no-reroute-UUID", + ExternalIDs: getEgressIPLRPNoReRoutePodToPodDbIDs(IPFamilyValueV4, types.DefaultNetworkName, fakeOvn.controller.eIPC.controllerName).GetExternalIDs(), + }, + &nbdb.LogicalRouterPolicy{ + Priority: types.DefaultNoRereoutePriority, + Match: fmt.Sprintf("ip4.src == 10.128.0.0/14 && ip4.dst == %s", config.Gateway.V4JoinSubnet), + Action: nbdb.LogicalRouterPolicyActionAllow, + UUID: "no-reroute-service-UUID", + ExternalIDs: getEgressIPLRPNoReRoutePodToJoinDbIDs(IPFamilyValueV4, types.DefaultNetworkName, fakeOvn.controller.eIPC.controllerName).GetExternalIDs(), + }, + &nbdb.LogicalRouterPolicy{ + Priority: types.EgressIPReroutePriority, + Match: fmt.Sprintf("ip4.src == %s", egressPod3.Status.PodIP), + Action: nbdb.LogicalRouterPolicyActionReroute, + Nexthops: []string{"100.64.0.2"}, + ExternalIDs: getEgressIPLRPReRouteDbIDs(eIP.Name, egressPod3.Namespace, egressPod3.Name, IPFamilyValueV4, types.DefaultNetworkName, fakeOvn.controller.eIPC.controllerName).GetExternalIDs(), + UUID: "reroute-UUID1", + }, + &nbdb.LogicalRouterPolicy{ + Priority: types.EgressIPReroutePriority, + Match: fmt.Sprintf("ip4.src == %s", egressPod4.Status.PodIP), + Action: nbdb.LogicalRouterPolicyActionReroute, + Nexthops: []string{"100.64.0.2"}, + ExternalIDs: getEgressIPLRPReRouteDbIDs(eIP.Name, egressPod4.Namespace, egressPod4.Name, IPFamilyValueV4, types.DefaultNetworkName, fakeOvn.controller.eIPC.controllerName).GetExternalIDs(), + UUID: "reroute-UUID2", + }, + &nbdb.LogicalRouter{ + Name: types.OVNClusterRouter, + UUID: types.OVNClusterRouter + "-UUID", + Policies: []string{"no-reroute-UUID", "no-reroute-service-UUID", "default-no-reroute-node-UUID", "reroute-UUID1", "reroute-UUID2", "default-no-reroute-reply-traffic"}, + }, + &nbdb.LogicalRouter{ + Name: types.GWRouterPrefix + node1.Name, + UUID: types.GWRouterPrefix + node1.Name + "-UUID", + Ports: []string{types.GWRouterToJoinSwitchPrefix + types.GWRouterPrefix + node1.Name + "-UUID"}, + Nat: []string{"pod1-egressip-nat-UUID1", "pod4-egressip-nat-UUID1"}, + Options: logicalRouterOptions, + }, + &nbdb.LogicalRouter{ + Name: types.GWRouterPrefix + node2.Name, + UUID: types.GWRouterPrefix + node2.Name + "-UUID", + Ports: []string{types.GWRouterToJoinSwitchPrefix + types.GWRouterPrefix + node2.Name + "-UUID"}, + Options: logicalRouterOptions, + }, + &nbdb.NAT{ + UUID: "pod1-egressip-nat-UUID1", + LogicalIP: podV4IP, + ExternalIP: eips[0], + ExternalIDs: getEgressIPNATDbIDs(egressIPName, egressPod3.Namespace, egressPod3.Name, IPFamilyValueV4, fakeOvn.controller.controllerName).GetExternalIDs(), + Type: nbdb.NATTypeSNAT, + LogicalPort: &expectedNatLogicalPort1, + Options: map[string]string{ + "stateless": "false", + }, + }, + &nbdb.NAT{ + UUID: "pod4-egressip-nat-UUID1", + LogicalIP: podV4IP2, + ExternalIP: eips[0], + ExternalIDs: getEgressIPNATDbIDs(egressIPName, egressPod4.Namespace, egressPod4.Name, IPFamilyValueV4, fakeOvn.controller.controllerName).GetExternalIDs(), + Type: nbdb.NATTypeSNAT, + LogicalPort: &expectedNatLogicalPort1, + Options: map[string]string{ + "stateless": "false", + }, + }, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "-UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, + Type: "router", + Options: map[string]string{ + libovsdbops.RouterPort: types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, + "nat-addresses": "router", + "exclude-lb-vips-from-garp": "true", + }, + }, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "-UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, + Type: "router", + Options: map[string]string{ + libovsdbops.RouterPort: types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, + "nat-addresses": "router", + "exclude-lb-vips-from-garp": "true", + }, + }, + &nbdb.LogicalRouterPort{ + UUID: types.GWRouterToJoinSwitchPrefix + types.GWRouterPrefix + node2.Name + "-UUID", + Name: types.GWRouterToJoinSwitchPrefix + types.GWRouterPrefix + node2.Name, + Networks: []string{"100.64.0.3/29"}, + }, + &nbdb.LogicalRouterPort{ + UUID: types.GWRouterToJoinSwitchPrefix + types.GWRouterPrefix + node1.Name + "-UUID", + Name: types.GWRouterToJoinSwitchPrefix + types.GWRouterPrefix + node1.Name, + Networks: []string{"100.64.0.2/29"}, + }, + &nbdb.LogicalSwitch{ + UUID: types.ExternalSwitchPrefix + node1Name + "-UUID", + Name: types.ExternalSwitchPrefix + node1Name, + Ports: []string{types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "-UUID"}, + }, + &nbdb.LogicalSwitch{ + UUID: types.ExternalSwitchPrefix + node2Name + "-UUID", + Name: types.ExternalSwitchPrefix + node2Name, + Ports: []string{types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "-UUID"}, + }, + node1Switch, + node2Switch, + getDefaultQoSRule(false, types.DefaultNetworkName, fakeOvn.controller.eIPC.controllerName), + egressSVCServedPodsASv4, egressIPServedPodsASv4, egressNodeIPsASv4, + } + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) + + // Try out EgressIP failover scenarios for the next iteration. + node2.Labels = map[string]string{ + "k8s.ovn.org/egress-assignable": "", + } + _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Nodes().Update(context.TODO(), &node2, metav1.UpdateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + fakeOvn.patchEgressIPObj(node2Name, egressIPName, egressIP1) + gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) + _, nodes = getEgressIPStatus(egressIPName) + gomega.Expect(nodes[0]).To(gomega.Equal(node2.Name)) + // remove label from node1 and node2. + node1.Labels = map[string]string{} + _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Nodes().Update(context.TODO(), &node1, metav1.UpdateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + node2.Labels = map[string]string{} + _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Nodes().Update(context.TODO(), &node2, metav1.UpdateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + status := []egressipv1.EgressIPStatusItem{} + err = fakeOvn.controller.eIPC.patchReplaceEgressIPStatus(egressIPName, status) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + } + return nil + } + + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }, + ginkgo.Entry("unscheduled pending pod only, interconnect disabled", true, false, false, false, false), + ginkgo.Entry("scheduled pending pod only, interconnect disabled", false, true, false, false, false), + ginkgo.Entry("host-networked pod only, interconnect disabled", false, false, true, false, false), + ginkgo.Entry("unscheduled pending pod only, interconnect enabled, local pod", true, false, false, true, false), + ginkgo.Entry("scheduled pending pod only, interconnect enabled, local pod", false, true, false, true, false), + ginkgo.Entry("host-networked pod only, interconnect enabled, local pod", false, false, true, true, false), + ginkgo.Entry("unscheduled pending pod only, interconnect enabled, remote pod", true, false, false, true, true), + ginkgo.Entry("scheduled pending pod only, interconnect enabled, remote pod", false, true, false, true, true), + ginkgo.Entry("host-networked pod only, interconnect enabled, remote pod", false, false, true, true, true), + ) + ginkgo.It("should not deadlock when two EgressIP pods are hosted and egress through opposite nodes", func() { // This test is designed to reproduce a deadlock issue. // To reproduce undo the corrspoinding fix introduced in this commit and diff --git a/go-controller/pkg/testing/kube.go b/go-controller/pkg/testing/kube.go index d47564d61a..29e5f260dd 100644 --- a/go-controller/pkg/testing/kube.go +++ b/go-controller/pkg/testing/kube.go @@ -52,6 +52,18 @@ func MakeTerminatingNonServingEndpoint(node string, addresses ...string) discove } } +func MakeUnassignedEndpoint(addresses ...string) discovery.Endpoint { + return discovery.Endpoint{ + Conditions: discovery.EndpointConditions{ + Ready: ptr.To(true), + Serving: ptr.To(true), + Terminating: ptr.To(false), + }, + Addresses: addresses, + NodeName: nil, + } +} + func MirrorEndpointSlice(defaultEndpointSlice *discovery.EndpointSlice, network string, keepEndpoints bool) *discovery.EndpointSlice { mirror := defaultEndpointSlice.DeepCopy() mirror.Name = defaultEndpointSlice.Name + "-mirrored" diff --git a/go-controller/pkg/util/kube.go b/go-controller/pkg/util/kube.go index 7fb84610ea..0e030a4ae3 100644 --- a/go-controller/pkg/util/kube.go +++ b/go-controller/pkg/util/kube.go @@ -867,13 +867,6 @@ func GetEligibleEndpointAddressesFromSlices(endpointSlices []*discovery.Endpoint return getEligibleEndpointAddresses(getEndpointsFromEndpointSlices(endpointSlices), service, "") } -// GetLocalEligibleEndpointAddressesFromSlices returns a set of IP addresses of endpoints that are local to the specified node -// and are eligible. -func GetLocalEligibleEndpointAddressesFromSlices(endpointSlices []*discovery.EndpointSlice, service *corev1.Service, nodeName string) sets.Set[string] { - endpoints := getEligibleEndpointAddresses(getEndpointsFromEndpointSlices(endpointSlices), service, nodeName) - return sets.New(endpoints...) -} - // DoesEndpointSliceContainEndpoint returns true if the endpointslice // contains an endpoint with the given IP, port and Protocol and if this endpoint is considered eligible. func DoesEndpointSliceContainEligibleEndpoint(endpointSlice *discovery.EndpointSlice, @@ -893,7 +886,8 @@ func DoesEndpointSliceContainEligibleEndpoint(endpointSlice *discovery.EndpointS // HasLocalHostNetworkEndpoints returns true if any of the nodeAddresses appear in given the set of // localEndpointAddresses. This is useful to check whether any of the provided local endpoints are host-networked. -func HasLocalHostNetworkEndpoints(localEndpointAddresses sets.Set[string], nodeAddresses []net.IP) bool { +func HasLocalHostNetworkEndpoints(portToLBEndpoints PortToLBEndpoints, nodeAddresses []net.IP) bool { + localEndpointAddresses := portToLBEndpoints.GetAddresses() if len(localEndpointAddresses) == 0 || len(nodeAddresses) == 0 { return false } diff --git a/go-controller/pkg/util/kube_test.go b/go-controller/pkg/util/kube_test.go index d48da80bd4..63c0478768 100644 --- a/go-controller/pkg/util/kube_test.go +++ b/go-controller/pkg/util/kube_test.go @@ -13,7 +13,6 @@ import ( discovery "k8s.io/api/discovery/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" k8stypes "k8s.io/apimachinery/pkg/types" - "k8s.io/apimachinery/pkg/util/sets" clientsetfake "k8s.io/client-go/kubernetes/fake" "k8s.io/utils/ptr" @@ -610,22 +609,26 @@ func TestHasLocalHostNetworkEndpoints(t *testing.T) { nodeAddresses := []net.IP{ep1IP} var tests = []struct { name string - localEndpoints sets.Set[string] + localEndpoints PortToLBEndpoints want bool }{ { "Tests with local endpoints that include the node address", - sets.New(ep1Address, ep2Address), + PortToLBEndpoints{"test": LBEndpoints{ + V4IPs: []string{ep1Address, ep2Address}, + }}, true, }, { "Tests against a different local endpoint than the node address", - sets.New(ep2Address), + PortToLBEndpoints{"test": LBEndpoints{ + V4IPs: []string{ep2Address}, + }}, false, }, { "Tests against no local endpoints", - sets.New[string](), + PortToLBEndpoints{"test": LBEndpoints{}}, false, }, } diff --git a/go-controller/pkg/util/ovs.go b/go-controller/pkg/util/ovs.go index f6e3ca0ad0..12ed487c6a 100644 --- a/go-controller/pkg/util/ovs.go +++ b/go-controller/pkg/util/ovs.go @@ -362,11 +362,6 @@ func RunOVSAppctlWithTimeout(timeout int, args ...string) (string, string, error return strings.Trim(strings.TrimSpace(stdout.String()), "\""), stderr.String(), err } -// RunOVSAppctl runs a command via ovs-appctl. -func RunOVSAppctl(args ...string) (string, string, error) { - return RunOVSAppctlWithTimeout(ovsCommandTimeout, args...) -} - // RunOVNAppctlWithTimeout runs a command via ovn-appctl. If ovn-appctl is not present, then it // falls back to using ovs-appctl. func RunOVNAppctlWithTimeout(timeout int, args ...string) (string, string, error) { @@ -802,7 +797,7 @@ func DetectSCTPSupport() (bool, error) { // DetectCheckPktLengthSupport checks if OVN supports check packet length action in OVS kernel datapath func DetectCheckPktLengthSupport(bridge string) (bool, error) { - stdout, stderr, err := RunOVSAppctl("dpif/show-dp-features", bridge) + stdout, stderr, err := RunOvsVswitchdAppCtl("dpif/show-dp-features", bridge) if err != nil { klog.Errorf("Failed to query OVS for check packet length support, "+ "stdout: %q, stderr: %q, error: %v", stdout, stderr, err) @@ -824,7 +819,7 @@ func DetectCheckPktLengthSupport(bridge string) (bool, error) { func SetStaticFDBEntry(bridge, port string, mac net.HardwareAddr) error { // Assume default VLAN for local port vlan := "0" - stdout, stderr, err := RunOVSAppctl("fdb/add", bridge, port, vlan, mac.String()) + stdout, stderr, err := RunOvsVswitchdAppCtl("fdb/add", bridge, port, vlan, mac.String()) if err != nil { return fmt.Errorf("failed to add FDB entry to OVS for LOCAL port, "+ "stdout: %q, stderr: %q, error: %v", stdout, stderr, err) diff --git a/go-controller/pkg/util/ovs_unit_test.go b/go-controller/pkg/util/ovs_unit_test.go index ba6e63678a..3dcddc9a88 100644 --- a/go-controller/pkg/util/ovs_unit_test.go +++ b/go-controller/pkg/util/ovs_unit_test.go @@ -1092,47 +1092,6 @@ func TestRunOVSAppctlWithTimeout(t *testing.T) { } } -func TestRunOVSAppctl(t *testing.T) { - mockKexecIface := new(mock_k8s_io_utils_exec.Interface) - mockExecRunner := new(mocks.ExecRunner) - mockCmd := new(mock_k8s_io_utils_exec.Cmd) - // below is defined in ovs.go - runCmdExecRunner = mockExecRunner - // note runner is defined in ovs.go file - runner = &execHelper{exec: mockKexecIface} - tests := []struct { - desc string - expectedErr error - onRetArgsExecUtilsIface *ovntest.TestifyMockHelper - onRetArgsKexecIface *ovntest.TestifyMockHelper - }{ - { - desc: "negative: run `ovs-appctl` command", - expectedErr: fmt.Errorf("failed to execute ovs-appctl command"), - onRetArgsExecUtilsIface: &ovntest.TestifyMockHelper{OnCallMethodName: "RunCmd", OnCallMethodArgType: []string{"*mocks.Cmd", "string", "[]string", "string"}, RetArgList: []interface{}{nil, nil, fmt.Errorf("failed to execute ovs-appctl command")}}, - onRetArgsKexecIface: &ovntest.TestifyMockHelper{OnCallMethodName: "Command", OnCallMethodArgType: []string{"string", "string"}, RetArgList: []interface{}{mockCmd}}, - }, - { - desc: "positive: run `ovs-appctl` command", - expectedErr: nil, - onRetArgsExecUtilsIface: &ovntest.TestifyMockHelper{OnCallMethodName: "RunCmd", OnCallMethodArgType: []string{"*mocks.Cmd", "string", "[]string", "string"}, RetArgList: []interface{}{bytes.NewBuffer([]byte("testblah")), bytes.NewBuffer([]byte("")), nil}}, - onRetArgsKexecIface: &ovntest.TestifyMockHelper{OnCallMethodName: "Command", OnCallMethodArgType: []string{"string", "string"}, RetArgList: []interface{}{mockCmd}}, - }, - } - for i, tc := range tests { - t.Run(fmt.Sprintf("%d:%s", i, tc.desc), func(t *testing.T) { - ovntest.ProcessMockFn(&mockExecRunner.Mock, *tc.onRetArgsExecUtilsIface) - ovntest.ProcessMockFn(&mockKexecIface.Mock, *tc.onRetArgsKexecIface) - - _, _, e := RunOVSAppctl() - - assert.Equal(t, tc.expectedErr, e) - mockExecRunner.AssertExpectations(t) - mockKexecIface.AssertExpectations(t) - }) - } -} - func TestRunOVNAppctlWithTimeout(t *testing.T) { mockKexecIface := new(mock_k8s_io_utils_exec.Interface) mockExecRunner := new(mocks.ExecRunner) diff --git a/go-controller/pkg/util/util.go b/go-controller/pkg/util/util.go index fda62d3fcd..ef9d461518 100644 --- a/go-controller/pkg/util/util.go +++ b/go-controller/pkg/util/util.go @@ -7,6 +7,7 @@ import ( "hash/fnv" "net" "regexp" + "slices" "sort" "strconv" "strings" @@ -14,6 +15,7 @@ import ( "github.com/urfave/cli/v2" "golang.org/x/exp/constraints" + "golang.org/x/exp/maps" corev1 "k8s.io/api/core/v1" discoveryv1 "k8s.io/api/discovery/v1" @@ -679,3 +681,391 @@ func MustParseCIDR(cidr string) *net.IPNet { } return ipNet } + +// GetServicePortKey creates a unique identifier key for a service port using protocol and name. +// e.g. GetServicePortKey("TCP", "http") returns "TCP/http". +func GetServicePortKey(protocol corev1.Protocol, name string) string { + return fmt.Sprintf("%s/%s", protocol, name) +} + +// IPPort represents an IP address and port combination for load balancer destinations. +// e.g. IPPort{IP: "192.168.1.10", Port: 8080}. +type IPPort struct { + IP string + Port int32 +} + +// LBEndpoints contains load balancer endpoint information with IPv4 and IPv6 addresses. +// Port is the endpoint port (the one exposed by the pod) and IPs are the IP addresses of the backend pods. +// e.g. LBEndpoints{Port: 8080, V4IPs: []string{"192.168.1.10", "192.168.1.11"}, V6IPs: []string{"2001:db8::1"}}. +// TBD: currently, OVNK only supports a single backend port per named port. +type LBEndpoints struct { + Port int32 + V4IPs []string + V6IPs []string +} + +// GetV4Destinations builds IPv4 destination mappings from endpoint addresses to ports. +// e.g. for V4IPs ["192.168.1.10", "192.168.1.11"] and Port 8080, returns +// []IPPort{{IP: "192.168.1.10", Port: 8080}, {IP: "192.168.1.11", Port: 8080}}. +func (le LBEndpoints) GetV4Destinations() []IPPort { + destinations := []IPPort{} + for _, ip := range le.V4IPs { + destinations = append(destinations, IPPort{IP: ip, Port: le.Port}) + } + return destinations +} + +// GetV6Destinations builds IPv6 destination mappings from endpoint addresses to ports. +// e.g. for V6IPs ["2001:db8::1", "2001:db8::2"] and Port 8080, returns +// []IPPort{{IP: "2001:db8::1", Port: 8080}, {IP: "2001:db8::2", Port: 8080}}. +func (le LBEndpoints) GetV6Destinations() []IPPort { + destinations := []IPPort{} + for _, ip := range le.V6IPs { + destinations = append(destinations, IPPort{IP: ip, Port: le.Port}) + } + return destinations +} + +// PortToLBEndpoints maps service port keys (protocol + service port name) to load balancer endpoints. +// e.g. map["TCP/http"] = LBEndpoints{Port: 8080, V4IPs: []string{"192.168.1.10"}}. +// Port is the endpoint port (the one exposed by the pod) and IPs are the IP addresses of the backend pods. +type PortToLBEndpoints map[string]LBEndpoints + +// GetAddresses returns all unique IP addresses from all ports in the PortToLBEndpoints map. +// e.g. for PortToLBEndpoints{"TCP/http": {Port: 8080, V4IPs: ["192.168.1.10"]}, "UDP/dns": {Port: 53, V4IPs: ["192.168.1.11"]}}, +// returns sets.Set{"192.168.1.10", "192.168.1.11"}. +func (p PortToLBEndpoints) GetAddresses() sets.Set[string] { + s := sets.New[string]() + for _, lbEndpoints := range p { + s.Insert(lbEndpoints.V4IPs...) + s.Insert(lbEndpoints.V6IPs...) + } + return s +} + +// PortToNodeToLBEndpoints maps service port keys to node names and their load balancer endpoints. +// e.g. map["TCP/http"]["node1"] = LBEndpoints{Port: 8080, V4IPs: []string{"192.168.1.10"}}. +type PortToNodeToLBEndpoints map[string]map[string]LBEndpoints + +// GetNode extracts all port endpoints for a specific node from the PortToNodeToLBEndpoints map. +// e.g. for PortToNodeToLBEndpoints{"TCP/http": {"node1": {Port: 8080, V4IPs: ["192.168.1.10"]}, "node2": {Port: 8080, V4IPs: ["192.168.1.11"]}}} +// and node "node1", returns PortToLBEndpoints{"TCP/http": {Port: 8080, V4IPs: ["192.168.1.10"]}}. +func (p PortToNodeToLBEndpoints) GetNode(node string) PortToLBEndpoints { + r := make(PortToLBEndpoints) + for port, nodeToLBEndpoints := range p { + if lbe, ok := nodeToLBEndpoints[node]; ok { + r[port] = lbe + } + } + return r +} + +// GetEndpointsForService extracts endpoints from EndpointSlices for a given Service. +// It returns two maps. +// 1. Global endpoints: maps service port keys ("protocol/portname") to all endpoint addresses (if needsGlobalEndpoints). +// 2. Local endpoints: maps service port keys to per-node endpoint addresses (if needsLocalEndpoints). +// +// This method is common logic for both nodePortWatcher and the services Controller to build their +// service endpoints +// +// Special handling when service is nil: +// When service is nil (typically during deletion scenarios), all endpoint ports are accepted +// and processed without validation against service port specifications. This allows cleanup +// operations to proceed even when the service object is no longer available (needed for +// the nodePortWatcher). +// +// When service is not nil: +// Endpoint ports are validated against the service port specifications, and only matching +// ports are processed to ensure consistency with the service configuration. +// +// Parameters: +// - slices: EndpointSlices associated with the service +// - service: The Kubernetes Service object (nil during deletion scenarios) +// - nodes: Set of node names in the OVN zone (used to filter local endpoints) +// - needsGlobalEndpoints: request to populate PortToLBEndpoints +// - needsLocalEndpoints: request to populate PortToNodeToLBEndpoints +// +// Returns: +// - PortToLBEndpoints: Global endpoint mapping by port (empty if not needed) +// - PortToNodeToLBEndpoints: Per-node endpoint mapping by port (empty if not needed) +// - error: Validation errors encountered during processing +// +// Example output: +// +// Global: {"TCP/http": {Port: 8080, V4IPs: ["192.168.1.10", "192.168.1.11"]}} +// Local: {"TCP/http": {"node1": {Port: 8080, V4IPs: ["192.168.1.10"]}}} +// +// Validation requirements: +// - EndpointSlice port names must match Service port names (when service is not nil) +// - Only one protocol per port name is supported (Kubernetes requirement). +// - Only one target port number per protocol/port combination is supported (OVNKubernetes limitation). +func GetEndpointsForService(endpointSlices []*discoveryv1.EndpointSlice, service *corev1.Service, + nodes sets.Set[string], needsGlobalEndpoints, needsLocalEndpoints bool) (PortToLBEndpoints, PortToNodeToLBEndpoints, error) { + + var validationErrors []error + globalEndpoints := make(PortToLBEndpoints) + localEndpoints := make(PortToNodeToLBEndpoints) + + addValidationError := func(msg string, detail interface{}) { + ns, name := "", "" + if service != nil { + ns, name = service.GetNamespace(), service.GetName() + } + validationErrors = append(validationErrors, fmt.Errorf("%s for service \"%s/%s\": %v", msg, ns, name, detail)) + } + + // Parse endpoint slices into structured format: portName -> protocol -> portNumber -> endpoints. + targetEndpoints := newTargetEndpoints(endpointSlices) + + // Build list of valid service port keys, if a non-nil service was provided. Otherwise, if service is nil, this + // is a deletion (at least for the iptables / nftables logic) and thus accept all endpoints. + validServicePortKeys := map[string]bool{} + if service != nil { + for _, servicePort := range service.Spec.Ports { + name := GetServicePortKey(servicePort.Protocol, servicePort.Name) + validServicePortKeys[name] = true + } + } + + for portName, protocolMap := range targetEndpoints { + for protocol, portNumberMap := range protocolMap { + // If service is not nil, there's a valid 1 to 1 mapping of service name + protocol to slice name + protocol. + // Therefore, go through all ports of the service, and skip if no match found. + slicePortKey := GetServicePortKey(protocol, portName) + if service != nil && !validServicePortKeys[slicePortKey] { + continue + } + + if len(portNumberMap) == 0 { + // Return an error here as this should not happen. + addValidationError("service protocol has no associated endpoints", + fmt.Sprintf("servicePortKey %q", slicePortKey)) + continue + } + + // Process the first (and typically only) target port number. + // OVN currently does not support multiple target port numbers for the same service name. + portNumbers := maps.Keys(portNumberMap) + slices.Sort(portNumbers) + if len(portNumbers) > 1 { + addValidationError("OVN Kubernetes does not support more than one target port per service port", + fmt.Sprintf("servicePortKey %q portNumbers %v", + slicePortKey, portNumbers)) + } + targetPortNumber := portNumbers[0] + endpointList := portNumberMap[targetPortNumber] + // Build global endpoint mapping. + if needsGlobalEndpoints { + lbe, err := buildLBEndpoints(service, targetPortNumber, endpointList) + if err != nil { + klog.Warningf("Failed to build global endpoints for port %s: %v", slicePortKey, err) + continue + } + globalEndpoints[slicePortKey] = lbe + } + // Build per-node endpoint mapping if needed for traffic policies. + if needsLocalEndpoints { + if lbe, err := buildNodeLBEndpoints(service, targetPortNumber, endpointList, nodes); err == nil { + localEndpoints[slicePortKey] = lbe + } + } + } + } + + // Log endpoint mappings for debugging. + serviceString := "" + if service != nil { + serviceString = fmt.Sprintf(" for %s/%s", service.Namespace, service.Name) + } + if needsGlobalEndpoints { + klog.V(5).Infof("Global endpoints%s: %v", serviceString, globalEndpoints) + } + if needsLocalEndpoints { + klog.V(5).Infof("Local endpoints%s: %v", serviceString, localEndpoints) + } + + return globalEndpoints, localEndpoints, errors.Join(validationErrors...) +} + +// groupEndpointsByNode organizes a list of endpoints by their associated node names. +// Endpoints without a NodeName are skipped, as they cannot be assigned to specific nodes. +// This is used for building per-node endpoint mappings for local traffic policies. +// +// Parameters: +// - endpoints: List of discovery endpoints to group +// +// Returns: +// - map[string][]discoveryv1.Endpoint: Node name to endpoints mapping +func groupEndpointsByNode(endpoints []discoveryv1.Endpoint) map[string][]discoveryv1.Endpoint { + nodeEndpoints := map[string][]discoveryv1.Endpoint{} + for _, endpoint := range endpoints { + if endpoint.NodeName == nil { + continue + } + nodeName := *endpoint.NodeName + nodeEndpoints[nodeName] = append(nodeEndpoints[nodeName], endpoint) + } + return nodeEndpoints +} + +// buildNodeLBEndpoints creates a per-node mapping of load balancer endpoints. +// Only nodes present in the provided node set are included in the result. +// This is used for services with local traffic policies that require per-node endpoint tracking. +// +// Parameters: +// - service: The Kubernetes Service object (for endpoint filtering) +// - portNumber: The target port number for the endpoints +// - endpoints: List of endpoints to process +// - nodes: Set of valid node names to include +// +// Returns: +// - map[string]LBEndpoints: Node name to LBEndpoints mapping +func buildNodeLBEndpoints(service *corev1.Service, portNumber int32, endpoints []discoveryv1.Endpoint, nodes sets.Set[string]) (map[string]LBEndpoints, error) { + nodeLBEndpoints := map[string]LBEndpoints{} + + nodeEndpoints := groupEndpointsByNode(endpoints) + for node, nodeEndpoints := range nodeEndpoints { + if !nodes.Has(node) { + continue + } + lbe, err := buildLBEndpoints(service, portNumber, nodeEndpoints) + if err != nil { + klog.Warningf("Failed to build node endpoints for node %s port %d: %v", node, portNumber, err) + continue + } + nodeLBEndpoints[node] = lbe + } + + if len(nodeLBEndpoints) == 0 { + return nodeLBEndpoints, fmt.Errorf("empty node lb endpoints") + } + return nodeLBEndpoints, nil +} + +// buildLBEndpoints constructs an LBEndpoints structure from a list of discovery endpoints. +// It filters endpoints for eligibility, separates IPv4 and IPv6 addresses, and returns +// an empty LBEndpoints if no valid addresses are found. +// +// Parameters: +// - service: The Kubernetes Service object (used for endpoint eligibility filtering) +// service may be nil! +// - port: The target port number for the endpoints +// - endpoints: List of discovery endpoints to process +// +// Returns: +// - LBEndpoints: Structure containing IPv4/IPv6 addresses and port number +func buildLBEndpoints(service *corev1.Service, port int32, endpoints []discoveryv1.Endpoint) (LBEndpoints, error) { + addresses := GetEligibleEndpointAddresses(endpoints, service) + v4IPs, v4ErrorNoIP := MatchAllIPStringFamily(false, addresses) + v6IPs, v6ErrorNoIP := MatchAllIPStringFamily(true, addresses) + + if v4ErrorNoIP != nil && v6ErrorNoIP != nil { + if service != nil { + return LBEndpoints{}, fmt.Errorf("empty IP address endpoints for service %s/%s", service.Namespace, service.Name) + } else { + return LBEndpoints{}, fmt.Errorf("empty IP address endpoints") + } + } + + if port <= 0 || port > 65535 { + if service != nil { + return LBEndpoints{}, fmt.Errorf("invalid endpoint port %d for service %s/%s: port must be between 1-65535", + port, service.Namespace, service.Name) + } else { + return LBEndpoints{}, fmt.Errorf("invalid endpoint port %d: port must be between 1-65535", port) + } + } + + return LBEndpoints{ + V4IPs: v4IPs, + V6IPs: v6IPs, + Port: port, + }, nil +} + +// targetEndpoints provides a hierarchical mapping of endpoint data from EndpointSlices. +// Structure: port name -> protocol -> port number -> list of endpoints +// Example: +// +// targetEndpoints{ +// "http": { +// corev1.ProtocolTCP: { +// 8080: []discoveryv1.Endpoint{...} +// } +// } +// } +type targetEndpoints map[string]map[corev1.Protocol]map[int32][]discoveryv1.Endpoint + +// addEndpoint adds a discovery endpoint to the TargetEndpoints structure. +// It initializes nested maps as needed to maintain the hierarchical structure. +// Multiple endpoints can be added for the same port name/protocol/port number combination. +// +// Parameters: +// - portName: Name of the port (can be empty string for unnamed ports) +// - proto: Protocol (TCP, UDP, SCTP) +// - portNumber: Target port number +// - endpoint: The discovery endpoint to add +func (te targetEndpoints) addEndpoint(portName string, proto corev1.Protocol, portNumber int32, endpoint discoveryv1.Endpoint) { + if _, ok := te[portName]; !ok { + te[portName] = make(map[corev1.Protocol]map[int32][]discoveryv1.Endpoint) + } + if _, ok := te[portName][proto]; !ok { + te[portName][proto] = make(map[int32][]discoveryv1.Endpoint) + } + if _, ok := te[portName][proto][portNumber]; !ok { + te[portName][proto][portNumber] = []discoveryv1.Endpoint{} + } + te[portName][proto][portNumber] = append(te[portName][proto][portNumber], endpoint) +} + +// newTargetEndpoints constructs a TargetEndpoints structure from a list of EndpointSlices. +// It processes all endpoints from all slices, organizing them by port name, protocol, and port number. +// FQDN address types are skipped. +// +// Parameters: +// - slices: List of EndpointSlices to process +// +// Returns: +// - TargetEndpoints: Hierarchically organized endpoint data +func newTargetEndpoints(slices []*discoveryv1.EndpointSlice) targetEndpoints { + te := targetEndpoints{} + + for _, slice := range slices { + if slice == nil { + continue + } + + if slice.AddressType == discoveryv1.AddressTypeFQDN { + continue // consider only v4 and v6, discard FQDN + } + + for _, slicePort := range slice.Ports { + // Protocol and Port should never be nil per API; thus ignore invalid entries and log. + if slicePort.Protocol == nil || slicePort.Port == nil { + klog.Warningf("Skipped invalid slice port %+v belonging to slice %+v", slicePort, slice) + continue + } + portName := getPortName(slicePort.Name) + for _, endpoint := range slice.Endpoints { + te.addEndpoint(portName, *slicePort.Protocol, *slicePort.Port, endpoint) + } + } + } + return te +} + +// getPortName safely extracts the port name from a pointer, returning empty string if nil. +// This handles the case where EndpointSlice ports may have unnamed ports. +// +// Parameters: +// - name: Pointer to port name (may be nil) +// +// Returns: +// - string: Port name or empty string if nil +func getPortName(name *string) string { + if name == nil { + return "" + } + return *name +} diff --git a/test/e2e/external_gateways.go b/test/e2e/external_gateways.go index 628866910b..6f32d78159 100644 --- a/test/e2e/external_gateways.go +++ b/test/e2e/external_gateways.go @@ -88,10 +88,12 @@ type gatewayTestIPs struct { var _ = ginkgo.Describe("External Gateway", feature.ExternalGateway, func() { const ( - gwTCPPort = 80 - gwUDPPort = 90 - podTCPPort = 80 - podUDPPort = 90 + gwTCPPort = 80 + gwUDPPort = 90 + podTCPPort = 80 + podUDPPort = 90 + singleTargetRetries = 50 // enough attempts to avoid hashing to the same gateway + bfdTimeout = 4 * time.Second ) // Validate pods can reach a network running in a container's loopback address via @@ -577,7 +579,7 @@ var _ = ginkgo.Describe("External Gateway", feature.ExternalGateway, func() { returnedHostNames := make(map[string]struct{}) gwIP := addresses.targetIPs[0] success := false - for i := 0; i < 20; i++ { + for i := 0; i < singleTargetRetries; i++ { args := []string{"exec", srcPingPodName, "--"} if protocol == "tcp" { args = append(args, "bash", "-c", fmt.Sprintf("echo | nc -w 1 %s %d", gwIP, gwPort)) @@ -744,7 +746,7 @@ var _ = ginkgo.Describe("External Gateway", feature.ExternalGateway, func() { // Picking only the first address, the one the udp listener is set for gwIP := addresses.targetIPs[0] - for i := 0; i < 20; i++ { + for i := 0; i < singleTargetRetries; i++ { hostname := pokeHostnameViaNC(srcPodName, f.Namespace.Name, protocol, gwIP, gwPort) if hostname != "" { returnedHostNames[hostname] = struct{}{} @@ -1110,7 +1112,7 @@ var _ = ginkgo.Describe("External Gateway", feature.ExternalGateway, func() { ginkgo.By("Deleting one container") err := providerCtx.DeleteExternalContainer(gwContainers[1]) framework.ExpectNoError(err, "failed to delete external container %s", gwContainers[1].Name) - time.Sleep(3 * time.Second) // bfd timeout + time.Sleep(bfdTimeout) tcpDumpSync = sync.WaitGroup{} tcpDumpSync.Add(1) @@ -1166,7 +1168,7 @@ var _ = ginkgo.Describe("External Gateway", feature.ExternalGateway, func() { returnedHostNames := make(map[string]struct{}) gwIP := addresses.targetIPs[0] success := false - for i := 0; i < 20; i++ { + for i := 0; i < singleTargetRetries; i++ { hostname := pokeHostnameViaNC(srcPingPodName, f.Namespace.Name, protocol, gwIP, gwPort) if hostname != "" { returnedHostNames[hostname] = struct{}{} @@ -1188,7 +1190,7 @@ var _ = ginkgo.Describe("External Gateway", feature.ExternalGateway, func() { err := providerCtx.DeleteExternalContainer(gwContainers[1]) framework.ExpectNoError(err, "failed to delete external container %s", gwContainers[1].Name) ginkgo.By("Waiting for BFD to sync") - time.Sleep(3 * time.Second) // bfd timeout + time.Sleep(bfdTimeout) // ECMP should direct all the traffic to the only container expectedHostName := hostNameForExternalContainer(gwContainers[0]) @@ -1327,7 +1329,7 @@ var _ = ginkgo.Describe("External Gateway", feature.ExternalGateway, func() { ginkgo.By("Deleting one container") err := providerCtx.DeleteExternalContainer(gwContainers[1]) framework.ExpectNoError(err, "failed to delete external container %s", gwContainers[1].Name) - time.Sleep(3 * time.Second) // bfd timeout + time.Sleep(bfdTimeout) pingSync = sync.WaitGroup{} tcpDumpSync = sync.WaitGroup{} @@ -1387,7 +1389,7 @@ var _ = ginkgo.Describe("External Gateway", feature.ExternalGateway, func() { // Picking only the first address, the one the udp listener is set for gwIP := addresses.targetIPs[0] - for i := 0; i < 20; i++ { + for i := 0; i < singleTargetRetries; i++ { hostname := pokeHostnameViaNC(srcPodName, f.Namespace.Name, protocol, gwIP, gwPort) if hostname != "" { returnedHostNames[hostname] = struct{}{} @@ -1408,7 +1410,7 @@ var _ = ginkgo.Describe("External Gateway", feature.ExternalGateway, func() { err := providerCtx.DeleteExternalContainer(gwContainers[1]) framework.ExpectNoError(err, "failed to delete external container %s", gwContainers[1].Name) ginkgo.By("Waiting for BFD to sync") - time.Sleep(3 * time.Second) // bfd timeout + time.Sleep(bfdTimeout) // ECMP should direct all the traffic to the only container expectedHostName := hostNameForExternalContainer(gwContainers[0]) @@ -1554,7 +1556,7 @@ var _ = ginkgo.Describe("External Gateway", feature.ExternalGateway, func() { returnedHostNames := make(map[string]struct{}) gwIP := addresses.targetIPs[0] success := false - for i := 0; i < 20; i++ { + for i := 0; i < singleTargetRetries; i++ { args := []string{"exec", srcPingPodName, "--"} if protocol == "tcp" { args = append(args, "bash", "-c", fmt.Sprintf("echo | nc -w 1 %s %d", gwIP, gwPort)) @@ -1849,7 +1851,7 @@ var _ = ginkgo.Describe("External Gateway", feature.ExternalGateway, func() { // Picking only the first address, the one the udp listener is set for gwIP := addresses.targetIPs[0] - for i := 0; i < 20; i++ { + for i := 0; i < singleTargetRetries; i++ { hostname := pokeHostnameViaNC(srcPodName, f.Namespace.Name, protocol, gwIP, gwPort) if hostname != "" { returnedHostNames[hostname] = struct{}{} @@ -2215,7 +2217,7 @@ var _ = ginkgo.Describe("External Gateway", feature.ExternalGateway, func() { err := providerCtx.DeleteExternalContainer(gwContainers[1]) framework.ExpectNoError(err, "failed to delete external container %s", gwContainers[1].Name) - time.Sleep(3 * time.Second) // bfd timeout + time.Sleep(bfdTimeout) tcpDumpSync = sync.WaitGroup{} tcpDumpSync.Add(1) @@ -2273,7 +2275,7 @@ var _ = ginkgo.Describe("External Gateway", feature.ExternalGateway, func() { returnedHostNames := make(map[string]struct{}) gwIP := addresses.targetIPs[0] success := false - for i := 0; i < 20; i++ { + for i := 0; i < singleTargetRetries; i++ { hostname := pokeHostnameViaNC(srcPingPodName, f.Namespace.Name, protocol, gwIP, gwPort) if hostname != "" { returnedHostNames[hostname] = struct{}{} @@ -2295,7 +2297,7 @@ var _ = ginkgo.Describe("External Gateway", feature.ExternalGateway, func() { err := providerCtx.DeleteExternalContainer(gwContainers[1]) framework.ExpectNoError(err, "failed to delete external container %s", gwContainers[1].Name) ginkgo.By("Waiting for BFD to sync") - time.Sleep(3 * time.Second) // bfd timeout + time.Sleep(bfdTimeout) // ECMP should direct all the traffic to the only container expectedHostName := hostNameForExternalContainer(gwContainers[0]) @@ -2432,7 +2434,7 @@ var _ = ginkgo.Describe("External Gateway", feature.ExternalGateway, func() { ginkgo.By("Deleting one container") err := providerCtx.DeleteExternalContainer(gwContainers[1]) framework.ExpectNoError(err, "failed to delete external container %s", gwContainers[1].Name) - time.Sleep(3 * time.Second) // bfd timeout + time.Sleep(bfdTimeout) pingSync = sync.WaitGroup{} tcpDumpSync = sync.WaitGroup{} @@ -2491,7 +2493,7 @@ var _ = ginkgo.Describe("External Gateway", feature.ExternalGateway, func() { // Picking only the first address, the one the udp listener is set for gwIP := addresses.targetIPs[0] - for i := 0; i < 20; i++ { + for i := 0; i < singleTargetRetries; i++ { hostname := pokeHostnameViaNC(srcPodName, f.Namespace.Name, protocol, gwIP, gwPort) if hostname != "" { returnedHostNames[hostname] = struct{}{} @@ -2512,7 +2514,7 @@ var _ = ginkgo.Describe("External Gateway", feature.ExternalGateway, func() { err := providerCtx.DeleteExternalContainer(gwContainers[1]) framework.ExpectNoError(err, "failed to delete external container %s", gwContainers[1].Name) ginkgo.By("Waiting for BFD to sync") - time.Sleep(3 * time.Second) // bfd timeout + time.Sleep(bfdTimeout) // ECMP should direct all the traffic to the only container expectedHostName := hostNameForExternalContainer(gwContainers[0]) @@ -2663,7 +2665,7 @@ var _ = ginkgo.Describe("External Gateway", feature.ExternalGateway, func() { returnedHostNames := make(map[string]struct{}) gwIP := addresses.targetIPs[0] success := false - for i := 0; i < 20; i++ { + for i := 0; i < singleTargetRetries; i++ { args := []string{"exec", srcPingPodName, "--"} if protocol == "tcp" { args = append(args, "bash", "-c", fmt.Sprintf("echo | nc -w 1 %s %d", gwIP, gwPort)) diff --git a/test/e2e/service.go b/test/e2e/service.go index 6acfc77c8d..2fc970be96 100644 --- a/test/e2e/service.go +++ b/test/e2e/service.go @@ -75,8 +75,8 @@ var _ = ginkgo.Describe("Services", feature.Service, func() { }) ginkgo.It("Allow connection to an external IP using a source port that is equal to a node port", func() { + var nodePort int32 = 31990 const ( - nodePort = 31990 connTimeout = "2" dstIPv4 = "1.1.1.1" dstPort = "80" @@ -86,10 +86,21 @@ var _ = ginkgo.Describe("Services", feature.Service, func() { } ginkgo.By("create node port service") jig := e2eservice.NewTestJig(cs, f.Namespace.Name, serviceName) - _, err := jig.CreateTCPService(context.TODO(), func(svc *v1.Service) { - svc.Spec.Type = v1.ServiceTypeNodePort - svc.Spec.Ports[0].NodePort = nodePort - }) + err := wait.PollImmediate(10*time.Second, 60*time.Second, + func() (bool, error) { + _, err := jig.CreateTCPService(context.TODO(), func(svc *v1.Service) { + svc.Spec.Type = v1.ServiceTypeNodePort + svc.Spec.Ports[0].NodePort = nodePort + }) + if err != nil { + if strings.Contains(err.Error(), "provided port is already allocated") { + nodePort++ + return false, nil + } + return false, err + } + return true, nil + }) framework.ExpectNoError(err, "failed to create TCP node port service") ginkgo.By("create pod selected by node port service") serverPod := e2epod.NewAgnhostPod(f.Namespace.Name, "svc-backend", nil, nil, nil) @@ -100,7 +111,7 @@ var _ = ginkgo.Describe("Services", feature.Service, func() { e2epod.NewPodClient(f).CreateSync(context.TODO(), clientPod) ginkgo.By("connect externally pinning the source port to equal the node port") _, err = e2ekubectl.RunKubectl(clientPod.Namespace, "exec", clientPod.Name, "--", "nc", - "-p", strconv.Itoa(nodePort), "-z", "-w", connTimeout, dstIPv4, dstPort) + "-p", strconv.Itoa(int(nodePort)), "-z", "-w", connTimeout, dstIPv4, dstPort) framework.ExpectNoError(err, "expected connection to succeed using source port identical to node port") }) @@ -917,14 +928,20 @@ var _ = ginkgo.Describe("Services", feature.Service, func() { framework.ExpectNoError(err) ginkgo.By("Waiting for the endpoints to pop up") + var expectedEndpointsNum int + if isDualStackCluster(nodes) { + expectedEndpointsNum = len(endPoints) * 2 + } else { + expectedEndpointsNum = len(endPoints) + } err = framework.WaitForServiceEndpointsNum(context.TODO(), f.ClientSet, f.Namespace.Name, - etpLocalServiceName, len(endPoints), time.Second, wait.ForeverTestTimeout) + etpLocalServiceName, expectedEndpointsNum, time.Second, wait.ForeverTestTimeout) framework.ExpectNoError(err, "failed to validate endpoints for service %s in namespace: %s", etpLocalServiceName, f.Namespace.Name) err = framework.WaitForServiceEndpointsNum(context.TODO(), f.ClientSet, f.Namespace.Name, - etpClusterServiceName, len(endPoints), time.Second, wait.ForeverTestTimeout) + etpClusterServiceName, expectedEndpointsNum, time.Second, wait.ForeverTestTimeout) framework.ExpectNoError(err, "failed to validate endpoints for service %s in namespace: %s", etpClusterServiceName, f.Namespace.Name) @@ -1171,13 +1188,20 @@ spec: ginkgo.By("Waiting for the endpoints to pop up") + var expectedEndpointsNum int + if isDualStackCluster(nodes) { + expectedEndpointsNum = len(endPoints) * 2 + } else { + expectedEndpointsNum = len(endPoints) + } + err = framework.WaitForServiceEndpointsNum(context.TODO(), f.ClientSet, f.Namespace.Name, - etpLocalServiceName, len(endPoints), time.Second, wait.ForeverTestTimeout) + etpLocalServiceName, expectedEndpointsNum, time.Second, wait.ForeverTestTimeout) framework.ExpectNoError(err, "failed to validate endpoints for service %s in namespace: %s", etpLocalServiceName, f.Namespace.Name) err = framework.WaitForServiceEndpointsNum(context.TODO(), f.ClientSet, f.Namespace.Name, - etpClusterServiceName, len(endPoints), time.Second, wait.ForeverTestTimeout) + etpClusterServiceName, expectedEndpointsNum, time.Second, wait.ForeverTestTimeout) framework.ExpectNoError(err, "failed to validate endpoints for service %s in namespace: %s", etpClusterServiceName, f.Namespace.Name) @@ -1911,30 +1935,162 @@ spec: time.Sleep(time.Second * 5) // buffer to ensure all rules are created correctly - checkNumberOfETPRules := func(value int, pattern string) wait.ConditionFunc { - return func() (bool, error) { - numberOfETPRules := pokeNodeIPTableRules(backendNodeName, pattern) - return (numberOfETPRules == value), nil - } + noSNATServicesSet := "mgmtport-no-snat-services-v4" + if utilnet.IsIPv6String(svcLoadBalancerIP) { + noSNATServicesSet = "mgmtport-no-snat-services-v6" + } + + err = wait.PollImmediate(retryInterval, retryTimeout, checkNumberOfETPRules(backendNodeName, 2, "OVN-KUBE-ETP")) + framework.ExpectNoError(err, "Couldn't fetch the correct number of iptable rules, err: %v", err) + err = wait.PollImmediate(retryInterval, retryTimeout, checkNumberOfETPRules(backendNodeName, 5, "OVN-KUBE-EXTERNALIP")) + framework.ExpectNoError(err, "Couldn't fetch the correct number of iptable rules, err: %v", err) + err = wait.PollImmediate(retryInterval, retryTimeout, checkNumberOfNFTElements(backendNodeName, 0, noSNATServicesSet)) + framework.ExpectNoError(err, "Couldn't fetch the correct number of nftables elements, err: %v", err) + err = wait.PollImmediate(retryInterval, retryTimeout, checkNumberOfNFTElements(backendNodeName, 0, "mgmtport-no-snat-nodeports")) + framework.ExpectNoError(err, "Couldn't fetch the correct number of nftables elements, err: %v", err) + + ginkgo.By("by sending a TCP packet to service " + svcName + " with type=LoadBalancer in namespace " + namespaceName + " with backend pod " + backendName) + externalContainer := infraapi.ExternalContainer{Name: externalClientContainerName} + _, err = wgetInExternalContainer(externalContainer, svcLoadBalancerIP, endpointHTTPPort, "big.iso") + framework.ExpectNoError(err, "failed to curl load balancer service") + + ginkgo.By("patching service " + svcName + " to allocateLoadBalancerNodePorts=false and externalTrafficPolicy=local") + + err = patchServiceBoolValue(f.ClientSet, svcName, "default", "/spec/allocateLoadBalancerNodePorts", false) + framework.ExpectNoError(err) + + output := e2ekubectl.RunKubectlOrDie("default", "get", "svc", svcName, "-o=jsonpath='{.spec.allocateLoadBalancerNodePorts}'") + gomega.Expect(output).To(gomega.Equal("'false'")) + + err = patchServiceStringValue(f.ClientSet, svcName, "default", "/spec/externalTrafficPolicy", "Local") + framework.ExpectNoError(err) + + output = e2ekubectl.RunKubectlOrDie("default", "get", "svc", svcName, "-o=jsonpath='{.spec.externalTrafficPolicy}'") + gomega.Expect(output).To(gomega.Equal("'Local'")) + + time.Sleep(time.Second * 5) // buffer to ensure all rules are created correctly + + err = wait.PollImmediate(retryInterval, retryTimeout, checkNumberOfETPRules(backendNodeName, 10, "OVN-KUBE-ETP")) + framework.ExpectNoError(err, "Couldn't fetch the correct number of iptable rules, err: %v", err) + err = wait.PollImmediate(retryInterval, retryTimeout, checkNumberOfNFTElements(backendNodeName, 8, noSNATServicesSet)) + framework.ExpectNoError(err, "Couldn't fetch the correct number of nftables elements, err: %v", err) + err = wait.PollImmediate(retryInterval, retryTimeout, checkNumberOfNFTElements(backendNodeName, 0, "mgmtport-no-snat-nodeports")) + framework.ExpectNoError(err, "Couldn't fetch the correct number of nftables elements, err: %v", err) + + ginkgo.By("by sending a TCP packet to service " + svcName + " with type=LoadBalancer in namespace " + namespaceName + " with backend pod " + backendName) + + _, err = wgetInExternalContainer(externalContainer, svcLoadBalancerIP, endpointHTTPPort, "big.iso") + framework.ExpectNoError(err, "failed to curl load balancer service") + + pktSize := 60 + if utilnet.IsIPv6String(svcLoadBalancerIP) { + pktSize = 80 } - checkNumberOfNFTElements := func(value int, name string) wait.ConditionFunc { - return func() (bool, error) { - numberOfNFTElements := countNFTablesElements(backendNodeName, name) - return (numberOfNFTElements == value), nil + err = wait.PollImmediate(retryInterval, retryTimeout, checkNumberOfETPRules(backendNodeName, 1, fmt.Sprintf("[1:%d] -A OVN-KUBE-ETP", pktSize))) + framework.ExpectNoError(err, "Couldn't fetch the correct number of iptable rules, err: %v", err) + // FIXME: This used to check that the no-snat rule had been hit, but nftables + // doesn't attach counters to rules unless you explicitly request them, which + // we don't... Is this check really needed? + + ginkgo.By("Scale down endpoints of service: " + svcName + " to ensure iptable rules are also getting recreated correctly") + e2ekubectl.RunKubectlOrDie("default", "scale", "deployment", backendName, "--replicas=3") + err = framework.WaitForServiceEndpointsNum(context.TODO(), f.ClientSet, namespaceName, svcName, 3, time.Second, time.Second*120) + framework.ExpectNoError(err, fmt.Sprintf("service: %s never had an endpoint, err: %v", svcName, err)) + time.Sleep(time.Second * 5) // buffer to ensure all rules are created correctly + + // number of rules/elements should have decreased by 2 (one for the TCP port, + // one for UDP) + err = wait.PollImmediate(retryInterval, retryTimeout, checkNumberOfETPRules(backendNodeName, 8, "OVN-KUBE-ETP")) + framework.ExpectNoError(err, "Couldn't fetch the correct number of iptable rules, err: %v", err) + err = wait.PollImmediate(retryInterval, retryTimeout, checkNumberOfNFTElements(backendNodeName, 6, noSNATServicesSet)) + framework.ExpectNoError(err, "Couldn't fetch the correct number of nftables elements, err: %v", err) + err = wait.PollImmediate(retryInterval, retryTimeout, checkNumberOfNFTElements(backendNodeName, 0, "mgmtport-no-snat-nodeports")) + framework.ExpectNoError(err, "Couldn't fetch the correct number of nftables elements, err: %v", err) + + ginkgo.By("by sending a TCP packet to service " + svcName + " with type=LoadBalancer in namespace " + namespaceName + " with backend pod " + backendName) + + _, err = wgetInExternalContainer(externalContainer, svcLoadBalancerIP, endpointHTTPPort, "big.iso") + framework.ExpectNoError(err, "failed to curl load balancer service") + + err = wait.PollImmediate(retryInterval, retryTimeout, checkNumberOfETPRules(backendNodeName, 1, fmt.Sprintf("[1:%d] -A OVN-KUBE-ETP", pktSize))) + framework.ExpectNoError(err, "Couldn't fetch the correct number of iptable rules, err: %v", err) + // FIXME: This used to check that the no-snat rule had been hit, but nftables + // doesn't attach counters to rules unless you explicitly request them, which + // we don't... Is this check really needed? + }) + + ginkgo.It("Should ensure load balancer service works with 0 node ports when named targetPorts are used and ETP=local", func() { + err := WaitForServingAndReadyServiceEndpointsNum(context.TODO(), f.ClientSet, namespaceName, svcName, 4, time.Second, time.Second*180) + framework.ExpectNoError(err, fmt.Sprintf("service: %s never had an endpoint, err: %v", svcName, err)) + + svcLoadBalancerIP, err := getServiceLoadBalancerIP(f.ClientSet, namespaceName, svcName) + framework.ExpectNoError(err, fmt.Sprintf("failed to get service lb ip: %s, err: %v", svcName, err)) + + time.Sleep(time.Second * 5) // buffer to ensure all rules are created correctly + + checkExactETPRules := func(noSNATServicesSet string) { + svc, err := f.ClientSet.CoreV1().Services(namespaceName).Get(context.TODO(), svcName, metav1.GetOptions{}) + framework.ExpectNoError(err) + + // Retrieve the loadbalancer's IP + var lbIP string + if len(svc.Status.LoadBalancer.Ingress) > 0 { + lbIP = svc.Status.LoadBalancer.Ingress[0].IP + } + + discoveryClient := f.ClientSet.DiscoveryV1() + endpointSlices, err := discoveryClient.EndpointSlices(namespaceName).List(context.TODO(), metav1.ListOptions{ + LabelSelector: fmt.Sprintf("kubernetes.io/service-name=%s", svcName), + }) + framework.ExpectNoError(err) + + // Retrieve unique addresses from all endpointslices + uniqueAddresses := sets.New[string]() + for _, es := range endpointSlices.Items { + uniqueAddresses = uniqueAddresses.Union(getServingAndReadyEndpointSliceAddresses(es)) } + + mask := 32 + if utilnet.IsIPv6String(lbIP) { + mask = 128 + } + + // Build regex patterns for iptables rules using lbIP and uniqueAddresses + var patterns []string + var sets [][]string + for address := range uniqueAddresses { + tcpPattern := fmt.Sprintf(".*-A OVN-KUBE-ETP -d %s/%d -p tcp -m tcp --dport 80 -m statistic "+ + "--mode random --probability .* -j DNAT --to-destination %s$", + regexp.QuoteMeta(lbIP), mask, regexp.QuoteMeta(net.JoinHostPort(address, "80"))) + patterns = append(patterns, tcpPattern) + udpPattern := fmt.Sprintf(".*-A OVN-KUBE-ETP -d %s/%d -p udp -m udp --dport 10001 -m statistic "+ + "--mode random --probability .* -j DNAT --to-destination %s$", + regexp.QuoteMeta(lbIP), mask, regexp.QuoteMeta(net.JoinHostPort(address, "10001"))) + patterns = append(patterns, udpPattern) + + sets = append(sets, []string{address, "tcp", "80"}) + sets = append(sets, []string{address, "udp", "10001"}) + } + err = wait.PollImmediate(retryInterval, retryTimeout, checkIPTablesRulesPresent(backendNodeName, patterns)) + framework.ExpectNoError(err, "Couldn't fetch the correct iptables rules, expected to find: %v, err: %v", patterns, err) + err = wait.PollImmediate(retryInterval, retryTimeout, checkNFTElementsPresent(backendNodeName, noSNATServicesSet, sets)) + framework.ExpectNoError(err, "Couldn't fetch the correct nft elements, expected to find: %v, err: %v", sets, err) } + noSNATServicesSet := "mgmtport-no-snat-services-v4" if utilnet.IsIPv6String(svcLoadBalancerIP) { noSNATServicesSet = "mgmtport-no-snat-services-v6" } - err = wait.PollImmediate(retryInterval, retryTimeout, checkNumberOfETPRules(2, "OVN-KUBE-ETP")) + // Initial sanity check. + ginkgo.By("checking number of firewall rules for baseline") + err = wait.PollImmediate(retryInterval, retryTimeout, checkNumberOfETPRules(backendNodeName, 2, "OVN-KUBE-ETP")) framework.ExpectNoError(err, "Couldn't fetch the correct number of iptable rules, err: %v", err) - err = wait.PollImmediate(retryInterval, retryTimeout, checkNumberOfETPRules(5, "OVN-KUBE-EXTERNALIP")) + err = wait.PollImmediate(retryInterval, retryTimeout, checkNumberOfETPRules(backendNodeName, 5, "OVN-KUBE-EXTERNALIP")) framework.ExpectNoError(err, "Couldn't fetch the correct number of iptable rules, err: %v", err) - err = wait.PollImmediate(retryInterval, retryTimeout, checkNumberOfNFTElements(0, noSNATServicesSet)) + err = wait.PollImmediate(retryInterval, retryTimeout, checkNumberOfNFTElements(backendNodeName, 0, noSNATServicesSet)) framework.ExpectNoError(err, "Couldn't fetch the correct number of nftables elements, err: %v", err) - err = wait.PollImmediate(retryInterval, retryTimeout, checkNumberOfNFTElements(0, "mgmtport-no-snat-nodeports")) + err = wait.PollImmediate(retryInterval, retryTimeout, checkNumberOfNFTElements(backendNodeName, 0, "mgmtport-no-snat-nodeports")) framework.ExpectNoError(err, "Couldn't fetch the correct number of nftables elements, err: %v", err) ginkgo.By("by sending a TCP packet to service " + svcName + " with type=LoadBalancer in namespace " + namespaceName + " with backend pod " + backendName) @@ -1942,12 +2098,38 @@ spec: _, err = wgetInExternalContainer(externalContainer, svcLoadBalancerIP, endpointHTTPPort, "big.iso") framework.ExpectNoError(err, "failed to curl load balancer service") + // Patch the service to use named ports. + ginkgo.By("patching service " + svcName + " to named ports") + err = patchServiceStringValue(f.ClientSet, svcName, "default", "/spec/ports/0/targetPort", "http") + framework.ExpectNoError(err) + err = patchServiceStringValue(f.ClientSet, svcName, "default", "/spec/ports/1/targetPort", "udp") + framework.ExpectNoError(err) + output := e2ekubectl.RunKubectlOrDie("default", "get", "svc", svcName, "-o=jsonpath='{.spec.ports[0].targetPort}'") + gomega.Expect(output).To(gomega.Equal("'http'")) + output = e2ekubectl.RunKubectlOrDie("default", "get", "svc", svcName, "-o=jsonpath='{.spec.ports[1].targetPort}'") + gomega.Expect(output).To(gomega.Equal("'udp'")) + time.Sleep(time.Second * 5) // buffer to ensure all rules are created correctly + ginkgo.By("checking number of firewall rules for named ports") + err = wait.PollImmediate(retryInterval, retryTimeout, checkNumberOfETPRules(backendNodeName, 2, "OVN-KUBE-ETP")) + framework.ExpectNoError(err, "Couldn't fetch the correct number of iptable rules, err: %v", err) + err = wait.PollImmediate(retryInterval, retryTimeout, checkNumberOfETPRules(backendNodeName, 5, "OVN-KUBE-EXTERNALIP")) + framework.ExpectNoError(err, "Couldn't fetch the correct number of iptable rules, err: %v", err) + err = wait.PollImmediate(retryInterval, retryTimeout, checkNumberOfNFTElements(backendNodeName, 0, noSNATServicesSet)) + framework.ExpectNoError(err, "Couldn't fetch the correct number of nftables elements, err: %v", err) + err = wait.PollImmediate(retryInterval, retryTimeout, checkNumberOfNFTElements(backendNodeName, 0, "mgmtport-no-snat-nodeports")) + framework.ExpectNoError(err, "Couldn't fetch the correct number of nftables elements, err: %v", err) + ginkgo.By("by sending a TCP packet to service " + svcName + " with type=LoadBalancer in namespace " + namespaceName + " with backend pod " + backendName) + externalContainer = infraapi.ExternalContainer{Name: externalClientContainerName} + _, err = wgetInExternalContainer(externalContainer, svcLoadBalancerIP, endpointHTTPPort, "big.iso") + framework.ExpectNoError(err, "failed to curl load balancer service") + + // Patch the service to use allocateLoadBalancerNodeProts=false and externalTrafficPolicy=local. ginkgo.By("patching service " + svcName + " to allocateLoadBalancerNodePorts=false and externalTrafficPolicy=local") err = patchServiceBoolValue(f.ClientSet, svcName, "default", "/spec/allocateLoadBalancerNodePorts", false) framework.ExpectNoError(err) - output := e2ekubectl.RunKubectlOrDie("default", "get", "svc", svcName, "-o=jsonpath='{.spec.allocateLoadBalancerNodePorts}'") + output = e2ekubectl.RunKubectlOrDie("default", "get", "svc", svcName, "-o=jsonpath='{.spec.allocateLoadBalancerNodePorts}'") gomega.Expect(output).To(gomega.Equal("'false'")) err = patchServiceStringValue(f.ClientSet, svcName, "default", "/spec/externalTrafficPolicy", "Local") @@ -1958,13 +2140,17 @@ spec: time.Sleep(time.Second * 5) // buffer to ensure all rules are created correctly - err = wait.PollImmediate(retryInterval, retryTimeout, checkNumberOfETPRules(10, "OVN-KUBE-ETP")) + ginkgo.By("checking number of firewall rules for allocateLoadBalancerNodePorts=false and externalTrafficPolicy=local") + err = wait.PollImmediate(retryInterval, retryTimeout, checkNumberOfETPRules(backendNodeName, 10, "OVN-KUBE-ETP")) framework.ExpectNoError(err, "Couldn't fetch the correct number of iptable rules, err: %v", err) - err = wait.PollImmediate(retryInterval, retryTimeout, checkNumberOfNFTElements(8, noSNATServicesSet)) + err = wait.PollImmediate(retryInterval, retryTimeout, checkNumberOfNFTElements(backendNodeName, 8, noSNATServicesSet)) framework.ExpectNoError(err, "Couldn't fetch the correct number of nftables elements, err: %v", err) - err = wait.PollImmediate(retryInterval, retryTimeout, checkNumberOfNFTElements(0, "mgmtport-no-snat-nodeports")) + err = wait.PollImmediate(retryInterval, retryTimeout, checkNumberOfNFTElements(backendNodeName, 0, "mgmtport-no-snat-nodeports")) framework.ExpectNoError(err, "Couldn't fetch the correct number of nftables elements, err: %v", err) + ginkgo.By("checking exact ETP firewall rules for allocateLoadBalancerNodePorts=false and externalTrafficPolicy=local") + checkExactETPRules(noSNATServicesSet) + ginkgo.By("by sending a TCP packet to service " + svcName + " with type=LoadBalancer in namespace " + namespaceName + " with backend pod " + backendName) _, err = wgetInExternalContainer(externalContainer, svcLoadBalancerIP, endpointHTTPPort, "big.iso") @@ -1974,7 +2160,7 @@ spec: if utilnet.IsIPv6String(svcLoadBalancerIP) { pktSize = 80 } - err = wait.PollImmediate(retryInterval, retryTimeout, checkNumberOfETPRules(1, fmt.Sprintf("[1:%d] -A OVN-KUBE-ETP", pktSize))) + err = wait.PollImmediate(retryInterval, retryTimeout, checkNumberOfETPRules(backendNodeName, 1, fmt.Sprintf("[1:%d] -A OVN-KUBE-ETP", pktSize))) framework.ExpectNoError(err, "Couldn't fetch the correct number of iptable rules, err: %v", err) // FIXME: This used to check that the no-snat rule had been hit, but nftables // doesn't attach counters to rules unless you explicitly request them, which @@ -1982,29 +2168,45 @@ spec: ginkgo.By("Scale down endpoints of service: " + svcName + " to ensure iptable rules are also getting recreated correctly") e2ekubectl.RunKubectlOrDie("default", "scale", "deployment", backendName, "--replicas=3") - err = framework.WaitForServiceEndpointsNum(context.TODO(), f.ClientSet, namespaceName, svcName, 3, time.Second, time.Second*120) + err = WaitForServingAndReadyServiceEndpointsNum(context.TODO(), f.ClientSet, namespaceName, svcName, 3, time.Second, time.Second*180) framework.ExpectNoError(err, fmt.Sprintf("service: %s never had an endpoint, err: %v", svcName, err)) time.Sleep(time.Second * 5) // buffer to ensure all rules are created correctly // number of rules/elements should have decreased by 2 (one for the TCP port, // one for UDP) - err = wait.PollImmediate(retryInterval, retryTimeout, checkNumberOfETPRules(8, "OVN-KUBE-ETP")) + ginkgo.By("checking number of firewall rules after scale down") + err = wait.PollImmediate(retryInterval, retryTimeout, checkNumberOfETPRules(backendNodeName, 8, "OVN-KUBE-ETP")) framework.ExpectNoError(err, "Couldn't fetch the correct number of iptable rules, err: %v", err) - err = wait.PollImmediate(retryInterval, retryTimeout, checkNumberOfNFTElements(6, noSNATServicesSet)) + err = wait.PollImmediate(retryInterval, retryTimeout, checkNumberOfNFTElements(backendNodeName, 6, noSNATServicesSet)) framework.ExpectNoError(err, "Couldn't fetch the correct number of nftables elements, err: %v", err) - err = wait.PollImmediate(retryInterval, retryTimeout, checkNumberOfNFTElements(0, "mgmtport-no-snat-nodeports")) + err = wait.PollImmediate(retryInterval, retryTimeout, checkNumberOfNFTElements(backendNodeName, 0, "mgmtport-no-snat-nodeports")) framework.ExpectNoError(err, "Couldn't fetch the correct number of nftables elements, err: %v", err) + ginkgo.By("checking exact ETP firewall rules for allocateLoadBalancerNodePorts=false and externalTrafficPolicy=local after scale down") + checkExactETPRules(noSNATServicesSet) ginkgo.By("by sending a TCP packet to service " + svcName + " with type=LoadBalancer in namespace " + namespaceName + " with backend pod " + backendName) _, err = wgetInExternalContainer(externalContainer, svcLoadBalancerIP, endpointHTTPPort, "big.iso") framework.ExpectNoError(err, "failed to curl load balancer service") - err = wait.PollImmediate(retryInterval, retryTimeout, checkNumberOfETPRules(1, fmt.Sprintf("[1:%d] -A OVN-KUBE-ETP", pktSize))) + err = wait.PollImmediate(retryInterval, retryTimeout, checkNumberOfETPRules(backendNodeName, 1, fmt.Sprintf("[1:%d] -A OVN-KUBE-ETP", pktSize))) framework.ExpectNoError(err, "Couldn't fetch the correct number of iptable rules, err: %v", err) // FIXME: This used to check that the no-snat rule had been hit, but nftables // doesn't attach counters to rules unless you explicitly request them, which // we don't... Is this check really needed? + + // Also test proper deletion logic. + ginkgo.By("deleting the service") + e2ekubectl.RunKubectlOrDie("default", "delete", "service", svcName) + ginkgo.By("checking number of firewall rules after service delete") + err = wait.PollImmediate(retryInterval, retryTimeout, checkNumberOfETPRules(backendNodeName, 2, "OVN-KUBE-ETP")) + framework.ExpectNoError(err, "Couldn't fetch the correct number of iptable rules, err: %v", err) + err = wait.PollImmediate(retryInterval, retryTimeout, checkNumberOfETPRules(backendNodeName, 3, "OVN-KUBE-EXTERNALIP")) + framework.ExpectNoError(err, "Couldn't fetch the correct number of iptable rules, err: %v", err) + err = wait.PollImmediate(retryInterval, retryTimeout, checkNumberOfNFTElements(backendNodeName, 0, noSNATServicesSet)) + framework.ExpectNoError(err, "Couldn't fetch the correct number of nftables elements, err: %v", err) + err = wait.PollImmediate(retryInterval, retryTimeout, checkNumberOfNFTElements(backendNodeName, 0, "mgmtport-no-snat-nodeports")) + framework.ExpectNoError(err, "Couldn't fetch the correct number of nftables elements, err: %v", err) }) ginkgo.It("Should ensure load balancer service works when ETP=local and session affinity is set", func() { @@ -2532,19 +2734,25 @@ func countServingAndReadyEndpointsSlicesNum(epList *discoveryv1.EndpointSliceLis // Only count unique addresses that are Ready and not Terminating addresses := sets.New[string]() for _, epSlice := range epList.Items { - for _, ep := range epSlice.Endpoints { - cond := ep.Conditions - ready := cond.Ready == nil || *cond.Ready - serving := cond.Serving == nil || *cond.Serving - terminating := cond.Terminating != nil && *cond.Terminating - if !ready || !serving || terminating { - continue - } - if len(ep.Addresses) == 0 || ep.Addresses[0] == "" { - continue - } - addresses.Insert(ep.Addresses[0]) - } + addresses = addresses.Union(getServingAndReadyEndpointSliceAddresses(epSlice)) } return addresses.Len() } + +func getServingAndReadyEndpointSliceAddresses(epSlice discoveryv1.EndpointSlice) sets.Set[string] { + addresses := sets.New[string]() + for _, ep := range epSlice.Endpoints { + cond := ep.Conditions + ready := cond.Ready == nil || *cond.Ready + serving := cond.Serving == nil || *cond.Serving + terminating := cond.Terminating != nil && *cond.Terminating + if !ready || !serving || terminating { + continue + } + if len(ep.Addresses) == 0 || ep.Addresses[0] == "" { + continue + } + addresses.Insert(ep.Addresses[0]) + } + return addresses +} diff --git a/test/e2e/util.go b/test/e2e/util.go index 6523c367a7..3d880d587a 100644 --- a/test/e2e/util.go +++ b/test/e2e/util.go @@ -10,6 +10,7 @@ import ( "os" "path/filepath" "regexp" + "slices" "strconv" "strings" "text/template" @@ -937,14 +938,19 @@ func patchService(c kubernetes.Interface, serviceName, serviceNamespace, jsonPat return nil } -// pokeNodeIPTableRules returns the number of iptables (both ipv6 and ipv4) rules that match the provided pattern -func pokeNodeIPTableRules(nodeName, pattern string) int { +func getNodeIPTRules(nodeName string) string { ipt4Rules, err := infraprovider.Get().ExecK8NodeCommand(nodeName, []string{"iptables-save", "-c"}) framework.ExpectNoError(err, "failed to get iptables rules from node %s", nodeName) ipt6Rules, err := infraprovider.Get().ExecK8NodeCommand(nodeName, []string{"ip6tables-save", "-c"}) framework.ExpectNoError(err, "failed to get ip6tables rules from node %s", nodeName) iptRules := ipt4Rules + ipt6Rules framework.Logf("DEBUG: Dumping IPTRules %v", iptRules) + return iptRules +} + +// pokeNodeIPTableRules returns the number of iptables (both ipv6 and ipv4) rules that match the provided pattern +func pokeNodeIPTableRules(nodeName, pattern string) int { + iptRules := getNodeIPTRules(nodeName) numOfMatchRules := 0 for _, iptRule := range strings.Split(iptRules, "\n") { match := strings.Contains(iptRule, pattern) @@ -956,13 +962,59 @@ func pokeNodeIPTableRules(nodeName, pattern string) int { return numOfMatchRules } -// countNFTablesElements returns the number of nftables elements in the indicated set -// of the "ovn-kubernetes" table. -func countNFTablesElements(nodeName, name string) int { +func countIPTablesRulesMatches(nodeName string, patterns []string) int { + numMatches := 0 + iptRules := getNodeIPTRules(nodeName) + for _, pattern := range patterns { + for _, iptRule := range strings.Split(iptRules, "\n") { + matched, err := regexp.MatchString(pattern, iptRule) + if err == nil && matched { + numMatches++ + } + } + } + return numMatches +} + +type Elem []string + +func (e *Elem) UnmarshalJSON(data []byte) error { + var str string + var i int + var concatenation map[string][]json.RawMessage + if err := json.Unmarshal(data, &str); err == nil { + *e = []string{str} + return nil + } + if err := json.Unmarshal(data, &i); err == nil { + *e = []string{fmt.Sprintf("%d", i)} + return nil + } + if err := json.Unmarshal(data, &concatenation); err == nil { + concat := concatenation["concat"] + for _, rawMsg := range concat { + var str string + var i int + if err := json.Unmarshal(rawMsg, &str); err == nil { + *e = append(*e, str) + } + if err := json.Unmarshal(rawMsg, &i); err == nil { + *e = append(*e, fmt.Sprintf("%d", i)) + } + } + return nil + } + return fmt.Errorf("could not unmarshal %s", string(data)) +} + +func getNFTablesElements(nodeName, name string) ([]Elem, error) { + array := []Elem{} + nftCmd := []string{"nft", "-j", "list", "set", "inet", "ovn-kubernetes", name} nftElements, err := infraprovider.Get().ExecK8NodeCommand(nodeName, nftCmd) - framework.ExpectNoError(err, "failed to get nftables elements from node %s", nodeName) - + if err != nil { + return array, err + } framework.Logf("DEBUG: Dumping NFTElements %v", nftElements) // The output will look like // @@ -985,23 +1037,84 @@ func countNFTablesElements(nodeName, name string) int { // } // // (Where the "elem" element will be omitted if the set is empty.) - // We just parse this optimistically and catch the panic if it fails. - count := -1 - defer func() { - if recover() != nil { - framework.Logf("JSON parsing error!") - } - }() - jsonResult := map[string][]map[string]map[string]any{} - json.Unmarshal([]byte(nftElements), &jsonResult) + jsonResult := map[string][]map[string]map[string]json.RawMessage{} + if err := json.Unmarshal([]byte(nftElements), &jsonResult); err != nil { + return array, err + } elem := jsonResult["nftables"][1]["set"]["elem"] if elem == nil { - return 0 + return array, err + } + err = json.Unmarshal(elem, &array) + return array, err +} + +// countNFTablesElements returns the number of nftables elements in the indicated set +// of the "ovn-kubernetes" table. +func countNFTablesElements(nodeName, name string) int { + defer ginkgo.GinkgoRecover() + array, err := getNFTablesElements(nodeName, name) + framework.ExpectNoError(err, "failed to get nftables elements from node %s", nodeName) + return len(array) +} + +func countNFTablesRulesMatches(nodeName, name string, sets [][]string) int { + numMatches := 0 + array, err := getNFTablesElements(nodeName, name) + framework.ExpectNoError(err, "failed to get nftables elements from node %s", nodeName) + for _, set := range sets { + for _, elem := range array { + if slices.Equal(set, elem) { + numMatches++ + } + } + } + return numMatches +} + +func checkNumberOfETPRules(backendNodeName string, value int, pattern string) wait.ConditionFunc { + return func() (bool, error) { + numberOfETPRules := pokeNodeIPTableRules(backendNodeName, pattern) + isExpected := numberOfETPRules == value + if !isExpected { + framework.Logf("numberOfETPRules got: %d, expected: %d", numberOfETPRules, value) + } + return isExpected, nil + } +} +func checkNumberOfNFTElements(backendNodeName string, value int, name string) wait.ConditionFunc { + return func() (bool, error) { + numberOfNFTElements := countNFTablesElements(backendNodeName, name) + isExpected := numberOfNFTElements == value + if !isExpected { + framework.Logf("numberOfNFTElements got: %d, expected: %d", numberOfNFTElements, value) + } + return isExpected, nil + } +} + +func checkIPTablesRulesPresent(backendNodeName string, patterns []string) wait.ConditionFunc { + return func() (bool, error) { + numMatches := countIPTablesRulesMatches(backendNodeName, patterns) + isExpected := numMatches == len(patterns) + if !isExpected { + framework.Logf("checkIPTablesRulesPresent got: numMatches: %d, expected: %d", + numMatches, len(patterns)) + } + return isExpected, nil + } +} +func checkNFTElementsPresent(backendNodeName, name string, sets [][]string) wait.ConditionFunc { + return func() (bool, error) { + numMatches := countNFTablesRulesMatches(backendNodeName, name, sets) + isExpected := numMatches == len(sets) + if !isExpected { + framework.Logf("checkNFTElementsPresent got: numMatches: %d, expected: %d", + numMatches, len(sets)) + } + return isExpected, nil } - elemArray := elem.([]any) - count = len(elemArray) - return count } // isDualStackCluster returns 'true' if at least one of the nodes has more than one node subnet. diff --git a/test/scripts/e2e-cp.sh b/test/scripts/e2e-cp.sh index 1ab06622f6..4d096c6ad6 100755 --- a/test/scripts/e2e-cp.sh +++ b/test/scripts/e2e-cp.sh @@ -68,7 +68,7 @@ if [ "$OVN_HA" == false ]; then # TODO streamline the db delete tests skip "recovering from deleting db files while maintaining connectivity" skip "Should validate connectivity before and after deleting all the db-pods at once in HA mode" -else +else skip "Should validate connectivity before and after deleting all the db-pods at once in Non-HA mode" skip "e2e br-int NetFlow export validation" fi @@ -150,7 +150,7 @@ else if [ "$ENABLE_NETWORK_SEGMENTATION" = true ]; then skip_label "Feature:RouteAdvertisements && EXTENDED" fi - + # Some test don't work when the default network is advertised, either because # the configuration that the test excercises does not make sense for an advertised network, or # there is some bug or functional gap @@ -185,9 +185,6 @@ else # https://issues.redhat.com/browse/OCPBUGS-55028 skip "e2e egress IP validation Cluster Default Network \[secondary-host-eip\]" - # https://issues.redhat.com/browse/OCPBUGS-50636 - skip "Services of type NodePort should listen on each host addresses" - skip "Services of type NodePort should work on secondary node interfaces for ETP=local and ETP=cluster when backend pods are also served by EgressIP" # https://github.com/ovn-kubernetes/ovn-kubernetes/issues/5240 skip "e2e control plane test node readiness according to its defaults interface MTU size should get node not ready with a too small MTU" diff --git a/test/scripts/traffic-flow-tests.sh b/test/scripts/traffic-flow-tests.sh index 9fb88acfb4..e597730f30 100755 --- a/test/scripts/traffic-flow-tests.sh +++ b/test/scripts/traffic-flow-tests.sh @@ -8,7 +8,7 @@ export OCI_BIN="${KIND_EXPERIMENTAL_PROVIDER:-docker}" export TFT_TEST_IMAGE="ghcr.io/ovn-kubernetes/kubernetes-traffic-flow-tests:latest" export TRAFFIC_FLOW_TESTS_DIRNAME="kubernetes-traffic-flow-tests" export TRAFFIC_FLOW_TESTS_REPO="https://github.com/ovn-kubernetes/kubernetes-traffic-flow-tests.git" -export TRAFFIC_FLOW_TESTS_COMMIT="3e092f2c448bdffcddd5d025b97df53145ccea62" +export TRAFFIC_FLOW_TESTS_COMMIT="ce924ee2113a80e8d7b58671da020a2c9a21fd26" export TRAFFIC_FLOW_TESTS="${TRAFFIC_FLOW_TESTS:-1,2,3}" export SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &> /dev/null && pwd)"