From cc4bba03d5de054429b342cb14303a2dc357d00b Mon Sep 17 00:00:00 2001 From: Martin Kennelly Date: Sun, 17 Aug 2025 18:40:39 +0100 Subject: [PATCH 1/4] OVN EIP: add UT for syncing next hops for v4/v6 Signed-off-by: Martin Kennelly (cherry picked from commit 2735d6baf4897125623be2c99151ad1842c72dc4) (cherry picked from commit 562c7495e8be16428cb4c7eabab03178ffa7fba8) (cherry picked from commit 8049fdf8a82fe7de8717b6c0dd0e1835d7959057) (cherry picked from commit 7b12cc64f993fdb522fab06d55384338f9e01e73) --- go-controller/pkg/ovn/egressip_test.go | 232 +++++++++++++++++++++++++ 1 file changed, 232 insertions(+) diff --git a/go-controller/pkg/ovn/egressip_test.go b/go-controller/pkg/ovn/egressip_test.go index d001e0c1df..931867e981 100644 --- a/go-controller/pkg/ovn/egressip_test.go +++ b/go-controller/pkg/ovn/egressip_test.go @@ -24,6 +24,7 @@ import ( "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" "github.com/urfave/cli/v2" + corev1 "k8s.io/api/core/v1" v1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" k8stypes "k8s.io/apimachinery/pkg/types" @@ -55,6 +56,7 @@ const ( v6GatewayIP = "ae70::1" v6Node1Subnet = "ae70::66/64" v6Node2Subnet = "be70::66/64" + v6Node3Subnet = "ce70::66/64" v4ClusterSubnet = "10.128.0.0/14" v4Node1Subnet = "10.128.0.0/16" v4Node2Subnet = "10.90.0.0/16" @@ -12434,6 +12436,236 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { }) }) + ginkgotable.DescribeTable("remove invalid next hop from LRP", func(isV6 bool) { + // scenario is OVNDB is config'd with pod is local and EIP egress is local and remote. + // The EgressIPs assigned Nodes is however different from the OVNDB config therefore we expect Sync to + // reconcile and update the local pod LRP (attached to cluster router) next hops to reflect the current state. + // removes invalid next hop from LRP. Controller may have missed the signal that the EIP moved to another node. + app.Action = func(*cli.Context) error { + config.OVNKubernetesFeature.EnableInterconnect = true + if isV6 { + config.IPv6Mode = true + } else { + config.IPv4Mode = true + } + getSupportedByIPFamily := func(inputForV4, inputForV6 string) string { // helper to return the current supported single stack IP family (IP/CIDR/IP family name) + if isV6 { + return inputForV6 + } + return inputForV4 + } + getSupportedOVNIPFamily := func() string { + if isV6 { + return "6" + } + return "4" + } + type nodeInfo struct { // help construct a Node obj / ovn config for each test node + nodeName string + primaryINFIP string + podSubnet string + transitSWIP string + } + // node 1 is index 0, node 2 is index 1, node 3 is index 2 in the slice + nodes := []nodeInfo{{node1Name, getSupportedByIPFamily("192.168.126.12", "fc00:f853:ccd:e793::2"), + getSupportedByIPFamily(v4Node1Subnet, v6Node1Subnet), getSupportedByIPFamily("100.88.0.2", "fd97::2")}, + {node2Name, getSupportedByIPFamily("192.168.126.13", "fc00:f853:ccd:e793::3"), + getSupportedByIPFamily(v4Node2Subnet, v6Node2Subnet), getSupportedByIPFamily("100.88.0.3", "fd97::3")}, + {node3Name, getSupportedByIPFamily("192.168.126.14", "fc00:f853:ccd:e793::4"), + getSupportedByIPFamily(v4Node3Subnet, v6Node3Subnet), getSupportedByIPFamily("100.88.0.4", "fd97::4")}} + mask := "/24" // use same mask for all IP family's since it doesn't matter for this test + + generateNodeObj := func(n nodeInfo) corev1.Node { + // used to populate annotations - supports only a single IP family + addValueToIPFamilyKey := func(value string) string { + key := "ipv4" + if isV6 { + key = "ipv6" + } + return fmt.Sprintf("\"%s\":\"%s\"", key, value) + } + nodeAnnotations := map[string]string{ + "k8s.ovn.org/l3-gateway-config": fmt.Sprintf(`{"default":{"mode":"shared","mac-address":"7e:57:f8:f0:3c:49", "ip-address":"%s", "next-hop":"192.168.126.1"}}`, n.primaryINFIP+mask), + "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{%s}", addValueToIPFamilyKey(n.primaryINFIP+mask)), + "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":[\"%s\"]}", n.podSubnet), + "k8s.ovn.org/node-transit-switch-port-ifaddr": fmt.Sprintf("{%s}", addValueToIPFamilyKey(n.transitSWIP+mask)), + "k8s.ovn.org/zone-name": n.nodeName, + util.OVNNodeHostCIDRs: fmt.Sprintf("[\"%s\"]", n.primaryINFIP+mask), + } + return getNodeObj(n.nodeName, nodeAnnotations, map[string]string{}) + } + node1, node2, node3 := generateNodeObj(nodes[0]), generateNodeObj(nodes[1]), generateNodeObj(nodes[2]) + egressNamespace := newNamespace(eipNamespace) + podIP := getSupportedByIPFamily(podV4IP, podV6IP) + egressPod := newPodWithLabels(eipNamespace, podName, node1.Name, podIP, egressPodLabel) + eipIP1 := getSupportedByIPFamily("192.168.126.200", "fc00:f853:ccd:e793::200") + eipIP2 := getSupportedByIPFamily("192.168.126.201", "fc00:f853:ccd:e793::201") + ginkgo.By("creating EgressIP that is assigned to node 2 and node 3") + eIPObj := egressipv1.EgressIP{ + ObjectMeta: newEgressIPMeta(egressIPName), + Spec: egressipv1.EgressIPSpec{ + EgressIPs: []string{ + eipIP1, + eipIP2, + }, + PodSelector: metav1.LabelSelector{ + MatchLabels: egressPodLabel, + }, + NamespaceSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{ + "name": egressNamespace.Name, + }, + }, + }, + Status: egressipv1.EgressIPStatus{ + Items: []egressipv1.EgressIPStatusItem{ + // node 1 was assigned eipIP1 however, it is no longer assigned to this node. The OVN DBs will start + // with node 1 configured with IP eipIP1 and upon sync, the OVN DB must update to reflect + // the state defined here in the EgressIPStatus + { + Node: node2.Name, // remote + EgressIP: eipIP1, + }, + { + Node: node3.Name, // remote + EgressIP: eipIP2, + }, + }, + }, + } + ginkgo.By("start OVN DBs with egress nodes set to node 1 & 2 (EIP status states node 2 & 3)") + getNodeLogicalPortName := func(nodeName string) *string { n := "k8s-" + nodeName; return &n } + // node 1 is a stale next hop. EIP IP moved to node 3. + node1GWToJoinIP := getSupportedByIPFamily(nodeLogicalRouterIPv4[0], nodeLogicalRouterIPv6[0]) + staleHops := []string{node1GWToJoinIP, nodes[1].transitSWIP} // nodes[1] is node 2 + + fakeOvn.startWithDBSetup( + libovsdbtest.TestSetup{ + NBData: []libovsdbtest.TestData{ + &nbdb.NBGlobal{UUID: "nbglobal-UUID", Name: node1.Name}, + // LRPs to support EIP assigned to local and a remote node + getReRoutePolicy(podIP, getSupportedOVNIPFamily(), "stale-reroute-UUID", staleHops, eipExternalID), + // stale NAT to support EIP that was previously assigned to the local node but OVN DB config persists + &nbdb.NAT{ + UUID: "stale-nat-UUID", + LogicalIP: podIP, + ExternalIP: eipIP1, + ExternalIDs: map[string]string{ + "name": egressIPName, + }, + Type: nbdb.NATTypeSNAT, + LogicalPort: getNodeLogicalPortName(node1.Name), + Options: map[string]string{ + "stateless": "false", + }, + }, + &nbdb.LogicalRouterPort{ + UUID: types.GWRouterToJoinSwitchPrefix + types.GWRouterPrefix + node1.Name + "-UUID", + Name: types.GWRouterToJoinSwitchPrefix + types.GWRouterPrefix + node1.Name, + Networks: []string{node1GWToJoinIP + mask}, + }, + &nbdb.LogicalRouter{ + Name: types.OVNClusterRouter, + UUID: types.OVNClusterRouter + "-UUID", + Policies: []string{"stale-reroute-UUID"}, + }, + &nbdb.LogicalRouter{ + Name: types.GWRouterPrefix + node1.Name, + UUID: types.GWRouterPrefix + node1.Name + "-UUID", + Ports: []string{types.GWRouterToJoinSwitchPrefix + types.GWRouterPrefix + node1.Name + "-UUID"}, + Nat: []string{"stale-nat-UUID"}, + }, + &nbdb.LogicalSwitchPort{ + UUID: "k8s-" + node1.Name + "-UUID", + Name: "k8s-" + node1.Name, + Addresses: []string{"fe:1a:b2:3f:0e:fb " + util.GetNodeManagementIfAddr( + ovntest.MustParseIPNet(nodes[0].podSubnet)).IP.String()}, + }, + &nbdb.LogicalSwitch{ + UUID: node1.Name + "-UUID", + Name: node1.Name, + Ports: []string{"k8s-" + node1.Name + "-UUID"}, + }, + }, + }, + &corev1.NamespaceList{ + Items: []corev1.Namespace{*egressNamespace}, + }, + &corev1.PodList{ + Items: []corev1.Pod{*egressPod}, + }, + &corev1.NodeList{ + Items: []corev1.Node{node1, node2, node3}, + }, + &egressipv1.EgressIPList{ + Items: []egressipv1.EgressIP{eIPObj}, + }, + ) + i, podIPv4Net, _ := net.ParseCIDR(podIP + mask) + podIPv4Net.IP = i + fakeOvn.controller.logicalPortCache.add(egressPod, "", types.DefaultNetworkName, "", + nil, []*net.IPNet{podIPv4Net}) + + err := fakeOvn.controller.WatchEgressIPNamespaces() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchEgressIPPods() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchEgressIP() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + ginkgo.By("ensuring removal of invalid local external gateway next hop") + egressIPServedPodsASv4, egressIPServedPodsASv6 := buildEgressIPServedPodsAddressSets([]string{podIP}) + egressIPServedPodsAS := egressIPServedPodsASv4 + if isV6 { + egressIPServedPodsAS = egressIPServedPodsASv6 + } + validNextHops := []string{nodes[1].transitSWIP, nodes[2].transitSWIP} // node 2 and 3 + expectedDatabaseState := []libovsdbtest.TestData{ + &nbdb.NBGlobal{UUID: "nbglobal-UUID", Name: node1.Name}, + // LRPs to support EIP assigned to a remote node + // valid LRP for IPv4/IPv6. IPv4 Egress Node is local, IPv6 is remote + getReRoutePolicy(podIP, getSupportedOVNIPFamily(), "valid-reroute-UUID", validNextHops, eipExternalID), + &nbdb.LogicalRouter{ + Name: types.OVNClusterRouter, + UUID: types.OVNClusterRouter + "-UUID", + Policies: []string{"valid-reroute-UUID"}, + }, + &nbdb.LogicalRouterPort{ + UUID: types.GWRouterToJoinSwitchPrefix + types.GWRouterPrefix + node1.Name + "-UUID", + Name: types.GWRouterToJoinSwitchPrefix + types.GWRouterPrefix + node1.Name, + Networks: []string{node1GWToJoinIP + mask}, + }, + &nbdb.LogicalRouter{ + Name: types.GWRouterPrefix + node1.Name, + UUID: types.GWRouterPrefix + node1.Name + "-UUID", + Ports: []string{types.GWRouterToJoinSwitchPrefix + types.GWRouterPrefix + node1.Name + "-UUID"}, + }, + &nbdb.LogicalSwitchPort{ + UUID: "k8s-" + node1.Name + "-UUID", + Name: "k8s-" + node1.Name, + Addresses: []string{"fe:1a:b2:3f:0e:fb " + util.GetNodeManagementIfAddr( + ovntest.MustParseIPNet(nodes[0].podSubnet)).IP.String()}, + }, + &nbdb.LogicalSwitch{ + UUID: node1.Name + "-UUID", + Name: node1.Name, + Ports: []string{"k8s-" + node1.Name + "-UUID"}, + }, + egressIPServedPodsAS, + } + // if IPv6 enabled and IPv4 is disabled, we still create the IPv4 served pods AS + if isV6 { + expectedDatabaseState = append(expectedDatabaseState, egressIPServedPodsASv4) + } + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) + ginkgo.By("ensure config is consistent") + gomega.Consistently(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) + return nil + } + + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }, ginkgotable.Entry("IPv4", false), ginkgotable.Entry("IPv6", true)) }) // TEST UTILITY FUNCTIONS; From 69afe23dcc7f0d23bf761d72d2549690118aa902 Mon Sep 17 00:00:00 2001 From: Periyasamy Palanisamy Date: Tue, 30 Sep 2025 10:16:00 +0200 Subject: [PATCH 2/4] EgressIP: fix failover and restart stale SNAT/LRP; ensure status sync Scenario: - Nodes: node-1, node-2, node-3 - Egress IPs: EIP-1 - Pods: pod1 on node-1, pod2 on node-3 (pods are created via deployment replicas) - Egress-assignable nodes: node-1, node-2 - EIP-1 assigned to node-1 During a simultaneous reboot of node-1 and node-2, EIP-1 failed over to node-2 and ovnkube-controller restarted at nearly the same time: 1) EIP-1 was reassigned to node-2 by the cluster manager. 2) The sync EIP happened for EIP1 with stale status, though it cleaned SNATs/LRPs referring to node-1 due to outdated pod IPs (this is because pods will be recreated due to node reboots). 3) pod1/pod2 Add events arrived while the informer cache still had the old EIP status, so new SNATs/LRPs were created pointing to node-1. 4) The EIP-1 Add event arrived with the new status; entries for node-2 were added/updated. 5) Result: stale SNATs and LRPs with stale nexthops for node-1 remained. Fix: - Populate pod EIP status during EgressIP sync so podAssignment has accurate egressStatuses. - Reconcile stale assignments using podAssignment (egressStatuses) when the informer cache is not up to date, ensuring SNAT/LRP for the previously assigned node are corrected. - Remove stale EIP SNAT entries for remote-zone pods accordingly. - Add coverage for simultaneous EIP failover and controller restart. Signed-off-by: Periyasamy Palanisamy (cherry picked from commit 1667a51d9c7d15ba9cb9d5bb8b09205a3599bb35) (cherry picked from commit 7060af6c1111f7bf59c463c7d76a7dd318140128) (cherry picked from commit 0a3e1b90150b0890808a2afd7ea11206e3368d9b) (cherry picked from commit 75f0b7e9cad1025ed679f05ead1e165ec3cd3f11) Signed-off-by: Periyasamy Palanisamy --- go-controller/pkg/ovn/egressip.go | 102 +++++-- go-controller/pkg/ovn/egressip_test.go | 371 ++++++++++++++++++++++++- 2 files changed, 435 insertions(+), 38 deletions(-) diff --git a/go-controller/pkg/ovn/egressip.go b/go-controller/pkg/ovn/egressip.go index e8cb43a6b4..2c9af5af0e 100644 --- a/go-controller/pkg/ovn/egressip.go +++ b/go-controller/pkg/ovn/egressip.go @@ -219,7 +219,7 @@ func (oc *DefaultNetworkController) reconcileEgressIP(old, new *egressipv1.Egres for _, pod := range pods { podLabels := labels.Set(pod.Labels) if !newPodSelector.Matches(podLabels) && oldPodSelector.Matches(podLabels) { - if err := oc.deletePodEgressIPAssignments(oldEIP.Name, oldEIP.Status.Items, pod); err != nil { + if err := oc.deletePodEgressIPAssignmentsWithLock(oldEIP.Name, oldEIP.Status.Items, pod); err != nil { return err } } @@ -279,7 +279,7 @@ func (oc *DefaultNetworkController) reconcileEgressIP(old, new *egressipv1.Egres for _, pod := range pods { podLabels := labels.Set(pod.Labels) if !newPodSelector.Matches(podLabels) && oldPodSelector.Matches(podLabels) { - if err := oc.deletePodEgressIPAssignments(oldEIP.Name, oldEIP.Status.Items, pod); err != nil { + if err := oc.deletePodEgressIPAssignmentsWithLock(oldEIP.Name, oldEIP.Status.Items, pod); err != nil { return err } } @@ -432,7 +432,7 @@ func (oc *DefaultNetworkController) reconcileEgressIPPod(old, new *v1.Pod) (err // Check if the pod stopped matching. If the pod was deleted, // "new" will be nil, so this must account for that case. if !newMatches && oldMatches { - if err := oc.deletePodEgressIPAssignments(egressIP.Name, egressIP.Status.Items, oldPod); err != nil { + if err := oc.deletePodEgressIPAssignmentsWithLock(egressIP.Name, egressIP.Status.Items, oldPod); err != nil { return err } continue @@ -453,7 +453,7 @@ func (oc *DefaultNetworkController) reconcileEgressIPPod(old, new *v1.Pod) (err // to match all pods in the namespace) and the pod has been deleted: // "new" will be nil and we need to remove the setup if new == nil { - if err := oc.deletePodEgressIPAssignments(egressIP.Name, egressIP.Status.Items, oldPod); err != nil { + if err := oc.deletePodEgressIPAssignmentsWithLock(egressIP.Name, egressIP.Status.Items, oldPod); err != nil { return err } continue @@ -531,25 +531,7 @@ func (oc *DefaultNetworkController) addPodEgressIPAssignments(name string, statu if len(statusAssignments) == 0 { return nil } - // We need to proceed with add only under two conditions - // 1) egressNode present in at least one status is local to this zone - // (NOTE: The relation between egressIPName and nodeName is 1:1 i.e in the same object the given node will be present only in one status) - // 2) the pod being added is local to this zone - proceed := false - for _, status := range statusAssignments { - oc.eIPC.nodeZoneState.LockKey(status.Node) - isLocalZoneEgressNode, loadedEgressNode := oc.eIPC.nodeZoneState.Load(status.Node) - if loadedEgressNode && isLocalZoneEgressNode { - proceed = true - oc.eIPC.nodeZoneState.UnlockKey(status.Node) - break - } - oc.eIPC.nodeZoneState.UnlockKey(status.Node) - } - if !proceed && !oc.isPodScheduledinLocalZone(pod) { - return nil // nothing to do if none of the status nodes are local to this master and pod is also remote - } - var remainingAssignments []egressipv1.EgressIPStatusItem + var remainingAssignments, staleAssignments []egressipv1.EgressIPStatusItem var podIPs []*net.IPNet var err error if oc.isPodScheduledinLocalZone(pod) { @@ -594,9 +576,16 @@ func (oc *DefaultNetworkController) addPodEgressIPAssignments(name string, statu // podState.egressIPName can be empty if no re-routes were found in // syncPodAssignmentCache for the existing pod, we will treat this case as a new add for _, status := range statusAssignments { - if exists := podState.egressStatuses.contains(status); !exists { + // Add the status if it's not already in the cache, or if it exists but is in pending state + // (meaning it was populated during EIP sync and needs to be processed for the pod). + if value, exists := podState.egressStatuses.statusMap[status]; !exists || value == egressStatusStatePending { remainingAssignments = append(remainingAssignments, status) } + // Detect stale EIP status entries (same EgressIP reassigned to a different node) + // and queue the outdated entry for cleanup. + if staleStatus := podState.egressStatuses.hasStaleEIPStatus(status); staleStatus != nil { + staleAssignments = append(staleAssignments, *staleStatus) + } } podState.egressIPName = name podState.standbyEgressIPNames.Delete(name) @@ -616,6 +605,32 @@ func (oc *DefaultNetworkController) addPodEgressIPAssignments(name string, statu podState.standbyEgressIPNames.Insert(name) return nil } + for _, staleStatus := range staleAssignments { + klog.V(2).Infof("Deleting stale pod egress IP status: %v for EgressIP: %s and pod: %s/%s/%v", staleStatus, name, pod.Namespace, pod.Name, podIPs) + err = oc.deletePodEgressIPAssignment(name, []egressipv1.EgressIPStatusItem{staleStatus}, pod) + if err != nil { + klog.Warningf("Failed to delete stale EgressIP status %s/%v for pod %s: %v", name, staleStatus, podKey, err) + } + delete(podState.egressStatuses.statusMap, staleStatus) + } + // We need to proceed with add only under two conditions + // 1) egressNode present in at least one status is local to this zone + // (NOTE: The relation between egressIPName and nodeName is 1:1 i.e in the same object the given node will be present only in one status) + // 2) the pod being added is local to this zone + proceed := false + for _, status := range statusAssignments { + oc.eIPC.nodeZoneState.LockKey(status.Node) + isLocalZoneEgressNode, loadedEgressNode := oc.eIPC.nodeZoneState.Load(status.Node) + if loadedEgressNode && isLocalZoneEgressNode { + proceed = true + oc.eIPC.nodeZoneState.UnlockKey(status.Node) + break + } + oc.eIPC.nodeZoneState.UnlockKey(status.Node) + } + if !proceed && !oc.isPodScheduledinLocalZone(pod) { + return nil // nothing to do if none of the status nodes are local to this master and pod is also remote + } for _, status := range remainingAssignments { klog.V(2).Infof("Adding pod egress IP status: %v for EgressIP: %s and pod: %s/%s/%v", status, name, pod.Namespace, pod.Name, podIPs) err = oc.eIPC.nodeZoneState.DoWithLock(status.Node, func(key string) error { @@ -745,16 +760,20 @@ func (oc *DefaultNetworkController) deleteNamespaceEgressIPAssignment(name strin } } for _, pod := range pods { - if err := oc.deletePodEgressIPAssignments(name, statusAssignments, pod); err != nil { + if err := oc.deletePodEgressIPAssignmentsWithLock(name, statusAssignments, pod); err != nil { return err } } return nil } -func (oc *DefaultNetworkController) deletePodEgressIPAssignments(name string, statusesToRemove []egressipv1.EgressIPStatusItem, pod *kapi.Pod) error { +func (oc *DefaultNetworkController) deletePodEgressIPAssignmentsWithLock(name string, statusesToRemove []egressipv1.EgressIPStatusItem, pod *kapi.Pod) error { oc.eIPC.podAssignmentMutex.Lock() defer oc.eIPC.podAssignmentMutex.Unlock() + return oc.deletePodEgressIPAssignment(name, statusesToRemove, pod) +} + +func (oc *DefaultNetworkController) deletePodEgressIPAssignment(name string, statusesToRemove []egressipv1.EgressIPStatusItem, pod *kapi.Pod) error { podKey := getPodKey(pod) podStatus, exists := oc.eIPC.podAssignment[podKey] if !exists { @@ -988,6 +1007,13 @@ func (oc *DefaultNetworkController) syncPodAssignmentCache(egressIPCache map[str klog.Infof("EgressIP %s is managing pod %s", egressIPName, podKey) } } + // populate podState.egressStatuses with assigned node for active egressIP IPs. + if podState.egressIPName == egressIPName { + for egressIPIP, nodeName := range state.egressIPs { + podState.egressStatuses.statusMap[egressipv1.EgressIPStatusItem{ + EgressIP: egressIPIP, Node: nodeName}] = egressStatusStatePending + } + } oc.eIPC.podAssignment[podKey] = podState } } @@ -1354,9 +1380,18 @@ func InitClusterEgressPolicies(nbClient libovsdbclient.Client, addressSetFactory return nil } +// egressStatusStatePending marks entries populated during EIP sync and +// indicates they must be reconciled again for the pod. +const egressStatusStatePending = "pending" + type statusMap map[egressipv1.EgressIPStatusItem]string type egressStatuses struct { + // statusMap tracks per EIP status assignment for a pod. + // Key: egressipv1.EgressIPStatusItem {EgressIP, Node} + // Values: + // "" -> applied/reconciled + // egressStatusStatePending -> populated during EIP sync, pending reconcile. statusMap } @@ -1368,6 +1403,21 @@ func (e egressStatuses) contains(potentialStatus egressipv1.EgressIPStatusItem) return false } +// hasStaleEIPStatus checks for stale EIP status entries already in cache. +// This addresses the race condition where an EIP is reassigned to a different node +// but the cache still contains the old assignment, leading to stale SNAT/LRP entries. +func (e egressStatuses) hasStaleEIPStatus(potentialStatus egressipv1.EgressIPStatusItem) *egressipv1.EgressIPStatusItem { + var staleStatus *egressipv1.EgressIPStatusItem + for status := range e.statusMap { + if status.EgressIP == potentialStatus.EgressIP && + status.Node != potentialStatus.Node { + staleStatus = &egressipv1.EgressIPStatusItem{EgressIP: status.EgressIP, Node: status.Node} + break + } + } + return staleStatus +} + func (e egressStatuses) delete(deleteStatus egressipv1.EgressIPStatusItem) { delete(e.statusMap, deleteStatus) } diff --git a/go-controller/pkg/ovn/egressip_test.go b/go-controller/pkg/ovn/egressip_test.go index 931867e981..05110e57e9 100644 --- a/go-controller/pkg/ovn/egressip_test.go +++ b/go-controller/pkg/ovn/egressip_test.go @@ -7792,15 +7792,9 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { pas = getPodAssignmentState(&egressPod1) gomega.Expect(pas).NotTo(gomega.BeNil()) gomega.Expect(pas.egressIPName).To(gomega.Equal(egressIPName)) - gomega.Expect(pas.egressStatuses.statusMap).To(gomega.Equal(statusMap{})) + gomega.Expect(pas.egressStatuses.statusMap).To(gomega.HaveLen(2)) gomega.Expect(pas.standbyEgressIPNames.Has(egressIP2Name)).To(gomega.BeTrue()) - // reset egressStatuses for rest of the test to progress correctly - fakeOvn.controller.eIPC.podAssignmentMutex.Lock() - fakeOvn.controller.eIPC.podAssignment[getPodKey(&egressPod1)].egressStatuses.statusMap[eip1Obj.Status.Items[0]] = "" - fakeOvn.controller.eIPC.podAssignment[getPodKey(&egressPod1)].egressStatuses.statusMap[eip1Obj.Status.Items[1]] = "" - fakeOvn.controller.eIPC.podAssignmentMutex.Unlock() - // delete the standby egressIP object to make sure the cache is updated err = fakeOvn.fakeClient.EgressIPClient.K8sV1().EgressIPs().Delete(context.TODO(), egressIP2Name, metav1.DeleteOptions{}) gomega.Expect(err).NotTo(gomega.HaveOccurred()) @@ -7811,8 +7805,8 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { eip1Obj, err = fakeOvn.fakeClient.EgressIPClient.K8sV1().EgressIPs().Get(context.TODO(), eIP1.Name, metav1.GetOptions{}) g.Expect(err).NotTo(gomega.HaveOccurred()) g.Expect(pas.egressStatuses.statusMap).To(gomega.HaveLen(2)) - g.Expect(pas.egressStatuses.statusMap[eip1Obj.Status.Items[0]]).To(gomega.Equal("")) - g.Expect(pas.egressStatuses.statusMap[eip1Obj.Status.Items[1]]).To(gomega.Equal("")) + g.Expect(pas.egressStatuses.statusMap[eip1Obj.Status.Items[0]]).To(gomega.Equal(egressStatusStatePending)) + g.Expect(pas.egressStatuses.statusMap[eip1Obj.Status.Items[1]]).To(gomega.Equal(egressStatusStatePending)) g.Expect(pas.standbyEgressIPNames.Has(egressIP2Name)).To(gomega.BeFalse()) }).Should(gomega.Succeed()) // add back the standby egressIP object @@ -7838,8 +7832,8 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { eip1Obj, err = fakeOvn.fakeClient.EgressIPClient.K8sV1().EgressIPs().Get(context.TODO(), eIP1.Name, metav1.GetOptions{}) g.Expect(err).NotTo(gomega.HaveOccurred()) g.Expect(pas.egressStatuses.statusMap).To(gomega.HaveLen(2)) - g.Expect(pas.egressStatuses.statusMap[eip1Obj.Status.Items[0]]).To(gomega.Equal("")) - g.Expect(pas.egressStatuses.statusMap[eip1Obj.Status.Items[1]]).To(gomega.Equal("")) + g.Expect(pas.egressStatuses.statusMap[eip1Obj.Status.Items[0]]).To(gomega.Equal(egressStatusStatePending)) + g.Expect(pas.egressStatuses.statusMap[eip1Obj.Status.Items[1]]).To(gomega.Equal(egressStatusStatePending)) g.Expect(pas.standbyEgressIPNames.Has(egressIP2Name)).To(gomega.BeTrue()) }).Should(gomega.Succeed()) gomega.Eventually(func() string { @@ -7904,7 +7898,7 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { eip1Obj, err = fakeOvn.fakeClient.EgressIPClient.K8sV1().EgressIPs().Get(context.TODO(), eIP1.Name, metav1.GetOptions{}) g.Expect(err).NotTo(gomega.HaveOccurred()) g.Expect(pas.egressStatuses.statusMap).To(gomega.HaveLen(1)) - g.Expect(pas.egressStatuses.statusMap[eip1Obj.Status.Items[0]]).To(gomega.Equal("")) + g.Expect(pas.egressStatuses.statusMap[eip1Obj.Status.Items[0]]).To(gomega.Equal(egressStatusStatePending)) g.Expect(pas.standbyEgressIPNames.Has(egressIP2Name)).To(gomega.BeTrue()) }).Should(gomega.Succeed()) // delete the first egressIP object and make sure the cache is updated @@ -12666,6 +12660,359 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { err := app.Run([]string{app.Name}) gomega.Expect(err).NotTo(gomega.HaveOccurred()) }, ginkgotable.Entry("IPv4", false), ginkgotable.Entry("IPv6", true)) + + ginkgo.It("should update SNAT and LRP nexthops during simultaneous EIP failover and ovnkube-controller restart", func() { + app.Action = func(*cli.Context) error { + config.Gateway.DisableSNATMultipleGWs = true + config.OVNKubernetesFeature.EnableInterconnect = true + + egressIP := "192.168.126.101" + node1IPv4 := "192.168.126.12" + node1IPv4CIDR := node1IPv4 + "/24" + node1IPv4TranSwitchIP := "100.88.0.2/16" + node2IPv4 := "192.168.126.51" + node2IPv4CIDR := node2IPv4 + "/24" + node2IPv4TranSwitchIP := "100.88.0.3" + node2IPv4TranSwitchIPCIDR := node2IPv4TranSwitchIP + "/16" + node3IPv4 := "192.168.126.61" + node3IPv4CIDR := node3IPv4 + "/24" + node3IPv4TranSwitchIP := "100.88.0.4/16" + + egressPod := *newPodWithLabels(eipNamespace, podName, node1Name, podV4IP, egressPodLabel) + egressPod2 := *newPodWithLabels(eipNamespace, podName2, node3Name, podV4IP2, egressPodLabel) + egressNamespace := newNamespace(eipNamespace) + + nodes := getIPv4Nodes([]nodeInfo{{[]string{node1IPv4CIDR}, "global", node1IPv4TranSwitchIP}, + {[]string{node2IPv4CIDR}, "remote", node2IPv4TranSwitchIPCIDR}, {[]string{node3IPv4CIDR}, "remote", node3IPv4TranSwitchIP}}) + node1 := nodes[0] + node1.Labels = map[string]string{ + "k8s.ovn.org/egress-assignable": "", + } + node1.Annotations["k8s.ovn.org/l3-gateway-config"] = `{"default":{"mode":"local","mac-address":"7e:57:f8:f0:3c:49", "ip-address":"192.168.126.12/24", "next-hop":"192.168.126.1"}}` + node1.Annotations["k8s.ovn.org/node-chassis-id"] = "79fdcfc4-6fe6-4cd3-8242-c0f85a4668ec" + node2 := nodes[1] + node2.Labels = map[string]string{ + "k8s.ovn.org/egress-assignable": "", + } + node2.Annotations["k8s.ovn.org/l3-gateway-config"] = `{"default":{"mode":"local","mac-address":"7e:57:f8:f0:3c:49", "ip-address":"192.168.126.51/24", "next-hop":"192.168.126.1"}}` + node2.Annotations["k8s.ovn.org/node-chassis-id"] = "89fdcfc4-6fe6-4cd3-8242-c0f85a4668ec" + node3 := nodes[2] + node3.Annotations["k8s.ovn.org/l3-gateway-config"] = `{"default":{"mode":"local","mac-address":"7e:57:f8:f0:3c:49", "ip-address":"192.168.126.75/24", "next-hop":"192.168.126.1"}}` + node3.Annotations["k8s.ovn.org/node-chassis-id"] = "99fdcfc4-6fe6-4cd3-8242-c0f85a4668ec" + + eIP := egressipv1.EgressIP{ + ObjectMeta: newEgressIPMeta(egressIPName), + Spec: egressipv1.EgressIPSpec{ + EgressIPs: []string{egressIP}, + NamespaceSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{ + "name": egressNamespace.Name, + }, + }, + }, + Status: egressipv1.EgressIPStatus{ + Items: []egressipv1.EgressIPStatusItem{ + { + Node: node1.Name, + EgressIP: egressIP, + }, + }, + }, + } + node1Switch := &nbdb.LogicalSwitch{ + UUID: node1.Name + "-UUID", + Name: node1.Name, + } + node2Switch := &nbdb.LogicalSwitch{ + UUID: node2.Name + "-UUID", + Name: node2.Name, + } + node3Switch := &nbdb.LogicalSwitch{ + UUID: node3.Name + "-UUID", + Name: node3.Name, + } + logicalRouterOptions := map[string]string{ + "dynamic_neigh_routers": "false", + } + fakeOvn.startWithDBSetup( + libovsdbtest.TestSetup{ + NBData: []libovsdbtest.TestData{ + &nbdb.LogicalRouter{ + Name: types.OVNClusterRouter, + UUID: types.OVNClusterRouter + "-UUID", + }, + &nbdb.LogicalRouter{ + Name: types.GWRouterPrefix + node1.Name, + UUID: types.GWRouterPrefix + node1.Name + "-UUID", + Ports: []string{types.GWRouterToJoinSwitchPrefix + types.GWRouterPrefix + node1.Name + "-UUID"}, + Options: logicalRouterOptions, + }, + &nbdb.LogicalRouter{ + Name: types.GWRouterPrefix + node2.Name, + UUID: types.GWRouterPrefix + node2.Name + "-UUID", + Ports: []string{types.GWRouterToJoinSwitchPrefix + types.GWRouterPrefix + node2.Name + "-UUID"}, + Options: logicalRouterOptions, + }, + &nbdb.LogicalRouter{ + Name: types.GWRouterPrefix + node3.Name, + UUID: types.GWRouterPrefix + node3.Name + "-UUID", + Ports: []string{types.GWRouterToJoinSwitchPrefix + types.GWRouterPrefix + node3.Name + "-UUID"}, + Options: logicalRouterOptions, + }, + &nbdb.LogicalRouterPort{ + UUID: types.GWRouterToJoinSwitchPrefix + types.GWRouterPrefix + node1.Name + "-UUID", + Name: types.GWRouterToJoinSwitchPrefix + types.GWRouterPrefix + node1.Name, + Networks: []string{nodeLogicalRouterIfAddrV4}, + }, + &nbdb.LogicalRouterPort{ + UUID: types.GWRouterToJoinSwitchPrefix + types.GWRouterPrefix + node2.Name + "-UUID", + Name: types.GWRouterToJoinSwitchPrefix + types.GWRouterPrefix + node2.Name, + Networks: []string{node2LogicalRouterIfAddrV4}, + }, + &nbdb.LogicalRouterPort{ + UUID: types.GWRouterToJoinSwitchPrefix + types.GWRouterPrefix + node3.Name + "-UUID", + Name: types.GWRouterToJoinSwitchPrefix + types.GWRouterPrefix + node3.Name, + Networks: []string{node3LogicalRouterIfAddrV4}, + }, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "-UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, + "nat-addresses": "router", + "exclude-lb-vips-from-garp": "true", + }, + }, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "-UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, + "nat-addresses": "router", + "exclude-lb-vips-from-garp": "true", + }, + }, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node3Name + "-UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node3Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node3Name, + "nat-addresses": "router", + "exclude-lb-vips-from-garp": "true", + }, + }, + &nbdb.LogicalSwitch{ + UUID: types.ExternalSwitchPrefix + node1Name + "-UUID", + Name: types.ExternalSwitchPrefix + node1Name, + Ports: []string{types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "-UUID"}, + }, + &nbdb.LogicalSwitch{ + UUID: types.ExternalSwitchPrefix + node2Name + "-UUID", + Name: types.ExternalSwitchPrefix + node2Name, + Ports: []string{types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "-UUID"}, + }, + &nbdb.LogicalSwitch{ + UUID: types.ExternalSwitchPrefix + node3Name + "-UUID", + Name: types.ExternalSwitchPrefix + node3Name, + Ports: []string{types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node3Name + "-UUID"}, + }, + node1Switch, + node2Switch, + node3Switch, + }, + }, + &egressipv1.EgressIPList{ + Items: []egressipv1.EgressIP{eIP}, + }, + &corev1.NodeList{ + Items: []corev1.Node{node1, node2, node3}, + }, + &corev1.NamespaceList{ + Items: []corev1.Namespace{*egressNamespace}, + }, + &corev1.PodList{ + Items: []corev1.Pod{egressPod, egressPod2}, + }, + ) + + i, n, _ := net.ParseCIDR(podV4IP + "/23") + n.IP = i + fakeOvn.controller.logicalPortCache.add(&egressPod, "", types.DefaultNetworkName, "", nil, []*net.IPNet{n}) + i, n, _ = net.ParseCIDR(podV4IP2 + "/23") + n.IP = i + fakeOvn.controller.logicalPortCache.add(&egressPod2, "", types.DefaultNetworkName, "", nil, []*net.IPNet{n}) + + fakeOvn.controller.eIPC.nodeZoneState.Store(node1Name, true) + fakeOvn.controller.eIPC.nodeZoneState.Store(node2Name, false) + fakeOvn.controller.eIPC.nodeZoneState.Store(node3Name, false) + + err := fakeOvn.controller.WatchEgressIPNamespaces() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchEgressIPPods() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchEgressNodes() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchEgressIP() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + // To simulate an ovnkube-controller restart, update the EIP object with the newly assigned node. + // Then invoke reconcileEgressIP using only the updated EIP object to trigger the EgressIP add event. + eIP.Status = egressipv1.EgressIPStatus{ + Items: []egressipv1.EgressIPStatusItem{ + { + Node: node2.Name, + EgressIP: egressIP, + }, + }, + } + err = fakeOvn.controller.reconcileEgressIP(nil, &eIP) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + egressSVCServedPodsASv4, _ := buildEgressIPServiceAddressSets(nil) + egressIPServedPodsASv4, _ := buildEgressIPServedPodsAddressSets([]string{podV4IP, podV4IP2}) + egressNodeIPsASv4, _ := buildEgressIPNodeAddressSets([]string{node1IPv4, node2IPv4, node3IPv4}) + + node1Switch.QOSRules = []string{"egressip-QoS-UUID"} + expectedDatabaseState := []libovsdbtest.TestData{ + &nbdb.LogicalRouterPolicy{ + Priority: types.DefaultNoRereoutePriority, + Match: fmt.Sprintf("(ip4.src == $%s || ip4.src == $%s) && ip4.dst == $%s", + egressIPServedPodsASv4.Name, egressSVCServedPodsASv4.Name, egressNodeIPsASv4.Name), + Action: nbdb.LogicalRouterPolicyActionAllow, + UUID: "default-no-reroute-node-UUID", + Options: map[string]string{"pkt_mark": types.EgressIPNodeConnectionMark}, + }, + getNoReRouteReplyTrafficPolicy(), + &nbdb.LogicalRouterPolicy{ + Priority: types.DefaultNoRereoutePriority, + Match: "ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14", + Action: nbdb.LogicalRouterPolicyActionAllow, + UUID: "no-reroute-UUID", + }, + &nbdb.LogicalRouterPolicy{ + Priority: types.DefaultNoRereoutePriority, + Match: fmt.Sprintf("ip4.src == 10.128.0.0/14 && ip4.dst == %s", config.Gateway.V4JoinSubnet), + Action: nbdb.LogicalRouterPolicyActionAllow, + UUID: "no-reroute-service-UUID", + }, + &nbdb.LogicalRouterPolicy{ + Priority: types.EgressIPReroutePriority, + Match: fmt.Sprintf("ip4.src == %s", egressPod.Status.PodIP), + Action: nbdb.LogicalRouterPolicyActionReroute, + Nexthops: []string{node2IPv4TranSwitchIP}, + UUID: "reroute-UUID1", + ExternalIDs: map[string]string{ + "name": egressIPName, + }, + }, + &nbdb.LogicalRouter{ + Name: types.OVNClusterRouter, + UUID: types.OVNClusterRouter + "-UUID", + Policies: []string{"no-reroute-UUID", "no-reroute-service-UUID", "default-no-reroute-node-UUID", "egressip-no-reroute-reply-traffic", "reroute-UUID1"}, + }, + &nbdb.LogicalRouter{ + Name: types.GWRouterPrefix + node1.Name, + UUID: types.GWRouterPrefix + node1.Name + "-UUID", + Ports: []string{types.GWRouterToJoinSwitchPrefix + types.GWRouterPrefix + node1.Name + "-UUID"}, + Nat: []string{"pod-node-nat-UUID1"}, + Options: logicalRouterOptions, + }, + &nbdb.LogicalRouter{ + Name: types.GWRouterPrefix + node2.Name, + UUID: types.GWRouterPrefix + node2.Name + "-UUID", + Ports: []string{types.GWRouterToJoinSwitchPrefix + types.GWRouterPrefix + node2.Name + "-UUID"}, + Options: logicalRouterOptions, + }, + &nbdb.LogicalRouter{ + Name: types.GWRouterPrefix + node3.Name, + UUID: types.GWRouterPrefix + node3.Name + "-UUID", + Ports: []string{types.GWRouterToJoinSwitchPrefix + types.GWRouterPrefix + node3.Name + "-UUID"}, + Options: logicalRouterOptions, + }, + &nbdb.NAT{ + UUID: "pod-node-nat-UUID1", + LogicalIP: podV4IP, + ExternalIP: node1IPv4, + Type: nbdb.NATTypeSNAT, + Options: map[string]string{ + "stateless": "false", + }, + }, + &nbdb.LogicalRouterPort{ + UUID: types.GWRouterToJoinSwitchPrefix + types.GWRouterPrefix + node2.Name + "-UUID", + Name: types.GWRouterToJoinSwitchPrefix + types.GWRouterPrefix + node2.Name, + Networks: []string{node2LogicalRouterIfAddrV4}, + }, + &nbdb.LogicalRouterPort{ + UUID: types.GWRouterToJoinSwitchPrefix + types.GWRouterPrefix + node1.Name + "-UUID", + Name: types.GWRouterToJoinSwitchPrefix + types.GWRouterPrefix + node1.Name, + Networks: []string{nodeLogicalRouterIfAddrV4}, + }, + &nbdb.LogicalRouterPort{ + UUID: types.GWRouterToJoinSwitchPrefix + types.GWRouterPrefix + node3.Name + "-UUID", + Name: types.GWRouterToJoinSwitchPrefix + types.GWRouterPrefix + node3.Name, + Networks: []string{node3LogicalRouterIfAddrV4}, + }, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "-UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, + "nat-addresses": "router", + "exclude-lb-vips-from-garp": "true", + }, + }, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "-UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, + "nat-addresses": "router", + "exclude-lb-vips-from-garp": "true", + }, + }, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node3Name + "-UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node3Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node3Name, + "nat-addresses": "router", + "exclude-lb-vips-from-garp": "true", + }, + }, + &nbdb.LogicalSwitch{ + UUID: types.ExternalSwitchPrefix + node1Name + "-UUID", + Name: types.ExternalSwitchPrefix + node1Name, + Ports: []string{types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "-UUID"}, + }, + &nbdb.LogicalSwitch{ + UUID: types.ExternalSwitchPrefix + node2Name + "-UUID", + Name: types.ExternalSwitchPrefix + node2Name, + Ports: []string{types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "-UUID"}, + }, + &nbdb.LogicalSwitch{ + UUID: types.ExternalSwitchPrefix + node3Name + "-UUID", + Name: types.ExternalSwitchPrefix + node3Name, + Ports: []string{types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node3Name + "-UUID"}, + }, + node1Switch, + node2Switch, + node3Switch, + getDefaultQoSRule(false), + egressSVCServedPodsASv4, egressIPServedPodsASv4, egressNodeIPsASv4, + } + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) + return nil + } + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) }) // TEST UTILITY FUNCTIONS; From 0dd5bb41c80f953647fd8b921814c09d399b521a Mon Sep 17 00:00:00 2001 From: Periyasamy Palanisamy Date: Thu, 30 Oct 2025 12:43:53 +0100 Subject: [PATCH 3/4] Handle missing pod delete event in EIP controller During an ovnkube-controller restart, pod add/remove events for EgressIP-served pods may occur before the factory.egressIPPod handler is registered in the watch factory. As a result, the EIP controller never able to handle pod delete, leading to stale logical router policy (LRP) entry. Scenario: ovnkube-controller starts. The EIP controller processes the namespace add event (oc.WatchEgressIPNamespaces) and creates an LRP entry for the served pod. The pod is deleted. The factory.egressIPPod handler registration happens afterward via oc.WatchEgressIPPods. The pod delete event is never processed by the EIP controller. Fix: 1. Start oc.WatchEgressIPPods followed by oc.WatchEgressIPNamespaces. 2. Sync EgressIPs before registering factory.egressIPPod event handler. 3. Removal of Sync EgressIPs for factory.EgressIPNamespaceType which is no longer needed. Signed-off-by: Periyasamy Palanisamy (cherry picked from commit 8975b00e7a43eede52195eff15e84b841343f28a) (cherry picked from commit b8303a2ac923298688f432c92ac33e679cf07d66) (cherry picked from commit 3f391e7b11bf8198a8ec8cc0fa305ef89a42a091) (cherry picked from commit 6cd0f090f675df8780ae673950d0e640bdd1b850) Signed-off-by: Periyasamy Palanisamy --- .../pkg/ovn/default_network_controller.go | 14 +- go-controller/pkg/ovn/egressip_test.go | 150 +++++++++--------- 2 files changed, 84 insertions(+), 80 deletions(-) diff --git a/go-controller/pkg/ovn/default_network_controller.go b/go-controller/pkg/ovn/default_network_controller.go index a3d48d1a4c..46094a9308 100644 --- a/go-controller/pkg/ovn/default_network_controller.go +++ b/go-controller/pkg/ovn/default_network_controller.go @@ -481,7 +481,7 @@ func (oc *DefaultNetworkController) Run(ctx context.Context) error { if config.OVNKubernetesFeature.EnableEgressIP { // This is probably the best starting order for all egress IP handlers. - // WatchEgressIPNamespaces and WatchEgressIPPods only use the informer + // WatchEgressIPPods and WatchEgressIPNamespaces only use the informer // cache to retrieve the egress IPs when determining if namespace/pods // match. It is thus better if we initialize them first and allow // WatchEgressNodes / WatchEgressIP to initialize after. Those handlers @@ -490,10 +490,14 @@ func (oc *DefaultNetworkController) Run(ctx context.Context) error { // risk performing a bunch of modifications on the EgressIP objects when // we restart and then have these handlers act on stale data when they // sync. - if err := WithSyncDurationMetric("egress ip namespace", oc.WatchEgressIPNamespaces); err != nil { + // Initialize WatchEgressIPPods before WatchEgressIPNamespaces to ensure + // that no pod events are missed by the EgressIPController. It's acceptable + // to miss a namespace event, as it will be handled indirectly through + // the pod delete event within that namespace. + if err := WithSyncDurationMetric("egress ip pod", oc.WatchEgressIPPods); err != nil { return err } - if err := WithSyncDurationMetric("egress ip pod", oc.WatchEgressIPPods); err != nil { + if err := WithSyncDurationMetric("egress ip namespace", oc.WatchEgressIPNamespaces); err != nil { return err } if err := WithSyncDurationMetric("egress node", oc.WatchEgressNodes); err != nil { @@ -1126,13 +1130,13 @@ func (h *defaultNetworkControllerEventHandler) SyncFunc(objs []interface{}) erro case factory.EgressFirewallType: syncFunc = h.oc.syncEgressFirewall - case factory.EgressIPNamespaceType: + case factory.EgressIPPodType: syncFunc = h.oc.syncEgressIPs case factory.EgressNodeType: syncFunc = h.oc.initClusterEgressPolicies - case factory.EgressIPPodType, + case factory.EgressIPNamespaceType, factory.EgressIPType: syncFunc = nil diff --git a/go-controller/pkg/ovn/egressip_test.go b/go-controller/pkg/ovn/egressip_test.go index 05110e57e9..dfd6ff2f5e 100644 --- a/go-controller/pkg/ovn/egressip_test.go +++ b/go-controller/pkg/ovn/egressip_test.go @@ -343,9 +343,9 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { Items: []v1.Namespace{*egressNamespace}, }) - err := fakeOvn.controller.WatchEgressIPNamespaces() + err := fakeOvn.controller.WatchEgressIPPods() gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = fakeOvn.controller.WatchEgressIPPods() + err = fakeOvn.controller.WatchEgressIPNamespaces() gomega.Expect(err).NotTo(gomega.HaveOccurred()) err = fakeOvn.controller.WatchEgressNodes() gomega.Expect(err).NotTo(gomega.HaveOccurred()) @@ -558,9 +558,9 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { Items: []v1.Namespace{*egressNamespace}, }) - err := fakeOvn.controller.WatchEgressIPNamespaces() + err := fakeOvn.controller.WatchEgressIPPods() gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = fakeOvn.controller.WatchEgressIPPods() + err = fakeOvn.controller.WatchEgressIPNamespaces() gomega.Expect(err).NotTo(gomega.HaveOccurred()) err = fakeOvn.controller.WatchEgressNodes() gomega.Expect(err).NotTo(gomega.HaveOccurred()) @@ -802,9 +802,9 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { Items: []v1.Namespace{*egressNamespace}, }) - err := fakeOvn.controller.WatchEgressIPNamespaces() + err := fakeOvn.controller.WatchEgressIPPods() gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = fakeOvn.controller.WatchEgressIPPods() + err = fakeOvn.controller.WatchEgressIPNamespaces() gomega.Expect(err).NotTo(gomega.HaveOccurred()) err = fakeOvn.controller.WatchEgressNodes() gomega.Expect(err).NotTo(gomega.HaveOccurred()) @@ -1143,9 +1143,9 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { Items: []v1.Namespace{*egressNamespace}, }) - err := fakeOvn.controller.WatchEgressIPNamespaces() + err := fakeOvn.controller.WatchEgressIPPods() gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = fakeOvn.controller.WatchEgressIPPods() + err = fakeOvn.controller.WatchEgressIPNamespaces() gomega.Expect(err).NotTo(gomega.HaveOccurred()) err = fakeOvn.controller.WatchEgressNodes() gomega.Expect(err).NotTo(gomega.HaveOccurred()) @@ -1567,9 +1567,9 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { Items: []v1.Namespace{*egressNamespace}, }) - err := fakeOvn.controller.WatchEgressIPNamespaces() + err := fakeOvn.controller.WatchEgressIPPods() gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = fakeOvn.controller.WatchEgressIPPods() + err = fakeOvn.controller.WatchEgressIPNamespaces() gomega.Expect(err).NotTo(gomega.HaveOccurred()) err = fakeOvn.controller.WatchEgressNodes() gomega.Expect(err).NotTo(gomega.HaveOccurred()) @@ -1958,9 +1958,9 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { n.IP = i fakeOvn.controller.logicalPortCache.add(&egressPod, "", types.DefaultNetworkName, "", nil, []*net.IPNet{n}) - err := fakeOvn.controller.WatchEgressIPNamespaces() + err := fakeOvn.controller.WatchEgressIPPods() gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = fakeOvn.controller.WatchEgressIPPods() + err = fakeOvn.controller.WatchEgressIPNamespaces() gomega.Expect(err).NotTo(gomega.HaveOccurred()) err = fakeOvn.controller.WatchEgressNodes() gomega.Expect(err).NotTo(gomega.HaveOccurred()) @@ -2353,9 +2353,9 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { n.IP = i fakeOvn.controller.logicalPortCache.add(&p.Pod, "", types.DefaultNetworkName, "", nil, []*net.IPNet{n}) } - err := fakeOvn.controller.WatchEgressIPNamespaces() + err := fakeOvn.controller.WatchEgressIPPods() gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = fakeOvn.controller.WatchEgressIPPods() + err = fakeOvn.controller.WatchEgressIPNamespaces() gomega.Expect(err).NotTo(gomega.HaveOccurred()) err = fakeOvn.controller.WatchEgressNodes() gomega.Expect(err).NotTo(gomega.HaveOccurred()) @@ -2753,9 +2753,9 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { Items: []v1.Namespace{*egressNamespace}, }) - err := fakeOvn.controller.WatchEgressIPNamespaces() + err := fakeOvn.controller.WatchEgressIPPods() gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = fakeOvn.controller.WatchEgressIPPods() + err = fakeOvn.controller.WatchEgressIPNamespaces() gomega.Expect(err).NotTo(gomega.HaveOccurred()) err = fakeOvn.controller.WatchEgressNodes() gomega.Expect(err).NotTo(gomega.HaveOccurred()) @@ -3195,9 +3195,9 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { n.IP = i fakeOvn.controller.logicalPortCache.add(&egressPod, "", types.DefaultNetworkName, "", nil, []*net.IPNet{n}) - err := fakeOvn.controller.WatchEgressIPNamespaces() + err := fakeOvn.controller.WatchEgressIPPods() gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = fakeOvn.controller.WatchEgressIPPods() + err = fakeOvn.controller.WatchEgressIPNamespaces() gomega.Expect(err).NotTo(gomega.HaveOccurred()) err = fakeOvn.controller.WatchEgressNodes() gomega.Expect(err).NotTo(gomega.HaveOccurred()) @@ -3521,9 +3521,9 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { n.IP = i fakeOvn.controller.logicalPortCache.add(&egressPod, "", types.DefaultNetworkName, "", nil, []*net.IPNet{n}) - err := fakeOvn.controller.WatchEgressIPNamespaces() + err := fakeOvn.controller.WatchEgressIPPods() gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = fakeOvn.controller.WatchEgressIPPods() + err = fakeOvn.controller.WatchEgressIPNamespaces() gomega.Expect(err).NotTo(gomega.HaveOccurred()) err = fakeOvn.controller.WatchEgressIP() gomega.Expect(err).NotTo(gomega.HaveOccurred()) @@ -3804,9 +3804,9 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { fakeOvn.controller.eIPC.nodeZoneState.Store(node1Name, false) } - err := fakeOvn.controller.WatchEgressIPNamespaces() + err := fakeOvn.controller.WatchEgressIPPods() gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = fakeOvn.controller.WatchEgressIPPods() + err = fakeOvn.controller.WatchEgressIPNamespaces() gomega.Expect(err).NotTo(gomega.HaveOccurred()) err = fakeOvn.controller.WatchEgressIP() gomega.Expect(err).NotTo(gomega.HaveOccurred()) @@ -4021,9 +4021,9 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { n.IP = i fakeOvn.controller.logicalPortCache.add(&egressPod, "", types.DefaultNetworkName, "", nil, []*net.IPNet{n}) - err := fakeOvn.controller.WatchEgressIPNamespaces() + err := fakeOvn.controller.WatchEgressIPPods() gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = fakeOvn.controller.WatchEgressIPPods() + err = fakeOvn.controller.WatchEgressIPNamespaces() gomega.Expect(err).NotTo(gomega.HaveOccurred()) err = fakeOvn.controller.WatchEgressIP() gomega.Expect(err).NotTo(gomega.HaveOccurred()) @@ -4390,9 +4390,9 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { }, } - err := fakeOvn.controller.WatchEgressIPNamespaces() + err := fakeOvn.controller.WatchEgressIPPods() gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = fakeOvn.controller.WatchEgressIPPods() + err = fakeOvn.controller.WatchEgressIPNamespaces() gomega.Expect(err).NotTo(gomega.HaveOccurred()) err = fakeOvn.controller.WatchEgressIP() gomega.Expect(err).NotTo(gomega.HaveOccurred()) @@ -4525,9 +4525,9 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { }, } - err := fakeOvn.controller.WatchEgressIPNamespaces() + err := fakeOvn.controller.WatchEgressIPPods() gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = fakeOvn.controller.WatchEgressIPPods() + err = fakeOvn.controller.WatchEgressIPNamespaces() gomega.Expect(err).NotTo(gomega.HaveOccurred()) err = fakeOvn.controller.WatchEgressIP() gomega.Expect(err).NotTo(gomega.HaveOccurred()) @@ -4662,9 +4662,9 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { fakeOvn.controller.eIPC.nodeZoneState.Store(node1Name, false) } - err := fakeOvn.controller.WatchEgressIPNamespaces() + err := fakeOvn.controller.WatchEgressIPPods() gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = fakeOvn.controller.WatchEgressIPPods() + err = fakeOvn.controller.WatchEgressIPNamespaces() gomega.Expect(err).NotTo(gomega.HaveOccurred()) err = fakeOvn.controller.WatchEgressIP() gomega.Expect(err).NotTo(gomega.HaveOccurred()) @@ -4901,9 +4901,9 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { fakeOvn.controller.eIPC.nodeZoneState.Store(node1Name, false) } - err := fakeOvn.controller.WatchEgressIPNamespaces() + err := fakeOvn.controller.WatchEgressIPPods() gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = fakeOvn.controller.WatchEgressIPPods() + err = fakeOvn.controller.WatchEgressIPNamespaces() gomega.Expect(err).NotTo(gomega.HaveOccurred()) err = fakeOvn.controller.WatchEgressIP() gomega.Expect(err).NotTo(gomega.HaveOccurred()) @@ -5162,9 +5162,9 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { }, } - err := fakeOvn.controller.WatchEgressIPNamespaces() + err := fakeOvn.controller.WatchEgressIPPods() gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = fakeOvn.controller.WatchEgressIPPods() + err = fakeOvn.controller.WatchEgressIPNamespaces() gomega.Expect(err).NotTo(gomega.HaveOccurred()) err = fakeOvn.controller.WatchEgressIP() gomega.Expect(err).NotTo(gomega.HaveOccurred()) @@ -5345,9 +5345,9 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { }, } - err := fakeOvn.controller.WatchEgressIPNamespaces() + err := fakeOvn.controller.WatchEgressIPPods() gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = fakeOvn.controller.WatchEgressIPPods() + err = fakeOvn.controller.WatchEgressIPNamespaces() gomega.Expect(err).NotTo(gomega.HaveOccurred()) err = fakeOvn.controller.WatchEgressIP() gomega.Expect(err).NotTo(gomega.HaveOccurred()) @@ -5544,9 +5544,9 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { fakeOvn.controller.zone = "local" } - err := fakeOvn.controller.WatchEgressIPNamespaces() + err := fakeOvn.controller.WatchEgressIPPods() gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = fakeOvn.controller.WatchEgressIPPods() + err = fakeOvn.controller.WatchEgressIPNamespaces() gomega.Expect(err).NotTo(gomega.HaveOccurred()) err = fakeOvn.controller.WatchEgressNodes() gomega.Expect(err).NotTo(gomega.HaveOccurred()) @@ -6014,9 +6014,9 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { }, } - err := fakeOvn.controller.WatchEgressIPNamespaces() + err := fakeOvn.controller.WatchEgressIPPods() gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = fakeOvn.controller.WatchEgressIPPods() + err = fakeOvn.controller.WatchEgressIPNamespaces() gomega.Expect(err).NotTo(gomega.HaveOccurred()) err = fakeOvn.controller.WatchEgressIP() gomega.Expect(err).NotTo(gomega.HaveOccurred()) @@ -6736,10 +6736,10 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { fakeOvn.controller.lsManager.AddOrUpdateSwitch(node1.Name, []*net.IPNet{ovntest.MustParseIPNet(v4Node1Subnet)}) err := fakeOvn.controller.WatchPods() gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = fakeOvn.controller.WatchEgressIPNamespaces() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) err = fakeOvn.controller.WatchEgressIPPods() gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchEgressIPNamespaces() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) err = fakeOvn.controller.WatchEgressNodes() gomega.Expect(err).NotTo(gomega.HaveOccurred()) err = fakeOvn.controller.WatchEgressIP() @@ -7088,8 +7088,8 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { ) fakeOvn.controller.lsManager.AddOrUpdateSwitch(node1.Name, []*net.IPNet{ovntest.MustParseIPNet(v4Node1Subnet)}) fakeOvn.controller.WatchPods() - fakeOvn.controller.WatchEgressIPNamespaces() fakeOvn.controller.WatchEgressIPPods() + fakeOvn.controller.WatchEgressIPNamespaces() fakeOvn.controller.WatchEgressNodes() fakeOvn.controller.WatchEgressIP() @@ -7514,10 +7514,10 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { fakeOvn.controller.lsManager.AddOrUpdateSwitch(node2.Name, []*net.IPNet{ovntest.MustParseIPNet(v4Node2Subnet)}) err := fakeOvn.controller.WatchPods() gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = fakeOvn.controller.WatchEgressIPNamespaces() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) err = fakeOvn.controller.WatchEgressIPPods() gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchEgressIPNamespaces() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) err = fakeOvn.controller.WatchEgressNodes() gomega.Expect(err).NotTo(gomega.HaveOccurred()) err = fakeOvn.controller.WatchEgressIP() @@ -8068,9 +8068,9 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { Items: []v1.Node{node}, }) - err := fakeOvn.controller.WatchEgressIPNamespaces() + err := fakeOvn.controller.WatchEgressIPPods() gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = fakeOvn.controller.WatchEgressIPPods() + err = fakeOvn.controller.WatchEgressIPNamespaces() gomega.Expect(err).NotTo(gomega.HaveOccurred()) err = fakeOvn.controller.WatchEgressNodes() gomega.Expect(err).NotTo(gomega.HaveOccurred()) @@ -8332,9 +8332,9 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { Items: []v1.Node{node1, node2}, }) - err := fakeOvn.controller.WatchEgressIPNamespaces() + err := fakeOvn.controller.WatchEgressIPPods() gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = fakeOvn.controller.WatchEgressIPPods() + err = fakeOvn.controller.WatchEgressIPNamespaces() gomega.Expect(err).NotTo(gomega.HaveOccurred()) err = fakeOvn.controller.WatchEgressNodes() gomega.Expect(err).NotTo(gomega.HaveOccurred()) @@ -8528,9 +8528,9 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { Items: []v1.Node{node1, node2}, }) - err := fakeOvn.controller.WatchEgressIPNamespaces() + err := fakeOvn.controller.WatchEgressIPPods() gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = fakeOvn.controller.WatchEgressIPPods() + err = fakeOvn.controller.WatchEgressIPNamespaces() gomega.Expect(err).NotTo(gomega.HaveOccurred()) err = fakeOvn.controller.WatchEgressNodes() gomega.Expect(err).NotTo(gomega.HaveOccurred()) @@ -8844,9 +8844,9 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { n.IP = i fakeOvn.controller.logicalPortCache.add(&egressPod, "", types.DefaultNetworkName, "", nil, []*net.IPNet{n}) - err := fakeOvn.controller.WatchEgressIPNamespaces() + err := fakeOvn.controller.WatchEgressIPPods() gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = fakeOvn.controller.WatchEgressIPPods() + err = fakeOvn.controller.WatchEgressIPNamespaces() gomega.Expect(err).NotTo(gomega.HaveOccurred()) err = fakeOvn.controller.WatchEgressNodes() gomega.Expect(err).NotTo(gomega.HaveOccurred()) @@ -9103,9 +9103,9 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { }, ) - err := fakeOvn.controller.WatchEgressIPNamespaces() + err := fakeOvn.controller.WatchEgressIPPods() gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = fakeOvn.controller.WatchEgressIPPods() + err = fakeOvn.controller.WatchEgressIPNamespaces() gomega.Expect(err).NotTo(gomega.HaveOccurred()) err = fakeOvn.controller.WatchEgressNodes() gomega.Expect(err).NotTo(gomega.HaveOccurred()) @@ -9300,10 +9300,10 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { err := fakeOvn.controller.WatchPods() gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = fakeOvn.controller.WatchEgressIPNamespaces() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) err = fakeOvn.controller.WatchEgressIPPods() gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchEgressIPNamespaces() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) err = fakeOvn.controller.WatchEgressNodes() gomega.Expect(err).NotTo(gomega.HaveOccurred()) err = fakeOvn.controller.WatchEgressIP() @@ -9485,9 +9485,9 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { Items: []v1.Node{node1, node2}, }) - err := fakeOvn.controller.WatchEgressIPNamespaces() + err := fakeOvn.controller.WatchEgressIPPods() gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = fakeOvn.controller.WatchEgressIPPods() + err = fakeOvn.controller.WatchEgressIPNamespaces() gomega.Expect(err).NotTo(gomega.HaveOccurred()) err = fakeOvn.controller.WatchEgressNodes() gomega.Expect(err).NotTo(gomega.HaveOccurred()) @@ -9851,9 +9851,9 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { Items: []v1.Node{node1, node2}, }) - err := fakeOvn.controller.WatchEgressIPNamespaces() + err := fakeOvn.controller.WatchEgressIPPods() gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = fakeOvn.controller.WatchEgressIPPods() + err = fakeOvn.controller.WatchEgressIPNamespaces() gomega.Expect(err).NotTo(gomega.HaveOccurred()) err = fakeOvn.controller.WatchEgressNodes() gomega.Expect(err).NotTo(gomega.HaveOccurred()) @@ -10251,10 +10251,10 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { err := fakeOvn.controller.WatchPods() gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = fakeOvn.controller.WatchEgressIPNamespaces() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) err = fakeOvn.controller.WatchEgressIPPods() gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchEgressIPNamespaces() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) err = fakeOvn.controller.WatchEgressNodes() gomega.Expect(err).NotTo(gomega.HaveOccurred()) err = fakeOvn.controller.WatchEgressIP() @@ -10446,10 +10446,10 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { err := fakeOvn.controller.WatchPods() gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = fakeOvn.controller.WatchEgressIPNamespaces() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) err = fakeOvn.controller.WatchEgressIPPods() gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchEgressIPNamespaces() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) err = fakeOvn.controller.WatchEgressNodes() gomega.Expect(err).NotTo(gomega.HaveOccurred()) err = fakeOvn.controller.WatchEgressIP() @@ -10768,10 +10768,10 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { err = fakeOvn.controller.WatchPods() gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = fakeOvn.controller.WatchEgressIPNamespaces() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) err = fakeOvn.controller.WatchEgressIPPods() gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchEgressIPNamespaces() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) err = fakeOvn.controller.WatchEgressNodes() gomega.Expect(err).NotTo(gomega.HaveOccurred()) err = fakeOvn.controller.WatchEgressIP() @@ -11029,9 +11029,9 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { n.IP = i fakeOvn.controller.logicalPortCache.add(&egressPod2, "", types.DefaultNetworkName, "", nil, []*net.IPNet{n}) - err := fakeOvn.controller.WatchEgressIPNamespaces() + err := fakeOvn.controller.WatchEgressIPPods() gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = fakeOvn.controller.WatchEgressIPPods() + err = fakeOvn.controller.WatchEgressIPNamespaces() gomega.Expect(err).NotTo(gomega.HaveOccurred()) err = fakeOvn.controller.WatchEgressNodes() gomega.Expect(err).NotTo(gomega.HaveOccurred()) @@ -11918,9 +11918,9 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { n.IP = i fakeOvn.controller.logicalPortCache.add(&egressPod1, "", types.DefaultNetworkName, "", nil, []*net.IPNet{n}) - err := fakeOvn.controller.WatchEgressIPNamespaces() + err := fakeOvn.controller.WatchEgressIPPods() gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = fakeOvn.controller.WatchEgressIPPods() + err = fakeOvn.controller.WatchEgressIPNamespaces() gomega.Expect(err).NotTo(gomega.HaveOccurred()) err = fakeOvn.controller.WatchEgressNodes() gomega.Expect(err).NotTo(gomega.HaveOccurred()) @@ -12600,9 +12600,9 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { fakeOvn.controller.logicalPortCache.add(egressPod, "", types.DefaultNetworkName, "", nil, []*net.IPNet{podIPv4Net}) - err := fakeOvn.controller.WatchEgressIPNamespaces() + err := fakeOvn.controller.WatchEgressIPPods() gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = fakeOvn.controller.WatchEgressIPPods() + err = fakeOvn.controller.WatchEgressIPNamespaces() gomega.Expect(err).NotTo(gomega.HaveOccurred()) err = fakeOvn.controller.WatchEgressIP() gomega.Expect(err).NotTo(gomega.HaveOccurred()) @@ -12849,9 +12849,9 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { fakeOvn.controller.eIPC.nodeZoneState.Store(node2Name, false) fakeOvn.controller.eIPC.nodeZoneState.Store(node3Name, false) - err := fakeOvn.controller.WatchEgressIPNamespaces() + err := fakeOvn.controller.WatchEgressIPPods() gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = fakeOvn.controller.WatchEgressIPPods() + err = fakeOvn.controller.WatchEgressIPNamespaces() gomega.Expect(err).NotTo(gomega.HaveOccurred()) err = fakeOvn.controller.WatchEgressNodes() gomega.Expect(err).NotTo(gomega.HaveOccurred()) From 7d8eb178de099c7c4d841312c93290237f851645 Mon Sep 17 00:00:00 2001 From: Periyasamy Palanisamy Date: Tue, 4 Nov 2025 18:10:54 +0100 Subject: [PATCH 4/4] Fix cleanup of LRP for deleted pods MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the EIP controller cleans up a stale EIP assignment for a pod, it also removes the pod object from the podAssignment cache. This is incorrect, as it prevents the EIP controller from processing the subsequent pod delete event. Scenario: 1. pod-1 is served by eip-1, both hosted on node1. 2. node1’s ovnkube-controller restarts. 3. Pod add event is received by the EIP controller — no changes. 4. eip-1 moves from node1 to node0. 5. The EIP controller receives the eip-1 add event. 6. eip-1 cleans up pod-1’s stale assignment (SNAT and LRP) for node1, but removes the pod object from the podAssignment cache when no other assignments found. 7. The EIP controller programs the LRP entry with node0’s transit IP as the next hop, but the pod assignment cache is not updated with new podAssignmentState. 8. The pod delete event is received by the EIP controller but ignored, since the pod object is missing from the assignment cache. So this commit fixes the issue by adding podAssignmentState back into podAssignment cache at step 7. Signed-off-by: Periyasamy Palanisamy (cherry picked from commit 16dedd155da2fc4e250f3dfa32fdd3f41cda2f1c) (cherry picked from commit f4e2c179966103f11b672a074aade26cc26bd357) (cherry picked from commit be0f3b84e11072d8a6907838df591d0fac29b7a4) (cherry picked from commit 3a524b5562ad30d18cf91bba743765adc5710149) --- go-controller/pkg/ovn/egressip.go | 5 +- go-controller/pkg/ovn/egressip_test.go | 134 +++++++++++++++++++++++++ 2 files changed, 138 insertions(+), 1 deletion(-) diff --git a/go-controller/pkg/ovn/egressip.go b/go-controller/pkg/ovn/egressip.go index 2c9af5af0e..d1d839c4a0 100644 --- a/go-controller/pkg/ovn/egressip.go +++ b/go-controller/pkg/ovn/egressip.go @@ -570,7 +570,6 @@ func (oc *DefaultNetworkController) addPodEgressIPAssignments(name string, statu egressStatuses: egressStatuses{make(map[egressipv1.EgressIPStatusItem]string)}, standbyEgressIPNames: sets.New[string](), } - oc.eIPC.podAssignment[podKey] = podState } else if podState.egressIPName == name || podState.egressIPName == "" { // We do the setup only if this egressIP object is the one serving this pod OR // podState.egressIPName can be empty if no re-routes were found in @@ -613,6 +612,10 @@ func (oc *DefaultNetworkController) addPodEgressIPAssignments(name string, statu } delete(podState.egressStatuses.statusMap, staleStatus) } + // We store podState into podAssignment cache at this place for two reasons. + // 1. When podAssignmentState is newly created. + // 2. deletePodEgressIPAssignments might clean the podAssignment cache, make sure we add it back. + oc.eIPC.podAssignment[podKey] = podState // We need to proceed with add only under two conditions // 1) egressNode present in at least one status is local to this zone // (NOTE: The relation between egressIPName and nodeName is 1:1 i.e in the same object the given node will be present only in one status) diff --git a/go-controller/pkg/ovn/egressip_test.go b/go-controller/pkg/ovn/egressip_test.go index dfd6ff2f5e..9febcd2832 100644 --- a/go-controller/pkg/ovn/egressip_test.go +++ b/go-controller/pkg/ovn/egressip_test.go @@ -13008,6 +13008,140 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { egressSVCServedPodsASv4, egressIPServedPodsASv4, egressNodeIPsASv4, } gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) + // Since we called reconcileEgressIP manually with new eIP status in the above step, so update EIP object with + // same status as well. + _, err = fakeOvn.fakeClient.EgressIPClient.K8sV1().EgressIPs().Update(context.TODO(), &eIP, metav1.UpdateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) + + // Now delete local zone egressPod and check LRP is removed. + err = fakeOvn.fakeClient.KubeClient.CoreV1().Pods(egressPod.Namespace).Delete(context.TODO(), egressPod.Name, metav1.DeleteOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + egressIPServedPodsASv4, _ = buildEgressIPServedPodsAddressSets([]string{podV4IP2}) + expectedDatabaseState = []libovsdbtest.TestData{ + &nbdb.LogicalRouterPolicy{ + Priority: types.DefaultNoRereoutePriority, + Match: fmt.Sprintf("(ip4.src == $%s || ip4.src == $%s) && ip4.dst == $%s", + egressIPServedPodsASv4.Name, egressSVCServedPodsASv4.Name, egressNodeIPsASv4.Name), + Action: nbdb.LogicalRouterPolicyActionAllow, + UUID: "default-no-reroute-node-UUID", + Options: map[string]string{"pkt_mark": types.EgressIPNodeConnectionMark}, + }, + getNoReRouteReplyTrafficPolicy(), + &nbdb.LogicalRouterPolicy{ + Priority: types.DefaultNoRereoutePriority, + Match: "ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14", + Action: nbdb.LogicalRouterPolicyActionAllow, + UUID: "no-reroute-UUID", + }, + &nbdb.LogicalRouterPolicy{ + Priority: types.DefaultNoRereoutePriority, + Match: fmt.Sprintf("ip4.src == 10.128.0.0/14 && ip4.dst == %s", config.Gateway.V4JoinSubnet), + Action: nbdb.LogicalRouterPolicyActionAllow, + UUID: "no-reroute-service-UUID", + }, + &nbdb.LogicalRouter{ + Name: types.OVNClusterRouter, + UUID: types.OVNClusterRouter + "-UUID", + Policies: []string{"no-reroute-UUID", "no-reroute-service-UUID", "default-no-reroute-node-UUID", "egressip-no-reroute-reply-traffic"}, + }, + &nbdb.LogicalRouter{ + Name: types.GWRouterPrefix + node1.Name, + UUID: types.GWRouterPrefix + node1.Name + "-UUID", + Ports: []string{types.GWRouterToJoinSwitchPrefix + types.GWRouterPrefix + node1.Name + "-UUID"}, + Nat: []string{"pod-node-nat-UUID1"}, + Options: logicalRouterOptions, + }, + &nbdb.LogicalRouter{ + Name: types.GWRouterPrefix + node2.Name, + UUID: types.GWRouterPrefix + node2.Name + "-UUID", + Ports: []string{types.GWRouterToJoinSwitchPrefix + types.GWRouterPrefix + node2.Name + "-UUID"}, + Options: logicalRouterOptions, + }, + &nbdb.LogicalRouter{ + Name: types.GWRouterPrefix + node3.Name, + UUID: types.GWRouterPrefix + node3.Name + "-UUID", + Ports: []string{types.GWRouterToJoinSwitchPrefix + types.GWRouterPrefix + node3.Name + "-UUID"}, + Options: logicalRouterOptions, + }, + // we still expect the SNAT at the end because we are not watching pod events that would remove the SNAT on pod deletion + &nbdb.NAT{ + UUID: "pod-node-nat-UUID1", + LogicalIP: podV4IP, + ExternalIP: node1IPv4, + ExternalIDs: nil, + Type: nbdb.NATTypeSNAT, + Options: map[string]string{ + "stateless": "false", + }, + }, + &nbdb.LogicalRouterPort{ + UUID: types.GWRouterToJoinSwitchPrefix + types.GWRouterPrefix + node2.Name + "-UUID", + Name: types.GWRouterToJoinSwitchPrefix + types.GWRouterPrefix + node2.Name, + Networks: []string{node2LogicalRouterIfAddrV4}, + }, + &nbdb.LogicalRouterPort{ + UUID: types.GWRouterToJoinSwitchPrefix + types.GWRouterPrefix + node1.Name + "-UUID", + Name: types.GWRouterToJoinSwitchPrefix + types.GWRouterPrefix + node1.Name, + Networks: []string{nodeLogicalRouterIfAddrV4}, + }, + &nbdb.LogicalRouterPort{ + UUID: types.GWRouterToJoinSwitchPrefix + types.GWRouterPrefix + node3.Name + "-UUID", + Name: types.GWRouterToJoinSwitchPrefix + types.GWRouterPrefix + node3.Name, + Networks: []string{node3LogicalRouterIfAddrV4}, + }, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "-UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, + "nat-addresses": "router", + "exclude-lb-vips-from-garp": "true", + }, + }, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "-UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, + "nat-addresses": "router", + "exclude-lb-vips-from-garp": "true", + }, + }, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node3Name + "-UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node3Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node3Name, + "nat-addresses": "router", + "exclude-lb-vips-from-garp": "true", + }, + }, + &nbdb.LogicalSwitch{ + UUID: types.ExternalSwitchPrefix + node1Name + "-UUID", + Name: types.ExternalSwitchPrefix + node1Name, + Ports: []string{types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "-UUID"}, + }, + &nbdb.LogicalSwitch{ + UUID: types.ExternalSwitchPrefix + node2Name + "-UUID", + Name: types.ExternalSwitchPrefix + node2Name, + Ports: []string{types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "-UUID"}, + }, + &nbdb.LogicalSwitch{ + UUID: types.ExternalSwitchPrefix + node3Name + "-UUID", + Name: types.ExternalSwitchPrefix + node3Name, + Ports: []string{types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node3Name + "-UUID"}, + }, + node1Switch, + node2Switch, + node3Switch, + getDefaultQoSRule(false), + egressSVCServedPodsASv4, egressIPServedPodsASv4, egressNodeIPsASv4, + } + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) return nil } err := app.Run([]string{app.Name})