From a1871a0859a5b84578c33ad092b249af87f0b87d Mon Sep 17 00:00:00 2001 From: Periyasamy Palanisamy Date: Thu, 25 Sep 2025 09:50:26 +0200 Subject: [PATCH] Skip Pending pods in EgressIP status updates When processing pods during an EgressIP status update, the controller used to stop iterating as soon as it encountered a pod in Pending state (in my case, pod IPs are not found when pod is in pending state with container creating status). This caused any subsequent Running pods to be skipped, leaving their SNAT entries unprogrammed on the egress node. With this change, only Pending pods are skipped, while iteration continues for the rest. This ensures that Running pods are properly processed and their SNAT entries are programmed. This change also skips pods that are unscheduled or use host networking. Signed-off-by: Periyasamy Palanisamy (cherry picked from commit 2afbaf6a9252a68ce9446fd2ae293eddbfb80e07) --- go-controller/pkg/ovn/egressip.go | 45 ++- go-controller/pkg/ovn/egressip_test.go | 501 +++++++++++++++++++++++++ 2 files changed, 523 insertions(+), 23 deletions(-) diff --git a/go-controller/pkg/ovn/egressip.go b/go-controller/pkg/ovn/egressip.go index 9bc430d1fd..00c238abba 100644 --- a/go-controller/pkg/ovn/egressip.go +++ b/go-controller/pkg/ovn/egressip.go @@ -288,6 +288,7 @@ func (e *EgressIPController) reconcileEgressIP(old, new *egressipv1.EgressIP) (e } } // CASE 3: EIP object update + var errs []error if old != nil && new != nil { oldEIP := old newEIP := new @@ -374,7 +375,7 @@ func (e *EgressIPController) reconcileEgressIP(old, new *egressipv1.EgressIP) (e return fmt.Errorf("failed to get active network for namespace %s: %v", namespace.Name, err) } if err := e.addNamespaceEgressIPAssignments(ni, newEIP.Name, newEIP.Status.Items, mark, namespace, newEIP.Spec.PodSelector); err != nil { - return fmt.Errorf("network %s: failed to add namespace %s egress IP config: %v", ni.GetNetworkName(), namespace.Name, err) + errs = append(errs, fmt.Errorf("network %s: failed to add namespace %s egress IP config: %v", ni.GetNetworkName(), namespace.Name, err)) } } } @@ -403,16 +404,13 @@ func (e *EgressIPController) reconcileEgressIP(old, new *egressipv1.EgressIP) (e return fmt.Errorf("network %s: failed to delete pod %s/%s egress IP config: %v", ni.GetNetworkName(), pod.Namespace, pod.Name, err) } } - if util.PodCompleted(pod) { - continue - } if newPodSelector.Matches(podLabels) && !oldPodSelector.Matches(podLabels) { ni, err := e.networkManager.GetActiveNetworkForNamespace(namespace.Name) if err != nil { return fmt.Errorf("failed to get active network for namespace %s: %v", namespace.Name, err) } if err := e.addPodEgressIPAssignmentsWithLock(ni, newEIP.Name, newEIP.Status.Items, mark, pod); err != nil { - return fmt.Errorf("network %s: failed to add pod %s/%s egress IP config: %v", ni.GetNetworkName(), pod.Namespace, pod.Name, err) + errs = append(errs, fmt.Errorf("network %s: failed to add pod %s/%s egress IP config: %v", ni.GetNetworkName(), pod.Namespace, pod.Name, err)) } } } @@ -452,7 +450,7 @@ func (e *EgressIPController) reconcileEgressIP(old, new *egressipv1.EgressIP) (e podLabels := labels.Set(pod.Labels) if newPodSelector.Matches(podLabels) { if err := e.addPodEgressIPAssignmentsWithLock(ni, newEIP.Name, newEIP.Status.Items, mark, pod); err != nil { - return fmt.Errorf("network %s: failed to add pod %s/%s egress IP config: %v", ni.GetNetworkName(), pod.Namespace, pod.Name, err) + errs = append(errs, fmt.Errorf("network %s: failed to add pod %s/%s egress IP config: %v", ni.GetNetworkName(), pod.Namespace, pod.Name, err)) } } } @@ -471,12 +469,9 @@ func (e *EgressIPController) reconcileEgressIP(old, new *egressipv1.EgressIP) (e return fmt.Errorf("network %s: failed to delete pod %s/%s egress IP config: %v", ni.GetNetworkName(), pod.Namespace, pod.Name, err) } } - if util.PodCompleted(pod) { - continue - } if newPodSelector.Matches(podLabels) && !oldPodSelector.Matches(podLabels) { if err := e.addPodEgressIPAssignmentsWithLock(ni, newEIP.Name, newEIP.Status.Items, mark, pod); err != nil { - return fmt.Errorf("network %s: failed to add pod %s/%s egress IP config: %v", ni.GetNetworkName(), pod.Namespace, pod.Name, err) + errs = append(errs, fmt.Errorf("network %s: failed to add pod %s/%s egress IP config: %v", ni.GetNetworkName(), pod.Namespace, pod.Name, err)) } } } @@ -484,7 +479,7 @@ func (e *EgressIPController) reconcileEgressIP(old, new *egressipv1.EgressIP) (e } } } - return nil + return utilerrors.Join(errs...) } // reconcileEgressIPNamespace reconciles the database configuration setup in nbdb @@ -522,6 +517,7 @@ func (e *EgressIPController) reconcileEgressIPNamespace(old, new *corev1.Namespa if err != nil { return err } + var errs []error for _, egressIP := range egressIPs { if err := e.egressIPCache.DoWithLock(egressIP.Name, func(key string) error { // get latest egressIP object after we get cache lock to serialize egress ip operations @@ -563,10 +559,10 @@ func (e *EgressIPController) reconcileEgressIPNamespace(old, new *corev1.Namespa return nil }); err != nil { - return err + errs = append(errs, err) } } - return nil + return utilerrors.Join(errs...) } // reconcileEgressIPPod reconciles the database configuration setup in nbdb @@ -598,6 +594,7 @@ func (e *EgressIPController) reconcileEgressIPPod(old, new *corev1.Pod) (err err if err != nil { return err } + var errs []error for _, egressIP := range egressIPs { if err := e.egressIPCache.DoWithLock(egressIP.Name, func(key string) error { // get latest egressIP object after we get cache lock to serialize egress ip operations @@ -712,10 +709,10 @@ func (e *EgressIPController) reconcileEgressIPPod(old, new *corev1.Pod) (err err return nil }); err != nil { - return err + errs = append(errs, err) } } - return nil + return utilerrors.Join(errs...) } // main reconcile functions end here and local zone controller functions begin @@ -725,16 +722,17 @@ func (e *EgressIPController) addEgressIPAssignments(name string, statusAssignmen if err != nil { return err } + var errs []error for _, namespace := range namespaces { ni, err := e.networkManager.GetActiveNetworkForNamespace(namespace.Name) if err != nil { return fmt.Errorf("failed to get active network for namespace %s: %v", namespace.Name, err) } if err := e.addNamespaceEgressIPAssignments(ni, name, statusAssignments, mark, namespace, podSelector); err != nil { - return err + errs = append(errs, err) } } - return nil + return utilerrors.Join(errs...) } func (e *EgressIPController) addNamespaceEgressIPAssignments(ni util.NetInfo, name string, statusAssignments []egressipv1.EgressIPStatusItem, mark util.EgressIPMark, @@ -756,12 +754,13 @@ func (e *EgressIPController) addNamespaceEgressIPAssignments(ni util.NetInfo, na return err } } + var errs []error for _, pod := range pods { if err := e.addPodEgressIPAssignmentsWithLock(ni, name, statusAssignments, mark, pod); err != nil { - return err + errs = append(errs, err) } } - return nil + return utilerrors.Join(errs...) } func (e *EgressIPController) addPodEgressIPAssignmentsWithLock(ni util.NetInfo, name string, statusAssignments []egressipv1.EgressIPStatusItem, mark util.EgressIPMark, pod *corev1.Pod) error { @@ -778,9 +777,9 @@ func (e *EgressIPController) addPodEgressIPAssignmentsWithLock(ni util.NetInfo, // requires holding the podAssignmentMutex lock func (e *EgressIPController) addPodEgressIPAssignments(ni util.NetInfo, name string, statusAssignments []egressipv1.EgressIPStatusItem, mark util.EgressIPMark, pod *corev1.Pod) error { podKey := getPodKey(pod) - // If pod is already in succeeded or failed state, return it without proceeding further. - if util.PodCompleted(pod) { - klog.Infof("Pod %s is already in completed state, skipping egress ip assignment", podKey) + // Ignore completed pods, host networked pods, pods not scheduled + if !util.PodNeedsSNAT(pod) { + klog.Infof("Pod %s is not in desired state, skipping egress ip assignment", podKey) return nil } // If statusAssignments is empty just return, not doing this will delete the @@ -2011,7 +2010,7 @@ func (e *EgressIPController) generateCacheForEgressIP() (egressIPCache, error) { nadName = nadNames[0] // there should only be one active network } for _, pod := range pods { - if util.PodCompleted(pod) || !util.PodScheduled(pod) || util.PodWantsHostNetwork(pod) { + if !util.PodNeedsSNAT(pod) { continue } if egressLocalNodesCache.Len() == 0 && !e.isPodScheduledinLocalZone(pod) { diff --git a/go-controller/pkg/ovn/egressip_test.go b/go-controller/pkg/ovn/egressip_test.go index 7455d9ca40..131c63d354 100644 --- a/go-controller/pkg/ovn/egressip_test.go +++ b/go-controller/pkg/ovn/egressip_test.go @@ -11154,6 +11154,507 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations cluster default network" ), ) + ginkgo.DescribeTable( + "should ensure EgressIP skips host-network and pending pods from namespace", + func(includeUnscheduled, includeScheduledButNoIP, includeHostNetwork, interconnect, isPodRemote bool) { + app.Action = func(*cli.Context) error { + config.Gateway.DisableSNATMultipleGWs = true + config.OVNKubernetesFeature.EnableInterconnect = interconnect + + egressIP1 := "192.168.126.101" + node1IPv4 := "192.168.126.12" + node1IPv4CIDR := node1IPv4 + "/24" + node2IPv4 := "192.168.126.51" + node2IPv4CIDR := node2IPv4 + "/24" + + egressPod3 := *newPodWithLabels(eipNamespace, "egress-pod3", node1Name, podV4IP, egressPodLabel) + egressPod4 := *newPodWithLabels(eipNamespace, "egress-pod4", node1Name, podV4IP2, egressPodLabel) + egressNamespace := newNamespace(eipNamespace) + annotations := map[string]string{ + "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\"}", node1IPv4CIDR), + "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4Node1Subnet), + "k8s.ovn.org/l3-gateway-config": `{"default":{"mode":"local","mac-address":"7e:57:f8:f0:3c:49", "ip-address":"192.168.126.12/24", "next-hop":"192.168.126.1"}}`, + "k8s.ovn.org/node-chassis-id": "79fdcfc4-6fe6-4cd3-8242-c0f85a4668ec", + util.OVNNodeHostCIDRs: fmt.Sprintf("[\"%s\"]", node1IPv4CIDR), + "k8s.ovn.org/zone-name": "global", + } + node1 := getNodeObj(node1Name, annotations, map[string]string{}) + annotations = map[string]string{ + "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\"}", node2IPv4CIDR), + "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4Node2Subnet), + "k8s.ovn.org/l3-gateway-config": `{"default":{"mode":"local","mac-address":"7e:57:f8:f0:3c:49", "ip-address":"192.168.126.51/24", "next-hop":"192.168.126.1"}}`, + "k8s.ovn.org/node-chassis-id": "89fdcfc4-6fe6-4cd3-8242-c0f85a4668ec", + util.OVNNodeHostCIDRs: fmt.Sprintf("[\"%s\"]", node2IPv4CIDR), + "k8s.ovn.org/zone-name": "global", + } + if isPodRemote { + annotations["k8s.ovn.org/zone-name"] = "remote" + annotations["k8s.ovn.org/remote-zone-migrated"] = "remote" + } + node2 := getNodeObj(node2Name, annotations, map[string]string{}) + + eIP := egressipv1.EgressIP{ + ObjectMeta: newEgressIPMeta(egressIPName), + Spec: egressipv1.EgressIPSpec{ + EgressIPs: []string{egressIP1}, + NamespaceSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{ + "name": egressNamespace.Name, + }, + }, + }, + Status: egressipv1.EgressIPStatus{ + Items: []egressipv1.EgressIPStatusItem{}, + }, + } + node1Switch := &nbdb.LogicalSwitch{ + UUID: node1.Name + "-UUID", + Name: node1.Name, + } + node2Switch := &nbdb.LogicalSwitch{ + UUID: node2.Name + "-UUID", + Name: node2.Name, + } + + pods := []corev1.Pod{egressPod3, egressPod4} + if includeUnscheduled { + unScheduledPod := corev1.Pod{ + ObjectMeta: newPodMeta(eipNamespace, "egress-pod", egressPodLabel), + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Name: "containerName", + Image: "containerImage", + }, + }, + }, + Status: corev1.PodStatus{ + Phase: corev1.PodPending, + }, + } + pods = append(pods, unScheduledPod) + } + var pendingScheduledPodWithNoIP *corev1.Pod + if includeScheduledButNoIP { + pendingScheduledPodWithNoIP = newPodWithLabels(eipNamespace, "egress-pod1", node2Name, "", egressPodLabel) + pendingScheduledPodWithNoIP.Status.Phase = corev1.PodPending + pods = append(pods, *pendingScheduledPodWithNoIP) + } + if includeHostNetwork { + hnPod := *newPodWithLabels(eipNamespace, "egress-pod2", node2Name, node2IPv4, egressPodLabel) + hnPod.Spec.HostNetwork = true + pods = append(pods, hnPod) + } + + dynamicNeighRouters := "true" + if config.OVNKubernetesFeature.EnableInterconnect { + dynamicNeighRouters = "false" + } + logicalRouterOptions := map[string]string{ + "dynamic_neigh_routers": dynamicNeighRouters, + } + expectedNatLogicalPort1 := "k8s-node1" + fakeOvn.startWithDBSetup( + libovsdbtest.TestSetup{ + NBData: []libovsdbtest.TestData{ + &nbdb.LogicalRouter{ + Name: types.OVNClusterRouter, + UUID: types.OVNClusterRouter + "-UUID", + // stale LRP reference to be removed by EIP controller sync function. + Policies: []string{"hostnet-pod-reroute-UUID1"}, + }, + &nbdb.LogicalRouter{ + Name: types.GWRouterPrefix + node1.Name, + UUID: types.GWRouterPrefix + node1.Name + "-UUID", + Ports: []string{types.GWRouterToJoinSwitchPrefix + types.GWRouterPrefix + node1.Name + "-UUID"}, + // stale NAT reference to be removed by EIP controller sync function. + Nat: []string{"hostnet-pod-egressip-nat-UUID1"}, + Options: logicalRouterOptions, + }, + &nbdb.LogicalRouter{ + Name: types.GWRouterPrefix + node2.Name, + UUID: types.GWRouterPrefix + node2.Name + "-UUID", + Ports: []string{types.GWRouterToJoinSwitchPrefix + types.GWRouterPrefix + node2.Name + "-UUID"}, + Options: logicalRouterOptions, + }, + // stale LRP and SNAT entries (for a host networked pod) to be removed by EIP controller sync function. + &nbdb.LogicalRouterPolicy{ + Priority: types.EgressIPReroutePriority, + Match: fmt.Sprintf("ip4.src == %s", node2IPv4), + Action: nbdb.LogicalRouterPolicyActionReroute, + Nexthops: []string{"100.64.0.2"}, + ExternalIDs: getEgressIPLRPReRouteDbIDs(egressIPName, eipNamespace, "egress-pod2", IPFamilyValueV4, types.DefaultNetworkName, DefaultNetworkControllerName).GetExternalIDs(), + UUID: "hostnet-pod-reroute-UUID1", + }, + &nbdb.NAT{ + UUID: "hostnet-pod-egressip-nat-UUID1", + LogicalIP: node2IPv4, + ExternalIP: egressIP1, + ExternalIDs: getEgressIPNATDbIDs(egressIPName, eipNamespace, "egress-pod2", IPFamilyValueV4, DefaultNetworkControllerName).GetExternalIDs(), + Type: nbdb.NATTypeSNAT, + LogicalPort: &expectedNatLogicalPort1, + Options: map[string]string{ + "stateless": "false", + }, + }, + &nbdb.LogicalRouterPort{ + UUID: types.GWRouterToJoinSwitchPrefix + types.GWRouterPrefix + node1.Name + "-UUID", + Name: types.GWRouterToJoinSwitchPrefix + types.GWRouterPrefix + node1.Name, + Networks: []string{nodeLogicalRouterIfAddrV4}, + }, + &nbdb.LogicalRouterPort{ + UUID: types.GWRouterToJoinSwitchPrefix + types.GWRouterPrefix + node2.Name + "-UUID", + Name: types.GWRouterToJoinSwitchPrefix + types.GWRouterPrefix + node2.Name, + Networks: []string{node2LogicalRouterIfAddrV4}, + }, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "-UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, + Type: "router", + Options: map[string]string{ + libovsdbops.RouterPort: types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, + "nat-addresses": "router", + "exclude-lb-vips-from-garp": "true", + }, + }, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "-UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, + Type: "router", + Options: map[string]string{ + libovsdbops.RouterPort: types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, + "nat-addresses": "router", + "exclude-lb-vips-from-garp": "true", + }, + }, + &nbdb.LogicalSwitch{ + UUID: types.ExternalSwitchPrefix + node1Name + "-UUID", + Name: types.ExternalSwitchPrefix + node1Name, + Ports: []string{types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "-UUID"}, + }, + &nbdb.LogicalSwitch{ + UUID: types.ExternalSwitchPrefix + node2Name + "-UUID", + Name: types.ExternalSwitchPrefix + node2Name, + Ports: []string{types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "-UUID"}, + }, + node1Switch, + node2Switch, + }, + }, + &egressipv1.EgressIPList{ + Items: []egressipv1.EgressIP{eIP}, + }, + &corev1.NodeList{ + Items: []corev1.Node{node1, node2}, + }, + &corev1.NamespaceList{ + Items: []corev1.Namespace{*egressNamespace}, + }, + &corev1.PodList{ + Items: pods, + }, + ) + + i, n, _ := net.ParseCIDR(podV4IP + "/23") + n.IP = i + fakeOvn.controller.logicalPortCache.add(&egressPod3, "", types.DefaultNetworkName, "", nil, []*net.IPNet{n}) + if pendingScheduledPodWithNoIP != nil { + fakeOvn.controller.logicalPortCache.add(pendingScheduledPodWithNoIP, "", types.DefaultNetworkName, "", nil, []*net.IPNet{}) + } + i, n, _ = net.ParseCIDR(podV4IP2 + "/23") + n.IP = i + fakeOvn.controller.logicalPortCache.add(&egressPod4, "", types.DefaultNetworkName, "", nil, []*net.IPNet{n}) + fakeOvn.controller.eIPC.nodeZoneState.Store(node1Name, true) + fakeOvn.controller.eIPC.nodeZoneState.Store(node2Name, true) + if isPodRemote { + fakeOvn.controller.eIPC.nodeZoneState.Store(node2Name, false) + } + + err := fakeOvn.controller.WatchEgressIPNamespaces() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchEgressIPPods() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchEgressNodes() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchEgressIP() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + egressSVCServedPodsASv4, _ := buildEgressServiceAddressSets(nil) + egressIPServedPodsASv4, _ := buildEgressIPServedPodsAddressSets([]string{podV4IP, podV4IP2}, types.DefaultNetworkName, fakeOvn.controller.eIPC.controllerName) + egressNodeIPsASv4, _ := buildEgressIPNodeAddressSets([]string{node1IPv4, node2IPv4}) + + node1Switch.QOSRules = []string{"default-QoS-UUID"} + if !isPodRemote { + node2Switch.QOSRules = []string{"default-QoS-UUID"} + } + expectedDatabaseState := []libovsdbtest.TestData{ + &nbdb.LogicalRouterPolicy{ + Priority: types.DefaultNoRereoutePriority, + Match: fmt.Sprintf("(ip4.src == $%s || ip4.src == $%s) && ip4.dst == $%s", + egressIPServedPodsASv4.Name, egressSVCServedPodsASv4.Name, egressNodeIPsASv4.Name), + Action: nbdb.LogicalRouterPolicyActionAllow, + UUID: "default-no-reroute-node-UUID", + Options: map[string]string{"pkt_mark": types.EgressIPNodeConnectionMark}, + ExternalIDs: getEgressIPLRPNoReRoutePodToNodeDbIDs(IPFamilyValueV4, types.DefaultNetworkName, fakeOvn.controller.eIPC.controllerName).GetExternalIDs(), + }, + getNoReRouteReplyTrafficPolicy(types.DefaultNetworkName, fakeOvn.controller.eIPC.controllerName), + &nbdb.LogicalRouterPolicy{ + Priority: types.DefaultNoRereoutePriority, + Match: "ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14", + Action: nbdb.LogicalRouterPolicyActionAllow, + UUID: "no-reroute-UUID", + ExternalIDs: getEgressIPLRPNoReRoutePodToPodDbIDs(IPFamilyValueV4, types.DefaultNetworkName, fakeOvn.controller.eIPC.controllerName).GetExternalIDs(), + }, + &nbdb.LogicalRouterPolicy{ + Priority: types.DefaultNoRereoutePriority, + Match: fmt.Sprintf("ip4.src == 10.128.0.0/14 && ip4.dst == %s", config.Gateway.V4JoinSubnet), + Action: nbdb.LogicalRouterPolicyActionAllow, + UUID: "no-reroute-service-UUID", + ExternalIDs: getEgressIPLRPNoReRoutePodToJoinDbIDs(IPFamilyValueV4, types.DefaultNetworkName, fakeOvn.controller.eIPC.controllerName).GetExternalIDs(), + }, + &nbdb.LogicalRouter{ + Name: types.OVNClusterRouter, + UUID: types.OVNClusterRouter + "-UUID", + Policies: []string{"no-reroute-UUID", "no-reroute-service-UUID", "default-no-reroute-node-UUID", "default-no-reroute-reply-traffic"}, + }, + &nbdb.LogicalRouter{ + Name: types.GWRouterPrefix + node1.Name, + UUID: types.GWRouterPrefix + node1.Name + "-UUID", + Ports: []string{types.GWRouterToJoinSwitchPrefix + types.GWRouterPrefix + node1.Name + "-UUID"}, + Options: logicalRouterOptions, + }, + &nbdb.LogicalRouter{ + Name: types.GWRouterPrefix + node2.Name, + UUID: types.GWRouterPrefix + node2.Name + "-UUID", + Ports: []string{types.GWRouterToJoinSwitchPrefix + types.GWRouterPrefix + node2.Name + "-UUID"}, + Options: logicalRouterOptions, + }, + &nbdb.LogicalRouterPort{ + UUID: types.GWRouterToJoinSwitchPrefix + types.GWRouterPrefix + node2.Name + "-UUID", + Name: types.GWRouterToJoinSwitchPrefix + types.GWRouterPrefix + node2.Name, + Networks: []string{node2LogicalRouterIfAddrV4}, + }, + &nbdb.LogicalRouterPort{ + UUID: types.GWRouterToJoinSwitchPrefix + types.GWRouterPrefix + node1.Name + "-UUID", + Name: types.GWRouterToJoinSwitchPrefix + types.GWRouterPrefix + node1.Name, + Networks: []string{nodeLogicalRouterIfAddrV4}, + }, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "-UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, + Type: "router", + Options: map[string]string{ + libovsdbops.RouterPort: types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, + "nat-addresses": "router", + "exclude-lb-vips-from-garp": "true", + }, + }, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "-UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, + Type: "router", + Options: map[string]string{ + libovsdbops.RouterPort: types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, + "nat-addresses": "router", + "exclude-lb-vips-from-garp": "true", + }, + }, + &nbdb.LogicalSwitch{ + UUID: types.ExternalSwitchPrefix + node1Name + "-UUID", + Name: types.ExternalSwitchPrefix + node1Name, + Ports: []string{types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "-UUID"}, + }, + &nbdb.LogicalSwitch{ + UUID: types.ExternalSwitchPrefix + node2Name + "-UUID", + Name: types.ExternalSwitchPrefix + node2Name, + Ports: []string{types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "-UUID"}, + }, + node1Switch, + node2Switch, + getDefaultQoSRule(false, types.DefaultNetworkName, fakeOvn.controller.eIPC.controllerName), + egressSVCServedPodsASv4, egressIPServedPodsASv4, egressNodeIPsASv4, + } + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) + + for i := 0; i < 10; i++ { + gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(0)) + node1.Labels = map[string]string{ + "k8s.ovn.org/egress-assignable": "", + } + _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Nodes().Update(context.TODO(), &node1, metav1.UpdateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + fakeOvn.patchEgressIPObj(node1Name, egressIPName, egressIP1) + gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) + eips, nodes := getEgressIPStatus(egressIPName) + gomega.Expect(nodes[0]).To(gomega.Equal(node1.Name)) + + expectedDatabaseState = []libovsdbtest.TestData{ + &nbdb.LogicalRouterPolicy{ + Priority: types.DefaultNoRereoutePriority, + Match: fmt.Sprintf("(ip4.src == $%s || ip4.src == $%s) && ip4.dst == $%s", + egressIPServedPodsASv4.Name, egressSVCServedPodsASv4.Name, egressNodeIPsASv4.Name), + Action: nbdb.LogicalRouterPolicyActionAllow, + UUID: "default-no-reroute-node-UUID", + Options: map[string]string{"pkt_mark": types.EgressIPNodeConnectionMark}, + ExternalIDs: getEgressIPLRPNoReRoutePodToNodeDbIDs(IPFamilyValueV4, types.DefaultNetworkName, fakeOvn.controller.eIPC.controllerName).GetExternalIDs(), + }, + getNoReRouteReplyTrafficPolicy(types.DefaultNetworkName, fakeOvn.controller.eIPC.controllerName), + &nbdb.LogicalRouterPolicy{ + Priority: types.DefaultNoRereoutePriority, + Match: "ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14", + Action: nbdb.LogicalRouterPolicyActionAllow, + UUID: "no-reroute-UUID", + ExternalIDs: getEgressIPLRPNoReRoutePodToPodDbIDs(IPFamilyValueV4, types.DefaultNetworkName, fakeOvn.controller.eIPC.controllerName).GetExternalIDs(), + }, + &nbdb.LogicalRouterPolicy{ + Priority: types.DefaultNoRereoutePriority, + Match: fmt.Sprintf("ip4.src == 10.128.0.0/14 && ip4.dst == %s", config.Gateway.V4JoinSubnet), + Action: nbdb.LogicalRouterPolicyActionAllow, + UUID: "no-reroute-service-UUID", + ExternalIDs: getEgressIPLRPNoReRoutePodToJoinDbIDs(IPFamilyValueV4, types.DefaultNetworkName, fakeOvn.controller.eIPC.controllerName).GetExternalIDs(), + }, + &nbdb.LogicalRouterPolicy{ + Priority: types.EgressIPReroutePriority, + Match: fmt.Sprintf("ip4.src == %s", egressPod3.Status.PodIP), + Action: nbdb.LogicalRouterPolicyActionReroute, + Nexthops: []string{"100.64.0.2"}, + ExternalIDs: getEgressIPLRPReRouteDbIDs(eIP.Name, egressPod3.Namespace, egressPod3.Name, IPFamilyValueV4, types.DefaultNetworkName, fakeOvn.controller.eIPC.controllerName).GetExternalIDs(), + UUID: "reroute-UUID1", + }, + &nbdb.LogicalRouterPolicy{ + Priority: types.EgressIPReroutePriority, + Match: fmt.Sprintf("ip4.src == %s", egressPod4.Status.PodIP), + Action: nbdb.LogicalRouterPolicyActionReroute, + Nexthops: []string{"100.64.0.2"}, + ExternalIDs: getEgressIPLRPReRouteDbIDs(eIP.Name, egressPod4.Namespace, egressPod4.Name, IPFamilyValueV4, types.DefaultNetworkName, fakeOvn.controller.eIPC.controllerName).GetExternalIDs(), + UUID: "reroute-UUID2", + }, + &nbdb.LogicalRouter{ + Name: types.OVNClusterRouter, + UUID: types.OVNClusterRouter + "-UUID", + Policies: []string{"no-reroute-UUID", "no-reroute-service-UUID", "default-no-reroute-node-UUID", "reroute-UUID1", "reroute-UUID2", "default-no-reroute-reply-traffic"}, + }, + &nbdb.LogicalRouter{ + Name: types.GWRouterPrefix + node1.Name, + UUID: types.GWRouterPrefix + node1.Name + "-UUID", + Ports: []string{types.GWRouterToJoinSwitchPrefix + types.GWRouterPrefix + node1.Name + "-UUID"}, + Nat: []string{"pod1-egressip-nat-UUID1", "pod4-egressip-nat-UUID1"}, + Options: logicalRouterOptions, + }, + &nbdb.LogicalRouter{ + Name: types.GWRouterPrefix + node2.Name, + UUID: types.GWRouterPrefix + node2.Name + "-UUID", + Ports: []string{types.GWRouterToJoinSwitchPrefix + types.GWRouterPrefix + node2.Name + "-UUID"}, + Options: logicalRouterOptions, + }, + &nbdb.NAT{ + UUID: "pod1-egressip-nat-UUID1", + LogicalIP: podV4IP, + ExternalIP: eips[0], + ExternalIDs: getEgressIPNATDbIDs(egressIPName, egressPod3.Namespace, egressPod3.Name, IPFamilyValueV4, fakeOvn.controller.controllerName).GetExternalIDs(), + Type: nbdb.NATTypeSNAT, + LogicalPort: &expectedNatLogicalPort1, + Options: map[string]string{ + "stateless": "false", + }, + }, + &nbdb.NAT{ + UUID: "pod4-egressip-nat-UUID1", + LogicalIP: podV4IP2, + ExternalIP: eips[0], + ExternalIDs: getEgressIPNATDbIDs(egressIPName, egressPod4.Namespace, egressPod4.Name, IPFamilyValueV4, fakeOvn.controller.controllerName).GetExternalIDs(), + Type: nbdb.NATTypeSNAT, + LogicalPort: &expectedNatLogicalPort1, + Options: map[string]string{ + "stateless": "false", + }, + }, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "-UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, + Type: "router", + Options: map[string]string{ + libovsdbops.RouterPort: types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, + "nat-addresses": "router", + "exclude-lb-vips-from-garp": "true", + }, + }, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "-UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, + Type: "router", + Options: map[string]string{ + libovsdbops.RouterPort: types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, + "nat-addresses": "router", + "exclude-lb-vips-from-garp": "true", + }, + }, + &nbdb.LogicalRouterPort{ + UUID: types.GWRouterToJoinSwitchPrefix + types.GWRouterPrefix + node2.Name + "-UUID", + Name: types.GWRouterToJoinSwitchPrefix + types.GWRouterPrefix + node2.Name, + Networks: []string{"100.64.0.3/29"}, + }, + &nbdb.LogicalRouterPort{ + UUID: types.GWRouterToJoinSwitchPrefix + types.GWRouterPrefix + node1.Name + "-UUID", + Name: types.GWRouterToJoinSwitchPrefix + types.GWRouterPrefix + node1.Name, + Networks: []string{"100.64.0.2/29"}, + }, + &nbdb.LogicalSwitch{ + UUID: types.ExternalSwitchPrefix + node1Name + "-UUID", + Name: types.ExternalSwitchPrefix + node1Name, + Ports: []string{types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "-UUID"}, + }, + &nbdb.LogicalSwitch{ + UUID: types.ExternalSwitchPrefix + node2Name + "-UUID", + Name: types.ExternalSwitchPrefix + node2Name, + Ports: []string{types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "-UUID"}, + }, + node1Switch, + node2Switch, + getDefaultQoSRule(false, types.DefaultNetworkName, fakeOvn.controller.eIPC.controllerName), + egressSVCServedPodsASv4, egressIPServedPodsASv4, egressNodeIPsASv4, + } + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) + + // Try out EgressIP failover scenarios for the next iteration. + node2.Labels = map[string]string{ + "k8s.ovn.org/egress-assignable": "", + } + _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Nodes().Update(context.TODO(), &node2, metav1.UpdateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + fakeOvn.patchEgressIPObj(node2Name, egressIPName, egressIP1) + gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) + _, nodes = getEgressIPStatus(egressIPName) + gomega.Expect(nodes[0]).To(gomega.Equal(node2.Name)) + // remove label from node1 and node2. + node1.Labels = map[string]string{} + _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Nodes().Update(context.TODO(), &node1, metav1.UpdateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + node2.Labels = map[string]string{} + _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Nodes().Update(context.TODO(), &node2, metav1.UpdateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + status := []egressipv1.EgressIPStatusItem{} + err = fakeOvn.controller.eIPC.patchReplaceEgressIPStatus(egressIPName, status) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + } + return nil + } + + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }, + ginkgo.Entry("unscheduled pending pod only, interconnect disabled", true, false, false, false, false), + ginkgo.Entry("scheduled pending pod only, interconnect disabled", false, true, false, false, false), + ginkgo.Entry("host-networked pod only, interconnect disabled", false, false, true, false, false), + ginkgo.Entry("unscheduled pending pod only, interconnect enabled, local pod", true, false, false, true, false), + ginkgo.Entry("scheduled pending pod only, interconnect enabled, local pod", false, true, false, true, false), + ginkgo.Entry("host-networked pod only, interconnect enabled, local pod", false, false, true, true, false), + ginkgo.Entry("unscheduled pending pod only, interconnect enabled, remote pod", true, false, false, true, true), + ginkgo.Entry("scheduled pending pod only, interconnect enabled, remote pod", false, true, false, true, true), + ginkgo.Entry("host-networked pod only, interconnect enabled, remote pod", false, false, true, true, true), + ) + ginkgo.It("should not deadlock when two EgressIP pods are hosted and egress through opposite nodes", func() { // This test is designed to reproduce a deadlock issue. // To reproduce undo the corrspoinding fix introduced in this commit and