diff --git a/go-controller/pkg/ovn/egressip.go b/go-controller/pkg/ovn/egressip.go index b102116953..baf133ed17 100644 --- a/go-controller/pkg/ovn/egressip.go +++ b/go-controller/pkg/ovn/egressip.go @@ -1089,6 +1089,12 @@ func (oc *DefaultNetworkController) addPodEgressIPAssignmentsWithLock(name strin // work on ovnkube-master restarts when all egress IP handlers will most likely // match and perform the setup for the same pod and status multiple times over. func (oc *DefaultNetworkController) addPodEgressIPAssignments(name string, statusAssignments []egressipv1.EgressIPStatusItem, pod *kapi.Pod) error { + podKey := getPodKey(pod) + // If the pod is already in a succeeded or failed state, return without proceeding further. + if util.PodCompleted(pod) { + klog.Infof("Pod %s is already in completed state, skipping egress ip assignment", podKey) + return nil + } // If statusAssignments is empty just return, not doing this will delete the // external GW set up, even though there might be no egress IP set up to // perform. @@ -1096,7 +1102,6 @@ func (oc *DefaultNetworkController) addPodEgressIPAssignments(name string, statu return nil } var remainingAssignments []egressipv1.EgressIPStatusItem - podKey := getPodKey(pod) // Retrieve the pod's networking configuration from the // logicalPortCache. The reason for doing this: a) only normal network // pods are placed in this cache, b) once the pod is placed here we know @@ -2374,7 +2379,7 @@ func (e *egressIPController) addExternalGWPodSNAT(podNamespace, podName string, // external GW setup in those cases. 
func (e *egressIPController) addExternalGWPodSNATOps(ops []ovsdb.Operation, podNamespace, podName string, status egressipv1.EgressIPStatusItem) ([]ovsdb.Operation, error) { if config.Gateway.DisableSNATMultipleGWs { - if pod, err := e.watchFactory.GetPod(podNamespace, podName); err == nil && pod.Spec.NodeName == status.Node { + if pod, err := e.watchFactory.GetPod(podNamespace, podName); err == nil && pod.Spec.NodeName == status.Node && util.PodNeedsSNAT(pod) { // if the pod still exists, add snats to->nodeIP (on the node where the pod exists) for these podIPs after deleting the snat to->egressIP // NOTE: This needs to be done only if the pod was on the same node as egressNode extIPs, err := getExternalIPsGR(e.watchFactory, pod.Spec.NodeName) diff --git a/go-controller/pkg/ovn/egressip_test.go b/go-controller/pkg/ovn/egressip_test.go index f078cb3dff..1bd31aad9f 100644 --- a/go-controller/pkg/ovn/egressip_test.go +++ b/go-controller/pkg/ovn/egressip_test.go @@ -5872,6 +5872,396 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { gomega.Expect(err).NotTo(gomega.HaveOccurred()) }) + ginkgo.It("ensure egress ip entries are not created when pod is already moved into completed state", func() { + app.Action = func(ctx *cli.Context) error { + config.Gateway.DisableSNATMultipleGWs = true + egressIP := "192.168.126.25" + node1IPv4 := "192.168.126.12/24" + + egressPod := *newPodWithLabels(namespace, podName, node1Name, podV4IP, egressPodLabel) + egressPod.Status.Phase = kapi.PodSucceeded + egressNamespace := newNamespace(namespace) + + node1 := v1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: node1Name, + Annotations: map[string]string{ + "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\"}", node1IPv4), + "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), + "k8s.ovn.org/l3-gateway-config": `{"default":{"mode":"local","mac-address":"7e:57:f8:f0:3c:49", "ip-address":"192.168.126.12/24", 
"next-hop":"192.168.126.1"}}`, + "k8s.ovn.org/node-chassis-id": "79fdcfc4-6fe6-4cd3-8242-c0f85a4668ec", + }, + Labels: map[string]string{ + "k8s.ovn.org/egress-assignable": "", + }, + }, + Status: v1.NodeStatus{ + Conditions: []v1.NodeCondition{ + { + Type: v1.NodeReady, + Status: v1.ConditionTrue, + }, + }, + }, + } + + eIP := egressipv1.EgressIP{ + ObjectMeta: newEgressIPMeta(egressIPName), + Spec: egressipv1.EgressIPSpec{ + EgressIPs: []string{egressIP}, + PodSelector: metav1.LabelSelector{ + MatchLabels: egressPodLabel, + }, + NamespaceSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{ + "name": egressNamespace.Name, + }, + }, + }, + Status: egressipv1.EgressIPStatus{ + Items: []egressipv1.EgressIPStatusItem{}, + }, + } + + node1Switch := &nbdb.LogicalSwitch{ + UUID: node1.Name + "-UUID", + Name: node1.Name, + } + node1GR := &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node1.Name, + UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", + } + node1LSP := &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, + }, + } + fakeOvn.startWithDBSetup( + libovsdbtest.TestSetup{ + NBData: []libovsdbtest.TestData{ + &nbdb.LogicalRouter{ + Name: ovntypes.OVNClusterRouter, + UUID: ovntypes.OVNClusterRouter + "-UUID", + }, + node1GR, + node1LSP, + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name, + Networks: []string{"100.64.0.2/29"}, + }, + node1Switch, + }, + }, + &egressipv1.EgressIPList{ + Items: []egressipv1.EgressIP{eIP}, + }, + &v1.NodeList{ + Items: []v1.Node{node1}, + }, + &v1.NamespaceList{ + Items: []v1.Namespace{*egressNamespace}, + }, + 
&v1.PodList{ + Items: []v1.Pod{egressPod}, + }, + ) + i, n, _ := net.ParseCIDR(podV4IP + "/23") + n.IP = i + fakeOvn.controller.logicalPortCache.add(&egressPod, "", types.DefaultNetworkName, "", nil, []*net.IPNet{n}) + + err := fakeOvn.controller.WatchPods() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchEgressIPNamespaces() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchEgressIPPods() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchEgressNodes() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchEgressIP() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + egressPodPortInfo, err := fakeOvn.controller.logicalPortCache.get(&egressPod, types.DefaultNetworkName) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + ePod, err := fakeOvn.fakeClient.KubeClient.CoreV1().Pods(egressPod.Namespace).Get(context.TODO(), egressPod.Name, metav1.GetOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + egressPodIP, err := util.GetPodIPsOfNetwork(ePod, &util.DefaultNetInfo{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + egressNetPodIP, _, err := net.ParseCIDR(egressPodPortInfo.ips[0].String()) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Expect(egressNetPodIP.String()).To(gomega.Equal(egressPodIP[0].String())) + + gomega.Eventually(getEgressIPAllocatorSizeSafely).Should(gomega.Equal(1)) + gomega.Eventually(isEgressAssignableNode(node1.Name)).Should(gomega.BeTrue()) + gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) + gomega.Eventually(getEgressIPReassignmentCount).Should(gomega.Equal(0)) + egressIPs, nodes := getEgressIPStatus(egressIPName) + gomega.Expect(nodes[0]).To(gomega.Equal(node1.Name)) + gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP)) + + node1LSP.Options = map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, + "nat-addresses": "router", + 
"exclude-lb-vips-from-garp": "true", + } + expectedDatabaseStatewithPod := []libovsdbtest.TestData{ + &nbdb.LogicalRouterPolicy{ + Priority: types.DefaultNoRereoutePriority, + Match: "ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14", + Action: nbdb.LogicalRouterPolicyActionAllow, + UUID: "no-reroute-UUID", + }, &nbdb.LogicalRouterPolicy{ + Priority: types.DefaultNoRereoutePriority, + Match: fmt.Sprintf("ip4.src == 10.128.0.0/14 && ip4.dst == %s", config.Gateway.V4JoinSubnet), + Action: nbdb.LogicalRouterPolicyActionAllow, + UUID: "no-reroute-service-UUID", + }, &nbdb.LogicalRouter{ + Name: ovntypes.OVNClusterRouter, + UUID: ovntypes.OVNClusterRouter + "-UUID", + Policies: []string{"no-reroute-UUID", "no-reroute-service-UUID"}, + }, node1GR, node1LSP, + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name, + Networks: []string{"100.64.0.2/29"}, + }, node1Switch} + + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseStatewithPod)) + return nil + } + + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + + ginkgo.It("ensure external gw pod snat entry is not created back when pod is moved into completed state", func() { + app.Action = func(ctx *cli.Context) error { + config.Gateway.DisableSNATMultipleGWs = true + egressIP := "192.168.126.25" + node1IPv4 := "192.168.126.12/24" + + egressPod := *newPodWithLabels(namespace, podName, node1Name, podV4IP, egressPodLabel) + egressNamespace := newNamespace(namespace) + + node1 := v1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: node1Name, + Annotations: map[string]string{ + "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\"}", node1IPv4), + "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), + "k8s.ovn.org/l3-gateway-config": 
`{"default":{"mode":"local","mac-address":"7e:57:f8:f0:3c:49", "ip-address":"192.168.126.12/24", "next-hop":"192.168.126.1"}}`, + "k8s.ovn.org/node-chassis-id": "79fdcfc4-6fe6-4cd3-8242-c0f85a4668ec", + }, + Labels: map[string]string{ + "k8s.ovn.org/egress-assignable": "", + }, + }, + Status: v1.NodeStatus{ + Conditions: []v1.NodeCondition{ + { + Type: v1.NodeReady, + Status: v1.ConditionTrue, + }, + }, + }, + } + + eIP := egressipv1.EgressIP{ + ObjectMeta: newEgressIPMeta(egressIPName), + Spec: egressipv1.EgressIPSpec{ + EgressIPs: []string{egressIP}, + PodSelector: metav1.LabelSelector{ + MatchLabels: egressPodLabel, + }, + NamespaceSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{ + "name": egressNamespace.Name, + }, + }, + }, + Status: egressipv1.EgressIPStatus{ + Items: []egressipv1.EgressIPStatusItem{}, + }, + } + + node1Switch := &nbdb.LogicalSwitch{ + UUID: node1.Name + "-UUID", + Name: node1.Name, + } + node1GR := &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node1.Name, + UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", + } + node1LSP := &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, + }, + } + fakeOvn.startWithDBSetup( + libovsdbtest.TestSetup{ + NBData: []libovsdbtest.TestData{ + &nbdb.LogicalRouter{ + Name: ovntypes.OVNClusterRouter, + UUID: ovntypes.OVNClusterRouter + "-UUID", + }, + node1GR, + node1LSP, + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name, + Networks: []string{"100.64.0.2/29"}, + }, + node1Switch, + }, + }, + &egressipv1.EgressIPList{ + Items: []egressipv1.EgressIP{eIP}, + }, + &v1.NodeList{ + Items: 
[]v1.Node{node1}, + }, + &v1.NamespaceList{ + Items: []v1.Namespace{*egressNamespace}, + }, + &v1.PodList{ + Items: []v1.Pod{egressPod}, + }, + ) + + i, n, _ := net.ParseCIDR(podV4IP + "/23") + n.IP = i + fakeOvn.controller.logicalPortCache.add(&egressPod, "", types.DefaultNetworkName, "", nil, []*net.IPNet{n}) + + err := fakeOvn.controller.WatchPods() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchEgressIPNamespaces() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchEgressIPPods() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchEgressNodes() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchEgressIP() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + egressPodPortInfo, err := fakeOvn.controller.logicalPortCache.get(&egressPod, types.DefaultNetworkName) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + ePod, err := fakeOvn.fakeClient.KubeClient.CoreV1().Pods(egressPod.Namespace).Get(context.TODO(), egressPod.Name, metav1.GetOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + egressPodIP, err := util.GetPodIPsOfNetwork(ePod, &util.DefaultNetInfo{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + egressNetPodIP, _, err := net.ParseCIDR(egressPodPortInfo.ips[0].String()) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Expect(egressNetPodIP.String()).To(gomega.Equal(egressPodIP[0].String())) + gomega.Expect(egressPodPortInfo.expires.IsZero()).To(gomega.BeTrue()) + + gomega.Eventually(getEgressIPAllocatorSizeSafely).Should(gomega.Equal(1)) + gomega.Eventually(isEgressAssignableNode(node1.Name)).Should(gomega.BeTrue()) + gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) + gomega.Eventually(getEgressIPReassignmentCount).Should(gomega.Equal(0)) + egressIPs, nodes := getEgressIPStatus(egressIPName) + gomega.Expect(nodes[0]).To(gomega.Equal(node1.Name)) + 
gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP)) + + podEIPSNAT := &nbdb.NAT{ + UUID: "egressip-nat-UUID1", + LogicalIP: podV4IP, + ExternalIP: egressIP, + ExternalIDs: map[string]string{ + "name": egressIPName, + }, + Type: nbdb.NATTypeSNAT, + LogicalPort: utilpointer.StringPtr("k8s-node1"), + Options: map[string]string{ + "stateless": "false", + }, + } + podReRoutePolicy := &nbdb.LogicalRouterPolicy{ + Priority: types.EgressIPReroutePriority, + Match: fmt.Sprintf("ip4.src == %s", egressPodIP[0].String()), + Action: nbdb.LogicalRouterPolicyActionReroute, + Nexthops: nodeLogicalRouterIPv4, + ExternalIDs: map[string]string{ + "name": egressIPName, + }, + UUID: "reroute-UUID1", + } + node1GR.Nat = []string{"egressip-nat-UUID1"} + node1LSP.Options = map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, + "nat-addresses": "router", + "exclude-lb-vips-from-garp": "true", + } + expectedDatabaseStatewithPod := []libovsdbtest.TestData{ + podEIPSNAT, &nbdb.LogicalRouterPolicy{ + Priority: types.DefaultNoRereoutePriority, + Match: "ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14", + Action: nbdb.LogicalRouterPolicyActionAllow, + UUID: "no-reroute-UUID", + }, &nbdb.LogicalRouterPolicy{ + Priority: types.DefaultNoRereoutePriority, + Match: fmt.Sprintf("ip4.src == 10.128.0.0/14 && ip4.dst == %s", config.Gateway.V4JoinSubnet), + Action: nbdb.LogicalRouterPolicyActionAllow, + UUID: "no-reroute-service-UUID", + }, podReRoutePolicy, &nbdb.LogicalRouter{ + Name: ovntypes.OVNClusterRouter, + UUID: ovntypes.OVNClusterRouter + "-UUID", + Policies: []string{"no-reroute-UUID", "no-reroute-service-UUID", "reroute-UUID1"}, + }, node1GR, node1LSP, + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name, + Networks: []string{"100.64.0.2/29"}, + }, node1Switch} + + 
gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseStatewithPod)) + + egressPod.Status.Phase = kapi.PodSucceeded + _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Pods(egressPod.Namespace).Update(context.TODO(), &egressPod, metav1.UpdateOptions{}) + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + // Wait for pod to get moved into succeeded state. + gomega.Eventually(func() v1.PodPhase { + egressPod1, _ := fakeOvn.watcher.GetPod(egressPod.Namespace, egressPod.Name) + return egressPod1.Status.Phase + }, 5).Should(gomega.Equal(kapi.PodSucceeded)) + + node1GR.Nat = []string{} + expectedDatabaseStatewithCompletedPod := []libovsdbtest.TestData{ + &nbdb.LogicalRouterPolicy{ + Priority: types.DefaultNoRereoutePriority, + Match: "ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14", + Action: nbdb.LogicalRouterPolicyActionAllow, + UUID: "no-reroute-UUID", + }, &nbdb.LogicalRouterPolicy{ + Priority: types.DefaultNoRereoutePriority, + Match: fmt.Sprintf("ip4.src == 10.128.0.0/14 && ip4.dst == %s", config.Gateway.V4JoinSubnet), + Action: nbdb.LogicalRouterPolicyActionAllow, + UUID: "no-reroute-service-UUID", + }, &nbdb.LogicalRouter{ + Name: ovntypes.OVNClusterRouter, + UUID: ovntypes.OVNClusterRouter + "-UUID", + Policies: []string{"no-reroute-UUID", "no-reroute-service-UUID"}, + }, node1GR, node1LSP, + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name, + Networks: []string{"100.64.0.2/29"}, + }, node1Switch} + + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseStatewithCompletedPod)) + + return nil + } + + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + ginkgo.It("should remove stale pod SNAT referring to wrong logical port after ovnkube-master is started", func() { app.Action = func(ctx *cli.Context) error { 
config.Gateway.DisableSNATMultipleGWs = true diff --git a/go-controller/pkg/ovn/namespace.go b/go-controller/pkg/ovn/namespace.go index f0518f7443..c78d8e854c 100644 --- a/go-controller/pkg/ovn/namespace.go +++ b/go-controller/pkg/ovn/namespace.go @@ -329,6 +329,9 @@ func (oc *DefaultNetworkController) updateNamespace(old, newer *kapi.Namespace) errors = append(errors, fmt.Errorf("failed to get all the pods (%v)", err)) } for _, pod := range existingPods { + if !util.PodNeedsSNAT(pod) { + continue + } podAnnotation, err := util.UnmarshalPodAnnotation(pod.Annotations, types.DefaultNetworkName) if err != nil { errors = append(errors, err) diff --git a/go-controller/pkg/util/kube.go b/go-controller/pkg/util/kube.go index 9178406e2c..8e8b4ee186 100644 --- a/go-controller/pkg/util/kube.go +++ b/go-controller/pkg/util/kube.go @@ -312,6 +312,12 @@ func GetNodePrimaryIP(node *kapi.Node) (string, error) { kapi.NodeInternalIP, kapi.NodeExternalIP) } +// PodNeedsSNAT returns true if the given pod is eligible to have an SNAT entry set up +// in OVN for its egress traffic outside the cluster, otherwise returns false. +func PodNeedsSNAT(pod *kapi.Pod) bool { + return PodScheduled(pod) && !PodWantsHostNetwork(pod) && !PodCompleted(pod) +} + // PodWantsHostNetwork returns if the given pod is hostNetworked or not to determine if networking // needs to be setup func PodWantsHostNetwork(pod *kapi.Pod) bool {