From 5e272576bf0bc1dc0a0ec0493b923ebefff30c21 Mon Sep 17 00:00:00 2001 From: Alexander Constantinescu Date: Wed, 16 Jun 2021 13:52:22 +0200 Subject: [PATCH 01/10] Change assignment set sequence for egress node ADD THe assignment set sequence is slightly incorrect for egress nodes as it couldn't correctly set all fields in the case a node is reachable but not ready. Signed-off-by: Alexander Constantinescu (cherry picked from commit 11b6a83a6c1db9c6a0c7c0db9929ccc964d6cf76) --- go-controller/pkg/ovn/ovn.go | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/go-controller/pkg/ovn/ovn.go b/go-controller/pkg/ovn/ovn.go index 93176a8519..361eacaaf5 100644 --- a/go-controller/pkg/ovn/ovn.go +++ b/go-controller/pkg/ovn/ovn.go @@ -664,16 +664,21 @@ func (oc *Controller) WatchEgressNodes() { klog.Error(err) } nodeLabels := node.GetLabels() - if _, hasEgressLabel := nodeLabels[nodeEgressLabel]; hasEgressLabel { + _, hasEgressLabel := nodeLabels[nodeEgressLabel] + if hasEgressLabel { oc.setNodeEgressAssignable(node.Name, true) - if oc.isEgressNodeReady(node) { - oc.setNodeEgressReady(node.Name, true) - if oc.isEgressNodeReachable(node) { - oc.setNodeEgressReachable(node.Name, true) - if err := oc.addEgressNode(node); err != nil { - klog.Error(err) - } - } + } + isReady := oc.isEgressNodeReady(node) + if isReady { + oc.setNodeEgressReady(node.Name, true) + } + isReachable := oc.isEgressNodeReachable(node) + if isReachable { + oc.setNodeEgressReachable(node.Name, true) + } + if hasEgressLabel && isReachable && isReady { + if err := oc.addEgressNode(node); err != nil { + klog.Error(err) } } }, From 94090d46df0d8784bedb0cadee3bd65c9780d21c Mon Sep 17 00:00:00 2001 From: Alexander Constantinescu Date: Wed, 16 Jun 2021 13:55:23 +0200 Subject: [PATCH 02/10] Avoid erroring if master cannot find all egress labels Returning an error in this case is slightly incorrect since this case shouldn't really be considered an error. ovnkube-node will set the annotation once it has parsed it, but it is not expected to always be readily available for ovnkube-master to read. This also happens to spam the logs Signed-off-by: Alexander Constantinescu (cherry picked from commit 1468b31062cbf6709aa64fe282be157b382e402d) --- go-controller/pkg/ovn/egressip.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/go-controller/pkg/ovn/egressip.go b/go-controller/pkg/ovn/egressip.go index d19fc06b42..ed2f29d06e 100644 --- a/go-controller/pkg/ovn/egressip.go +++ b/go-controller/pkg/ovn/egressip.go @@ -703,7 +703,8 @@ func (oc *Controller) initEgressIPAllocator(node *kapi.Node) (err error) { var v4Subnet, v6Subnet *net.IPNet v4IfAddr, v6IfAddr, err := util.ParseNodePrimaryIfAddr(node) if err != nil { - return fmt.Errorf("unable to use node for egress assignment, err: %v", err) + klog.V(5).Infof("Unable to use node for egress assignment, err: %v", err) + return nil } if v4IfAddr != "" { v4IP, v4Subnet, err = net.ParseCIDR(v4IfAddr) From c154e226cbd3cab3e82bd746005b272fa7d11707 Mon Sep 17 00:00:00 2001 From: Alexander Constantinescu Date: Tue, 8 Dec 2020 13:29:32 +0100 Subject: [PATCH 03/10] Egress IP with ECMP routing Egress IP has until now been flawed. For EgressIP objects with multiple statuses (essentially inidcating HA mode request) we created several individual logical router policies for every node used for egress IP, however only one was ever used at any given moment. OVN has now implemented ECMP routing for logical router policies allowing us to specify multiple nexthops and OVN to load balance between them. Now all egress nodes assigned to an EgressIP should be used equally during traffic distribution. Signed-off-by: Alexander Constantinescu (cherry picked from commit 19e2f18b55c6b874a68441512cbe51cd72cdb447) --- go-controller/pkg/ovn/egressip.go | 146 +++++++++++++------------ go-controller/pkg/ovn/egressip_test.go | 50 ++++----- 2 files changed, 104 insertions(+), 92 deletions(-) diff --git a/go-controller/pkg/ovn/egressip.go b/go-controller/pkg/ovn/egressip.go index ed2f29d06e..97a61e1c33 100644 --- a/go-controller/pkg/ovn/egressip.go +++ b/go-controller/pkg/ovn/egressip.go @@ -820,10 +820,10 @@ func (e *egressIPController) addPodEgressIP(eIP *egressipv1.EgressIP, pod *kapi. if e.needsRetry(pod) { e.podRetry.Delete(getPodKey(pod)) } + if err := e.handleEgressReroutePolicy(podIPs, eIP.Status.Items, eIP.Name, e.createEgressReroutePolicy); err != nil { + return fmt.Errorf("unable to create logical router policy, err: %v", err) + } for _, status := range eIP.Status.Items { - if err := e.createEgressReroutePolicy(podIPs, status, eIP.Name); err != nil { - return fmt.Errorf("unable to create logical router policy for status: %v, err: %v", status, err) - } if err := createNATRule(podIPs, status, eIP.Name); err != nil { return fmt.Errorf("unable to create NAT rule for status: %v, err: %v", status, err) } @@ -842,10 +842,10 @@ func (e *egressIPController) deletePodEgressIP(eIP *egressipv1.EgressIP, pod *ka if len(podIPs) == 0 { return fmt.Errorf("unable to retrieve pod IPs, err: no pod IPs defined") } + if err := e.handleEgressReroutePolicy(podIPs, eIP.Status.Items, eIP.Name, e.deleteEgressReroutePolicy); err != nil { + return fmt.Errorf("unable to delete logical router policy, err: %v", err) + } for _, status := range eIP.Status.Items { - if err := e.deleteEgressReroutePolicy(podIPs, status, eIP.Name); err != nil { - return fmt.Errorf("unable to delete logical router policy for status: %v, err: %v", status, err) - } if err := deleteNATRule(podIPs, status, eIP.Name); err != nil { return fmt.Errorf("unable to delete NAT rule for status: %v, err: %v", status, err) } @@ -902,83 +902,95 @@ func (e *egressIPController) needsRetry(pod *kapi.Pod) bool { // createEgressReroutePolicy uses logical router policies to force egress traffic to the egress node, for that we need // to retrive the internal gateway router IP attached to the egress node. This method handles both the shared and // local gateway mode case -func (e *egressIPController) createEgressReroutePolicy(podIps []net.IP, status egressipv1.EgressIPStatusItem, egressIPName string) error { - isEgressIPv6 := utilnet.IsIPv6String(status.EgressIP) - gatewayRouterIP, err := e.getGatewayRouterJoinIP(status.Node, isEgressIPv6) - if err != nil { - return fmt.Errorf("unable to retrieve gateway IP for node: %s, err: %v", status.Node, err) - } - for _, podIP := range podIps { - var err error - var stderr, filterOption string - if isEgressIPv6 && utilnet.IsIPv6(podIP) { - filterOption = fmt.Sprintf("ip6.src == %s", podIP.String()) - } else if !isEgressIPv6 && !utilnet.IsIPv6(podIP) { - filterOption = fmt.Sprintf("ip4.src == %s", podIP.String()) - } - policyIDs, err := findReroutePolicyIDs(filterOption, egressIPName, gatewayRouterIP) +func (e *egressIPController) handleEgressReroutePolicy(podIps []net.IP, statuses []egressipv1.EgressIPStatusItem, egressIPName string, cb func(filterOption, egressIPName string, gatewayRouterIPs []net.IP) error) error { + gatewayRouterIPv4s, gatewayRouterIPv6s := []net.IP{}, []net.IP{} + for _, status := range statuses { + isEgressIPv6 := utilnet.IsIPv6String(status.EgressIP) + gatewayRouterIP, err := e.getGatewayRouterJoinIP(status.Node, isEgressIPv6) if err != nil { - return err + klog.Errorf("Unable to retrieve gateway IP for node: %s, protocol is IPv6: %v, err: %v", status.Node, isEgressIPv6, err) + continue } - if policyIDs == nil { - _, stderr, err = util.RunOVNNbctl( - "--id=@lr-policy", - "create", - "logical_router_policy", - "action=reroute", - fmt.Sprintf("match=\"%s\"", filterOption), - fmt.Sprintf("priority=%v", types.EgressIPReroutePriority), - fmt.Sprintf("nexthop=\"%s\"", gatewayRouterIP), - fmt.Sprintf("external_ids:name=%s", egressIPName), - "--", - "add", - "logical_router", - types.OVNClusterRouter, - "policies", - "@lr-policy", - ) - if err != nil { - return fmt.Errorf("unable to create logical router policy: %s, stderr: %s, err: %v", status.EgressIP, stderr, err) + if isEgressIPv6 { + gatewayRouterIPv6s = append(gatewayRouterIPv6s, gatewayRouterIP) + } else { + gatewayRouterIPv4s = append(gatewayRouterIPv4s, gatewayRouterIP) + } + } + for _, podIP := range podIps { + if utilnet.IsIPv6(podIP) { + if len(gatewayRouterIPv6s) > 0 { + filterOption := fmt.Sprintf("ip6.src == %s", podIP.String()) + if err := cb(filterOption, egressIPName, gatewayRouterIPv6s); err != nil { + return err + } + } else { + klog.Errorf("Egress IPv6 request cannot be handled for pod IP: %s, no IPv6 gateway router addresses are associated with the egress nodes", podIP) + } + } else if !utilnet.IsIPv6(podIP) { + if len(gatewayRouterIPv4s) > 0 { + filterOption := fmt.Sprintf("ip4.src == %s", podIP.String()) + if err := cb(filterOption, egressIPName, gatewayRouterIPv4s); err != nil { + return err + } + } else { + klog.Errorf("Egress IPv4 request cannot be handled for pod IP: %s, no IPv4 gateway router addresses are associated with the egress nodes", podIP) } } } return nil } -func (e *egressIPController) deleteEgressReroutePolicy(podIps []net.IP, status egressipv1.EgressIPStatusItem, egressIPName string) error { - isEgressIPv6 := utilnet.IsIPv6String(status.EgressIP) - gatewayRouterIP, err := e.getGatewayRouterJoinIP(status.Node, isEgressIPv6) +func (e *egressIPController) createEgressReroutePolicy(filterOption, egressIPName string, gatewayRouterIPs []net.IP) error { + policyIDs, err := findReroutePolicyIDs(filterOption, egressIPName, gatewayRouterIPs) if err != nil { - return fmt.Errorf("unable to retrieve gateway IP for node: %s, err: %v", status.Node, err) + return err } - for _, podIP := range podIps { - var filterOption string - if utilnet.IsIPv6(podIP) && utilnet.IsIPv6String(status.EgressIP) { - filterOption = fmt.Sprintf("ip6.src == %s", podIP.String()) - } else if !utilnet.IsIPv6(podIP) && !utilnet.IsIPv6String(status.EgressIP) { - filterOption = fmt.Sprintf("ip4.src == %s", podIP.String()) - } - policyIDs, err := findReroutePolicyIDs(filterOption, egressIPName, gatewayRouterIP) + if policyIDs == nil { + _, stderr, err := util.RunOVNNbctl( + "--id=@lr-policy", + "create", + "logical_router_policy", + "action=reroute", + fmt.Sprintf("match=\"%s\"", filterOption), + fmt.Sprintf("priority=%v", types.EgressIPReroutePriority), + fmt.Sprintf("nexthops=%q", gatewayRouterIPs), + fmt.Sprintf("external_ids:name=%s", egressIPName), + "--", + "add", + "logical_router", + types.OVNClusterRouter, + "policies", + "@lr-policy", + ) if err != nil { - return err + return fmt.Errorf("unable to create logical router policy, stderr: %s, err: %v", stderr, err) } - for _, policyID := range policyIDs { - _, stderr, err := util.RunOVNNbctl( - "remove", - "logical_router", - types.OVNClusterRouter, - "policies", - policyID, - ) - if err != nil { - return fmt.Errorf("unable to remove logical router policy: %s, stderr: %s, err: %v", status.EgressIP, stderr, err) - } + } + return nil +} + +func (e *egressIPController) deleteEgressReroutePolicy(filterOption, egressIPName string, gatewayRouterIPs []net.IP) error { + policyIDs, err := findReroutePolicyIDs(filterOption, egressIPName, gatewayRouterIPs) + if err != nil { + return err + } + for _, policyID := range policyIDs { + _, stderr, err := util.RunOVNNbctl( + "remove", + "logical_router", + types.OVNClusterRouter, + "policies", + policyID, + ) + if err != nil { + return fmt.Errorf("unable to remove logical router policy, stderr: %s, err: %v", stderr, err) } } return nil } -func findReroutePolicyIDs(filterOption, egressIPName string, gatewayRouterIP net.IP) ([]string, error) { +func findReroutePolicyIDs(filterOption, egressIPName string, gatewayRouterIPs []net.IP) ([]string, error) { policyIDs, stderr, err := util.RunOVNNbctl( "--format=csv", "--data=bare", @@ -989,7 +1001,7 @@ func findReroutePolicyIDs(filterOption, egressIPName string, gatewayRouterIP net fmt.Sprintf("match=\"%s\"", filterOption), fmt.Sprintf("priority=%v", types.EgressIPReroutePriority), fmt.Sprintf("external_ids:name=%s", egressIPName), - fmt.Sprintf("nexthop=\"%s\"", gatewayRouterIP), + fmt.Sprintf("nexthops=%q", gatewayRouterIPs), ) if err != nil { return nil, fmt.Errorf("unable to find logical router policy for EgressIP: %s, stderr: %s, err: %v", egressIPName, stderr, err) diff --git a/go-controller/pkg/ovn/egressip_test.go b/go-controller/pkg/ovn/egressip_test.go index db265f75d8..506d494a50 100644 --- a/go-controller/pkg/ovn/egressip_test.go +++ b/go-controller/pkg/ovn/egressip_test.go @@ -27,10 +27,10 @@ func (f fakeEgressIPDialer) dial(ip net.IP) bool { var ( reroutePolicyID = "reroute_policy_id" natID = "nat_id" - nodeLogicalRouterIPv6 = "fef0::56" - nodeLogicalRouterIPv4 = "100.64.0.2" - nodeLogicalRouterIfAddrV6 = nodeLogicalRouterIPv6 + "/125" - nodeLogicalRouterIfAddrV4 = nodeLogicalRouterIPv4 + "/29" + nodeLogicalRouterIPv6 = []string{"fef0::56"} + nodeLogicalRouterIPv4 = []string{"100.64.0.2"} + nodeLogicalRouterIfAddrV6 = nodeLogicalRouterIPv6[0] + "/125" + nodeLogicalRouterIfAddrV4 = nodeLogicalRouterIPv4[0] + "/29" ) const ( @@ -310,8 +310,8 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { ) fakeOvn.fakeExec.AddFakeCmdsNoOutputNoError( []string{ - fmt.Sprintf("ovn-nbctl --timeout=15 --format=csv --data=bare --no-heading --columns=_uuid find logical_router_policy match=\"%s\" priority=%s external_ids:name=%s nexthop=\"%s\"", fmt.Sprintf("ip4.src == %s", egressPod.Status.PodIP), types.EgressIPReroutePriority, eIP.Name, nodeLogicalRouterIPv4), - fmt.Sprintf("ovn-nbctl --timeout=15 --id=@lr-policy create logical_router_policy action=reroute match=\"%s\" priority=%s nexthop=\"%s\" external_ids:name=%s -- add logical_router %s policies @lr-policy", fmt.Sprintf("ip4.src == %s", egressPod.Status.PodIP), types.EgressIPReroutePriority, nodeLogicalRouterIPv4, eIP.Name, types.OVNClusterRouter), + fmt.Sprintf("ovn-nbctl --timeout=15 --format=csv --data=bare --no-heading --columns=_uuid find logical_router_policy match=\"%s\" priority=%s external_ids:name=%s nexthops=%q", fmt.Sprintf("ip4.src == %s", egressPod.Status.PodIP), types.EgressIPReroutePriority, eIP.Name, nodeLogicalRouterIPv4), + fmt.Sprintf("ovn-nbctl --timeout=15 --id=@lr-policy create logical_router_policy action=reroute match=\"%s\" priority=%s nexthops=%q external_ids:name=%s -- add logical_router %s policies @lr-policy", fmt.Sprintf("ip4.src == %s", egressPod.Status.PodIP), types.EgressIPReroutePriority, nodeLogicalRouterIPv4, eIP.Name, types.OVNClusterRouter), fmt.Sprintf("ovn-nbctl --timeout=15 --format=csv --data=bare --no-heading --columns=_uuid find nat external_ids:name=%s logical_ip=\"%s\" external_ip=\"%s\"", eIP.Name, egressPod.Status.PodIP, egressIP), fmt.Sprintf("ovn-nbctl --timeout=15 --id=@nat create nat type=snat %s %s %s %s -- add logical_router GR_%s nat @nat", fmt.Sprintf("logical_port=k8s-%s", node2.Name), fmt.Sprintf("external_ip=\"%s\"", egressIP), fmt.Sprintf("logical_ip=\"%s\"", egressPod.Status.PodIP), fmt.Sprintf("external_ids:name=%s", eIP.Name), node2.Name), }, @@ -465,8 +465,8 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { ) fakeOvn.fakeExec.AddFakeCmdsNoOutputNoError( []string{ - fmt.Sprintf("ovn-nbctl --timeout=15 --format=csv --data=bare --no-heading --columns=_uuid find logical_router_policy match=\"%s\" priority=%s external_ids:name=%s nexthop=\"%s\"", fmt.Sprintf("ip4.src == %s", egressPod.Status.PodIP), types.EgressIPReroutePriority, eIP.Name, nodeLogicalRouterIPv4), - fmt.Sprintf("ovn-nbctl --timeout=15 --id=@lr-policy create logical_router_policy action=reroute match=\"%s\" priority=%s nexthop=\"%s\" external_ids:name=%s -- add logical_router %s policies @lr-policy", fmt.Sprintf("ip4.src == %s", egressPod.Status.PodIP), types.EgressIPReroutePriority, nodeLogicalRouterIPv4, eIP.Name, types.OVNClusterRouter), + fmt.Sprintf("ovn-nbctl --timeout=15 --format=csv --data=bare --no-heading --columns=_uuid find logical_router_policy match=\"%s\" priority=%s external_ids:name=%s nexthops=%q", fmt.Sprintf("ip4.src == %s", egressPod.Status.PodIP), types.EgressIPReroutePriority, eIP.Name, nodeLogicalRouterIPv4), + fmt.Sprintf("ovn-nbctl --timeout=15 --id=@lr-policy create logical_router_policy action=reroute match=\"%s\" priority=%s nexthops=%q external_ids:name=%s -- add logical_router %s policies @lr-policy", fmt.Sprintf("ip4.src == %s", egressPod.Status.PodIP), types.EgressIPReroutePriority, nodeLogicalRouterIPv4, eIP.Name, types.OVNClusterRouter), fmt.Sprintf("ovn-nbctl --timeout=15 --format=csv --data=bare --no-heading --columns=_uuid find nat external_ids:name=%s logical_ip=\"%s\" external_ip=\"%s\"", eIP.Name, egressPod.Status.PodIP, egressIP), fmt.Sprintf("ovn-nbctl --timeout=15 --id=@nat create nat type=snat %s %s %s %s -- add logical_router GR_%s nat @nat", fmt.Sprintf("logical_port=k8s-%s", node2.Name), fmt.Sprintf("external_ip=\"%s\"", egressIP), fmt.Sprintf("logical_ip=\"%s\"", egressPod.Status.PodIP), fmt.Sprintf("external_ids:name=%s", eIP.Name), node2.Name), }, @@ -547,8 +547,8 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { ) fakeOvn.fakeExec.AddFakeCmdsNoOutputNoError( []string{ - fmt.Sprintf("ovn-nbctl --timeout=15 --format=csv --data=bare --no-heading --columns=_uuid find logical_router_policy match=\"%s\" priority=%s external_ids:name=%s nexthop=\"%s\"", fmt.Sprintf("ip6.src == %s", egressPod.Status.PodIP), types.EgressIPReroutePriority, eIP.Name, nodeLogicalRouterIPv6), - fmt.Sprintf("ovn-nbctl --timeout=15 --id=@lr-policy create logical_router_policy action=reroute match=\"%s\" priority=%s nexthop=\"%s\" external_ids:name=%s -- add logical_router %s policies @lr-policy", fmt.Sprintf("ip6.src == %s", egressPod.Status.PodIP), types.EgressIPReroutePriority, nodeLogicalRouterIPv6, eIP.Name, types.OVNClusterRouter), + fmt.Sprintf("ovn-nbctl --timeout=15 --format=csv --data=bare --no-heading --columns=_uuid find logical_router_policy match=\"%s\" priority=%s external_ids:name=%s nexthops=%q", fmt.Sprintf("ip6.src == %s", egressPod.Status.PodIP), types.EgressIPReroutePriority, eIP.Name, nodeLogicalRouterIPv6), + fmt.Sprintf("ovn-nbctl --timeout=15 --id=@lr-policy create logical_router_policy action=reroute match=\"%s\" priority=%s nexthops=%q external_ids:name=%s -- add logical_router %s policies @lr-policy", fmt.Sprintf("ip6.src == %s", egressPod.Status.PodIP), types.EgressIPReroutePriority, nodeLogicalRouterIPv6, eIP.Name, types.OVNClusterRouter), fmt.Sprintf("ovn-nbctl --timeout=15 --format=csv --data=bare --no-heading --columns=_uuid find nat external_ids:name=%s logical_ip=\"%s\" external_ip=\"%s\"", eIP.Name, egressPod.Status.PodIP, egressIP), fmt.Sprintf("ovn-nbctl --timeout=15 --id=@nat create nat type=snat %s %s %s %s -- add logical_router GR_%s nat @nat", fmt.Sprintf("logical_port=k8s-%s", node2.name), fmt.Sprintf("external_ip=\"%s\"", egressIP), fmt.Sprintf("logical_ip=\"%s\"", egressPod.Status.PodIP), fmt.Sprintf("external_ids:name=%s", eIP.Name), node2.name), }, @@ -570,7 +570,7 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { fakeOvn.fakeExec.AddFakeCmd( &ovntest.ExpectedCmd{ - Cmd: fmt.Sprintf("ovn-nbctl --timeout=15 --format=csv --data=bare --no-heading --columns=_uuid find logical_router_policy match=\"%s\" priority=%s external_ids:name=%s nexthop=\"%s\"", fmt.Sprintf("ip6.src == %s", egressPod.Status.PodIP), types.EgressIPReroutePriority, eIP.Name, nodeLogicalRouterIPv6), + Cmd: fmt.Sprintf("ovn-nbctl --timeout=15 --format=csv --data=bare --no-heading --columns=_uuid find logical_router_policy match=\"%s\" priority=%s external_ids:name=%s nexthops=%q", fmt.Sprintf("ip6.src == %s", egressPod.Status.PodIP), types.EgressIPReroutePriority, eIP.Name, nodeLogicalRouterIPv6), Output: reroutePolicyID, }, ) @@ -658,8 +658,8 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { ) fakeOvn.fakeExec.AddFakeCmdsNoOutputNoError( []string{ - fmt.Sprintf("ovn-nbctl --timeout=15 --format=csv --data=bare --no-heading --columns=_uuid find logical_router_policy match=\"%s\" priority=%s external_ids:name=%s nexthop=\"%s\"", fmt.Sprintf("ip6.src == %s", egressPod.Status.PodIP), types.EgressIPReroutePriority, eIP.Name, nodeLogicalRouterIPv6), - fmt.Sprintf("ovn-nbctl --timeout=15 --id=@lr-policy create logical_router_policy action=reroute match=\"%s\" priority=%s nexthop=\"%s\" external_ids:name=%s -- add logical_router %s policies @lr-policy", fmt.Sprintf("ip6.src == %s", egressPod.Status.PodIP), types.EgressIPReroutePriority, nodeLogicalRouterIPv6, eIP.Name, types.OVNClusterRouter), + fmt.Sprintf("ovn-nbctl --timeout=15 --format=csv --data=bare --no-heading --columns=_uuid find logical_router_policy match=\"%s\" priority=%s external_ids:name=%s nexthops=%q", fmt.Sprintf("ip6.src == %s", egressPod.Status.PodIP), types.EgressIPReroutePriority, eIP.Name, nodeLogicalRouterIPv6), + fmt.Sprintf("ovn-nbctl --timeout=15 --id=@lr-policy create logical_router_policy action=reroute match=\"%s\" priority=%s nexthops=%q external_ids:name=%s -- add logical_router %s policies @lr-policy", fmt.Sprintf("ip6.src == %s", egressPod.Status.PodIP), types.EgressIPReroutePriority, nodeLogicalRouterIPv6, eIP.Name, types.OVNClusterRouter), fmt.Sprintf("ovn-nbctl --timeout=15 --format=csv --data=bare --no-heading --columns=_uuid find nat external_ids:name=%s logical_ip=\"%s\" external_ip=\"%s\"", eIP.Name, egressPod.Status.PodIP, egressIP), fmt.Sprintf("ovn-nbctl --timeout=15 --id=@nat create nat type=snat %s %s %s %s -- add logical_router GR_%s nat @nat", fmt.Sprintf("logical_port=k8s-%s", node2.name), fmt.Sprintf("external_ip=\"%s\"", egressIP), fmt.Sprintf("logical_ip=\"%s\"", egressPod.Status.PodIP), fmt.Sprintf("external_ids:name=%s", eIP.Name), node2.name), }, @@ -764,8 +764,8 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { ) fakeOvn.fakeExec.AddFakeCmdsNoOutputNoError( []string{ - fmt.Sprintf("ovn-nbctl --timeout=15 --format=csv --data=bare --no-heading --columns=_uuid find logical_router_policy match=\"%s\" priority=%s external_ids:name=%s nexthop=\"%s\"", fmt.Sprintf("ip6.src == %s", podV6IP), types.EgressIPReroutePriority, eIP.Name, nodeLogicalRouterIPv6), - fmt.Sprintf("ovn-nbctl --timeout=15 --id=@lr-policy create logical_router_policy action=reroute match=\"%s\" priority=%s nexthop=\"%s\" external_ids:name=%s -- add logical_router %s policies @lr-policy", fmt.Sprintf("ip6.src == %s", podV6IP), types.EgressIPReroutePriority, nodeLogicalRouterIPv6, eIP.Name, types.OVNClusterRouter), + fmt.Sprintf("ovn-nbctl --timeout=15 --format=csv --data=bare --no-heading --columns=_uuid find logical_router_policy match=\"%s\" priority=%s external_ids:name=%s nexthops=%q", fmt.Sprintf("ip6.src == %s", podV6IP), types.EgressIPReroutePriority, eIP.Name, nodeLogicalRouterIPv6), + fmt.Sprintf("ovn-nbctl --timeout=15 --id=@lr-policy create logical_router_policy action=reroute match=\"%s\" priority=%s nexthops=%q external_ids:name=%s -- add logical_router %s policies @lr-policy", fmt.Sprintf("ip6.src == %s", podV6IP), types.EgressIPReroutePriority, nodeLogicalRouterIPv6, eIP.Name, types.OVNClusterRouter), fmt.Sprintf("ovn-nbctl --timeout=15 --format=csv --data=bare --no-heading --columns=_uuid find nat external_ids:name=%s logical_ip=\"%s\" external_ip=\"%s\"", eIP.Name, podV6IP, egressIP), fmt.Sprintf("ovn-nbctl --timeout=15 --id=@nat create nat type=snat %s %s %s %s -- add logical_router GR_%s nat @nat", fmt.Sprintf("logical_port=k8s-%s", node2.name), fmt.Sprintf("external_ip=\"%s\"", egressIP), fmt.Sprintf("logical_ip=\"%s\"", podV6IP), fmt.Sprintf("external_ids:name=%s", eIP.Name), node2.name), }, @@ -907,8 +907,8 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { ) fakeOvn.fakeExec.AddFakeCmdsNoOutputNoError( []string{ - fmt.Sprintf("ovn-nbctl --timeout=15 --format=csv --data=bare --no-heading --columns=_uuid find logical_router_policy match=\"%s\" priority=%s external_ids:name=%s nexthop=\"%s\"", fmt.Sprintf("ip6.src == %s", egressPod.Status.PodIP), types.EgressIPReroutePriority, eIP.Name, nodeLogicalRouterIPv6), - fmt.Sprintf("ovn-nbctl --timeout=15 --id=@lr-policy create logical_router_policy action=reroute match=\"%s\" priority=%s nexthop=\"%s\" external_ids:name=%s -- add logical_router %s policies @lr-policy", fmt.Sprintf("ip6.src == %s", egressPod.Status.PodIP), types.EgressIPReroutePriority, nodeLogicalRouterIPv6, eIP.Name, types.OVNClusterRouter), + fmt.Sprintf("ovn-nbctl --timeout=15 --format=csv --data=bare --no-heading --columns=_uuid find logical_router_policy match=\"%s\" priority=%s external_ids:name=%s nexthops=%q", fmt.Sprintf("ip6.src == %s", egressPod.Status.PodIP), types.EgressIPReroutePriority, eIP.Name, nodeLogicalRouterIPv6), + fmt.Sprintf("ovn-nbctl --timeout=15 --id=@lr-policy create logical_router_policy action=reroute match=\"%s\" priority=%s nexthops=%q external_ids:name=%s -- add logical_router %s policies @lr-policy", fmt.Sprintf("ip6.src == %s", egressPod.Status.PodIP), types.EgressIPReroutePriority, nodeLogicalRouterIPv6, eIP.Name, types.OVNClusterRouter), fmt.Sprintf("ovn-nbctl --timeout=15 --format=csv --data=bare --no-heading --columns=_uuid find nat external_ids:name=%s logical_ip=\"%s\" external_ip=\"%s\"", eIP.Name, egressPod.Status.PodIP, egressIP), fmt.Sprintf("ovn-nbctl --timeout=15 --id=@nat create nat type=snat %s %s %s %s -- add logical_router GR_%s nat @nat", fmt.Sprintf("logical_port=k8s-%s", node2.name), fmt.Sprintf("external_ip=\"%s\"", egressIP), fmt.Sprintf("logical_ip=\"%s\"", egressPod.Status.PodIP), fmt.Sprintf("external_ids:name=%s", eIP.Name), node2.name), }, @@ -930,7 +930,7 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { fakeOvn.fakeExec.AddFakeCmd( &ovntest.ExpectedCmd{ - Cmd: fmt.Sprintf("ovn-nbctl --timeout=15 --format=csv --data=bare --no-heading --columns=_uuid find logical_router_policy match=\"%s\" priority=%s external_ids:name=%s nexthop=\"%s\"", fmt.Sprintf("ip6.src == %s", egressPod.Status.PodIP), types.EgressIPReroutePriority, eIP.Name, nodeLogicalRouterIPv6), + Cmd: fmt.Sprintf("ovn-nbctl --timeout=15 --format=csv --data=bare --no-heading --columns=_uuid find logical_router_policy match=\"%s\" priority=%s external_ids:name=%s nexthops=%q", fmt.Sprintf("ip6.src == %s", egressPod.Status.PodIP), types.EgressIPReroutePriority, eIP.Name, nodeLogicalRouterIPv6), Output: reroutePolicyID, }, ) @@ -1089,8 +1089,8 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { ) fakeOvn.fakeExec.AddFakeCmdsNoOutputNoError( []string{ - fmt.Sprintf("ovn-nbctl --timeout=15 --format=csv --data=bare --no-heading --columns=_uuid find logical_router_policy match=\"%s\" priority=%s external_ids:name=%s nexthop=\"%s\"", fmt.Sprintf("ip6.src == %s", egressPod.Status.PodIP), types.EgressIPReroutePriority, eIP.Name, nodeLogicalRouterIPv6), - fmt.Sprintf("ovn-nbctl --timeout=15 --id=@lr-policy create logical_router_policy action=reroute match=\"%s\" priority=%s nexthop=\"%s\" external_ids:name=%s -- add logical_router %s policies @lr-policy", fmt.Sprintf("ip6.src == %s", egressPod.Status.PodIP), types.EgressIPReroutePriority, nodeLogicalRouterIPv6, eIP.Name, types.OVNClusterRouter), + fmt.Sprintf("ovn-nbctl --timeout=15 --format=csv --data=bare --no-heading --columns=_uuid find logical_router_policy match=\"%s\" priority=%s external_ids:name=%s nexthops=%q", fmt.Sprintf("ip6.src == %s", egressPod.Status.PodIP), types.EgressIPReroutePriority, eIP.Name, nodeLogicalRouterIPv6), + fmt.Sprintf("ovn-nbctl --timeout=15 --id=@lr-policy create logical_router_policy action=reroute match=\"%s\" priority=%s nexthops=%q external_ids:name=%s -- add logical_router %s policies @lr-policy", fmt.Sprintf("ip6.src == %s", egressPod.Status.PodIP), types.EgressIPReroutePriority, nodeLogicalRouterIPv6, eIP.Name, types.OVNClusterRouter), fmt.Sprintf("ovn-nbctl --timeout=15 --format=csv --data=bare --no-heading --columns=_uuid find nat external_ids:name=%s logical_ip=\"%s\" external_ip=\"%s\"", eIP.Name, egressPod.Status.PodIP, egressIP), fmt.Sprintf("ovn-nbctl --timeout=15 --id=@nat create nat type=snat %s %s %s %s -- add logical_router GR_%s nat @nat", fmt.Sprintf("logical_port=k8s-%s", node2.name), fmt.Sprintf("external_ip=\"%s\"", egressIP), fmt.Sprintf("logical_ip=\"%s\"", egressPod.Status.PodIP), fmt.Sprintf("external_ids:name=%s", eIP.Name), node2.name), }, @@ -1124,7 +1124,7 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { } fakeOvn.fakeExec.AddFakeCmd( &ovntest.ExpectedCmd{ - Cmd: fmt.Sprintf("ovn-nbctl --timeout=15 --format=csv --data=bare --no-heading --columns=_uuid find logical_router_policy match=\"%s\" priority=%s external_ids:name=%s nexthop=\"%s\"", fmt.Sprintf("ip6.src == %s", egressPod.Status.PodIP), types.EgressIPReroutePriority, eIP.Name, nodeLogicalRouterIPv6), + Cmd: fmt.Sprintf("ovn-nbctl --timeout=15 --format=csv --data=bare --no-heading --columns=_uuid find logical_router_policy match=\"%s\" priority=%s external_ids:name=%s nexthops=%q", fmt.Sprintf("ip6.src == %s", egressPod.Status.PodIP), types.EgressIPReroutePriority, eIP.Name, nodeLogicalRouterIPv6), Output: reroutePolicyID, }, ) @@ -1146,8 +1146,8 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { ) fakeOvn.fakeExec.AddFakeCmdsNoOutputNoError( []string{ - fmt.Sprintf("ovn-nbctl --timeout=15 --format=csv --data=bare --no-heading --columns=_uuid find logical_router_policy match=\"%s\" priority=%s external_ids:name=%s nexthop=\"%s\"", fmt.Sprintf("ip6.src == %s", egressPod.Status.PodIP), types.EgressIPReroutePriority, eIP.Name, nodeLogicalRouterIPv6), - fmt.Sprintf("ovn-nbctl --timeout=15 --id=@lr-policy create logical_router_policy action=reroute match=\"%s\" priority=%s nexthop=\"%s\" external_ids:name=%s -- add logical_router %s policies @lr-policy", fmt.Sprintf("ip6.src == %s", egressPod.Status.PodIP), types.EgressIPReroutePriority, nodeLogicalRouterIPv6, eIP.Name, types.OVNClusterRouter), + fmt.Sprintf("ovn-nbctl --timeout=15 --format=csv --data=bare --no-heading --columns=_uuid find logical_router_policy match=\"%s\" priority=%s external_ids:name=%s nexthops=%q", fmt.Sprintf("ip6.src == %s", egressPod.Status.PodIP), types.EgressIPReroutePriority, eIP.Name, nodeLogicalRouterIPv6), + fmt.Sprintf("ovn-nbctl --timeout=15 --id=@lr-policy create logical_router_policy action=reroute match=\"%s\" priority=%s nexthops=%q external_ids:name=%s -- add logical_router %s policies @lr-policy", fmt.Sprintf("ip6.src == %s", egressPod.Status.PodIP), types.EgressIPReroutePriority, nodeLogicalRouterIPv6, eIP.Name, types.OVNClusterRouter), fmt.Sprintf("ovn-nbctl --timeout=15 --format=csv --data=bare --no-heading --columns=_uuid find nat external_ids:name=%s logical_ip=\"%s\" external_ip=\"%s\"", eIP.Name, egressPod.Status.PodIP, updatedEgressIP.String()), fmt.Sprintf("ovn-nbctl --timeout=15 --id=@nat create nat type=snat %s %s %s %s -- add logical_router GR_%s nat @nat", fmt.Sprintf("logical_port=k8s-%s", node2.name), fmt.Sprintf("external_ip=\"%s\"", updatedEgressIP.String()), fmt.Sprintf("logical_ip=\"%s\"", egressPod.Status.PodIP), fmt.Sprintf("external_ids:name=%s", eIP.Name), node2.name), }, @@ -1223,8 +1223,8 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { ) fakeOvn.fakeExec.AddFakeCmdsNoOutputNoError( []string{ - fmt.Sprintf("ovn-nbctl --timeout=15 --format=csv --data=bare --no-heading --columns=_uuid find logical_router_policy match=\"%s\" priority=%s external_ids:name=%s nexthop=\"%s\"", fmt.Sprintf("ip6.src == %s", egressPod.Status.PodIP), types.EgressIPReroutePriority, eIP.Name, nodeLogicalRouterIPv6), - fmt.Sprintf("ovn-nbctl --timeout=15 --id=@lr-policy create logical_router_policy action=reroute match=\"%s\" priority=%s nexthop=\"%s\" external_ids:name=%s -- add logical_router %s policies @lr-policy", fmt.Sprintf("ip6.src == %s", egressPod.Status.PodIP), types.EgressIPReroutePriority, nodeLogicalRouterIPv6, eIP.Name, types.OVNClusterRouter), + fmt.Sprintf("ovn-nbctl --timeout=15 --format=csv --data=bare --no-heading --columns=_uuid find logical_router_policy match=\"%s\" priority=%s external_ids:name=%s nexthops=%q", fmt.Sprintf("ip6.src == %s", egressPod.Status.PodIP), types.EgressIPReroutePriority, eIP.Name, nodeLogicalRouterIPv6), + fmt.Sprintf("ovn-nbctl --timeout=15 --id=@lr-policy create logical_router_policy action=reroute match=\"%s\" priority=%s nexthops=%q external_ids:name=%s -- add logical_router %s policies @lr-policy", fmt.Sprintf("ip6.src == %s", egressPod.Status.PodIP), types.EgressIPReroutePriority, nodeLogicalRouterIPv6, eIP.Name, types.OVNClusterRouter), fmt.Sprintf("ovn-nbctl --timeout=15 --format=csv --data=bare --no-heading --columns=_uuid find nat external_ids:name=%s logical_ip=\"%s\" external_ip=\"%s\"", eIP.Name, egressPod.Status.PodIP, egressIP.String()), fmt.Sprintf("ovn-nbctl --timeout=15 --id=@nat create nat type=snat %s %s %s %s -- add logical_router GR_%s nat @nat", fmt.Sprintf("logical_port=k8s-%s", node2.name), fmt.Sprintf("external_ip=\"%s\"", egressIP.String()), fmt.Sprintf("logical_ip=\"%s\"", egressPod.Status.PodIP), fmt.Sprintf("external_ids:name=%s", eIP.Name), node2.name), }, From dc423eb6fa1fca3e47226c1fcb8100b60972b642 Mon Sep 17 00:00:00 2001 From: Alexander Constantinescu Date: Fri, 25 Jun 2021 14:23:41 +0200 Subject: [PATCH 04/10] Update e2e to verify egress IP with ECMP Signed-off-by: Alexander Constantinescu (cherry picked from commit ef615e6db72efc888c36a8580563527b2f2ceab5) --- test/e2e/e2e.go | 56 +++++++++++++++++++++++++++++++------------------ 1 file changed, 36 insertions(+), 20 deletions(-) diff --git a/test/e2e/e2e.go b/test/e2e/e2e.go index 71de88de40..cb50f89a3b 100644 --- a/test/e2e/e2e.go +++ b/test/e2e/e2e.go @@ -581,6 +581,24 @@ var _ = ginkgo.Describe("e2e egress IP validation", func() { } } + removeSliceElement := func(s []string, i int) []string { + s[i] = s[len(s)-1] + return s[:len(s)-1] + } + + // targetExternalContainerAndTest targets the external test container from + // our test pods, collects its logs and verifies that the logs have traces + // of the `verifyIPs` provided. We need to target the external test + // container multiple times until we verify that all IPs provided by + // `verifyIPs` have been verified. This is done by passing it a slice of + // verifyIPs and removing each item when it has been found. This function is + // wrapped in a `wait.PollImmediate` which results in the fact that it only + // passes once verifyIPs is of length 0. targetExternalContainerAndTest + // initiates only a single connection at a time, sequentially, hence: we + // perform one connection attempt, check that the IP seen is expected, + // remove it from the list of verifyIPs, see that it's length is not 0 and + // retry again. We do this until all IPs have been seen. If that never + // happens (because of a bug) the test fails. targetExternalContainerAndTest := func(targetNode node, podName, podNamespace string, expectSuccess bool, verifyIPs []string) wait.ConditionFunc { return func() (bool, error) { _, err := framework.RunKubectl(podNamespace, "exec", podName, "--", "curl", "--connect-timeout", "2", net.JoinHostPort(targetNode.nodeIP, "80")) @@ -603,18 +621,17 @@ var _ = ginkgo.Describe("e2e egress IP validation", func() { targetNodeLogs = strings.TrimSuffix(targetNodeLogs, "\n") logLines := strings.Split(targetNodeLogs, "\n") lastLine := logLines[len(logLines)-1] - var found bool - for _, verifyIP := range verifyIPs { - if strings.Contains(lastLine, verifyIP) { - found = true + for i := 0; i < len(verifyIPs); i++ { + if strings.Contains(lastLine, verifyIPs[i]) { + verifyIPs = removeSliceElement(verifyIPs, i) break } } - if !found && expectSuccess { + if len(verifyIPs) != 0 && expectSuccess { framework.Logf("the test external container did not have any trace of the IPs: %v being logged, last logs: %s", verifyIPs, logLines[len(logLines)-1]) return false, nil } - if !expectSuccess && found { + if !expectSuccess && len(verifyIPs) == 0 { framework.Logf("the test external container did have a trace of the IPs: %v being logged, it should not have, last logs: %s", verifyIPs, logLines[len(logLines)-1]) return false, nil } @@ -720,21 +737,21 @@ var _ = ginkgo.Describe("e2e egress IP validation", func() { 1. Create an EgressIP object with two egress IPs defined 2. Check that the status is of length two and both are assigned to different nodes 3. Create two pods matching the EgressIP: one running on each of the egress nodes - 4. Check connectivity from both to an external "node" and verify that the IP is one of the two above + 4. Check connectivity from both to an external "node" and verify that the IPs are both of the above 5. Check connectivity from one pod to the other and verify that the connection is achieved 6. Check connectivity from both pods to the api-server (running hostNetwork:true) and verifying that the connection is achieved 7. Update one of the pods, unmatching the EgressIP 8. Check connectivity from that one to an external "node" and verify that the IP is the node IP. - 9. Check connectivity from the other one to an external "node" and verify that the IP is one of the egress IPs. + 9. Check connectivity from the other one to an external "node" and verify that the IPs are both of the above 10. Remove the node label off one of the egress node 11. Check that the status is of length one - 12. Check connectivity from the remaining pod to an external "node" and verify that the IP is one of the egress IPs. + 12. Check connectivity from the remaining pod to an external "node" and verify that the IP is the remaining egress IP 13. Remove the node label off the last egress node 14. Check that the status is of length zero 15. Check connectivity from the remaining pod to an external "node" and verify that the IP is the node IP. 16. Re-add the label to one of the egress nodes 17. Check that the status is of length one - 18. Check connectivity from the remaining pod to an external "node" and verify that the IP is one of the egress IPs. + 18. Check connectivity from the remaining pod to an external "node" and verify that the IP is the remaining egress IP */ ginkgo.It("Should validate the egress IP functionality against remote hosts", func() { @@ -815,12 +832,11 @@ spec: framework.ExpectNoError(err, "Step 3. Create two pods matching the EgressIP: one running on each of the egress nodes, failed, err: %v", err) pod2IP := getPodAddress(pod2Name, f.Namespace.Name) - - ginkgo.By("4. Check connectivity from both to an external \"node\" and verify that the IP is one of the two above") + ginkgo.By("4. Check connectivity from both to an external \"node\" and verify that the IPs are both of the above") err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(targetNode, pod1Name, podNamespace.Name, true, []string{egressIP1.String(), egressIP2.String()})) - framework.ExpectNoError(err, "Step 4. Check connectivity from first to an external \"node\" and verify that the IP is one of the two above, failed: %v", err) + framework.ExpectNoError(err, "Step 4. Check connectivity from first to an external \"node\" and verify that the IPs are both of the above, failed: %v", err) err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(targetNode, pod2Name, podNamespace.Name, true, []string{egressIP1.String(), egressIP2.String()})) - framework.ExpectNoError(err, "Step 4. Check connectivity from second to an external \"node\" and verify that the IP is one of the two above, failed: %v", err) + framework.ExpectNoError(err, "Step 4. Check connectivity from second to an external \"node\" and verify that the IPs are both of the above, failed: %v", err) ginkgo.By("5. Check connectivity from one pod to the other and verify that the connection is achieved") err = wait.PollImmediate(retryInterval, retryTimeout, targetPodAndTest(f.Namespace.Name, pod1Name, pod2Name, pod2IP)) @@ -839,7 +855,7 @@ spec: err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(targetNode, pod2Name, podNamespace.Name, true, []string{pod2Node.nodeIP})) framework.ExpectNoError(err, "Step 8. Check connectivity from that one to an external \"node\" and verify that the IP is the node IP, failed, err: %v", err) - ginkgo.By("9. Check connectivity from the other one to an external \"node\" and verify that the IP is one of the egress IPs.") + ginkgo.By("9. Check connectivity from the other one to an external \"node\" and verify that the IPs are both of the above") err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(targetNode, pod1Name, podNamespace.Name, true, []string{egressIP1.String(), egressIP2.String()})) framework.ExpectNoError(err, "Step 9. Check connectivity from the other one to an external \"node\" and verify that the IP is one of the egress IPs, failed, err: %v", err) @@ -849,9 +865,9 @@ spec: ginkgo.By("11. Check that the status is of length one") statuses = verifyEgressIPStatusLengthEquals(1) - ginkgo.By("12. Check connectivity from the remaining pod to an external \"node\" and verify that the IP is one of the egress IPs.") + ginkgo.By("12. Check connectivity from the remaining pod to an external \"node\" and verify that the IP is the remaining egress IP") err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(targetNode, pod1Name, podNamespace.Name, true, []string{statuses[0].EgressIP})) - framework.ExpectNoError(err, "Step 12. Check connectivity from the remaining pod to an external \"node\" and verify that the IP is one of the egress IPs, failed, err: %v", err) + framework.ExpectNoError(err, "Step 12. Check connectivity from the remaining pod to an external \"node\" and verify that the IP is the remaining egress IP, failed, err: %v", err) ginkgo.By("13. Remove the node label off the last egress node") framework.RemoveLabelOffNode(f.ClientSet, egress2Node.name, "k8s.ovn.org/egress-assignable") @@ -869,9 +885,9 @@ spec: ginkgo.By("17. Check that the status is of length one") statuses = verifyEgressIPStatusLengthEquals(1) - ginkgo.By("18. Check connectivity from the remaining pod to an external \"node\" and verify that the IP is one of the egress IPs.") - err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(targetNode, pod1Name, podNamespace.Name, true, []string{egressIP1.String(), egressIP2.String()})) - framework.ExpectNoError(err, "Step 18. Check connectivity from the remaining pod to an external \"node\" and verify that the IP is one of the egress IPs, failed, err: %v", err) + ginkgo.By("18. Check connectivity from the remaining pod to an external \"node\" and verify that the IP is the remaining egress IP") + err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(targetNode, pod1Name, podNamespace.Name, true, []string{statuses[0].EgressIP})) + framework.ExpectNoError(err, "Step 18. Check connectivity from the remaining pod to an external \"node\" and verify that the IP is the remaining egress IP, failed, err: %v", err) }) // Validate the egress IP works with egress firewall by creating two httpd From f727978be4a0b23a22e173a1fd7f8309e30e208f Mon Sep 17 00:00:00 2001 From: Alexander Constantinescu Date: Mon, 22 Mar 2021 23:27:11 +0100 Subject: [PATCH 05/10] Clean legacy logical router policies for egress IP on upgrade The old egress IP implementation used to create an LRP per egress node and set nexthop for each. We thus need to remove all of that since we'll be creating one LRP now with multiple nexthops. Conflicts: go-controller/pkg/ovn/egressip.go go-controller/pkg/ovn/egressip_test.go Signed-off-by: Alexander Constantinescu (cherry picked from commit 904ebce137693b8c18535336260231e5ec78a30e) --- go-controller/pkg/ovn/egressip.go | 36 ++++++++++++++++++++++++++ go-controller/pkg/ovn/egressip_test.go | 32 +++++++++++++++++++++++ 2 files changed, 68 insertions(+) diff --git a/go-controller/pkg/ovn/egressip.go b/go-controller/pkg/ovn/egressip.go index 97a61e1c33..9970d1bb14 100644 --- a/go-controller/pkg/ovn/egressip.go +++ b/go-controller/pkg/ovn/egressip.go @@ -210,6 +210,22 @@ func (oc *Controller) syncEgressIPs(eIPs []interface{}) { eNode.tainted = false } } + policyIDs, err := findLegacyReroutePolicyIDs() + if err != nil { + klog.Errorf("Unable to clean up legacy egress IP setup, err: %v", err) + } + for _, policyID := range policyIDs { + _, stderr, err := util.RunOVNNbctl( + "remove", + "logical_router", + types.OVNClusterRouter, + "policies", + policyID, + ) + if err != nil { + klog.Errorf("Unable to delete legacy logical router policy: %s, stderr: %s, err: %v", policyID, stderr, err) + } + } // This part will take of syncing stale data which we might have in OVN if // there's no ovnkube-master running for a while, while there are changes to @@ -990,6 +1006,26 @@ func (e *egressIPController) deleteEgressReroutePolicy(filterOption, egressIPNam return nil } +func findLegacyReroutePolicyIDs() ([]string, error) { + policyIDs, stderr, err := util.RunOVNNbctl( + "--format=csv", + "--data=bare", + "--no-heading", + "--columns=_uuid", + "find", + "logical_router_policy", + fmt.Sprintf("priority=%v", types.EgressIPReroutePriority), + "nexthop!=[]", + ) + if err != nil { + return nil, fmt.Errorf("unable to find legacy logical router policies, stderr: %s, err: %v", stderr, err) + } + if policyIDs == "" { + return nil, nil + } + return strings.Split(policyIDs, "\n"), nil +} + func findReroutePolicyIDs(filterOption, egressIPName string, gatewayRouterIPs []net.IP) ([]string, error) { policyIDs, stderr, err := util.RunOVNNbctl( "--format=csv", diff --git a/go-controller/pkg/ovn/egressip_test.go b/go-controller/pkg/ovn/egressip_test.go index 506d494a50..6f3d75bb10 100644 --- a/go-controller/pkg/ovn/egressip_test.go +++ b/go-controller/pkg/ovn/egressip_test.go @@ -249,6 +249,7 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { fakeOvn.fakeExec.AddFakeCmdsNoOutputNoError( []string{ + fmt.Sprintf("ovn-nbctl --timeout=15 --format=csv --data=bare --no-heading --columns=_uuid find logical_router_policy priority=%s nexthop!=[]", types.EgressIPReroutePriority), fmt.Sprintf("ovn-nbctl --timeout=15 --may-exist lr-policy-add ovn_cluster_router 101 ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14 allow"), fmt.Sprintf("ovn-nbctl --timeout=15 set logical_switch_port etor-GR_node1 options:nat-addresses=router"), }, @@ -404,6 +405,7 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { fakeOvn.fakeExec.AddFakeCmdsNoOutputNoError( []string{ + fmt.Sprintf("ovn-nbctl --timeout=15 --format=csv --data=bare --no-heading --columns=_uuid find logical_router_policy priority=%s nexthop!=[]", types.EgressIPReroutePriority), fmt.Sprintf("ovn-nbctl --timeout=15 --may-exist lr-policy-add ovn_cluster_router 101 ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14 allow"), fmt.Sprintf("ovn-nbctl --timeout=15 set logical_switch_port etor-GR_node1 options:nat-addresses=router"), }, @@ -534,6 +536,7 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { fakeOvn.fakeExec.AddFakeCmdsNoOutputNoError( []string{ + fmt.Sprintf("ovn-nbctl --timeout=15 --format=csv --data=bare --no-heading --columns=_uuid find logical_router_policy priority=%s nexthop!=[]", types.EgressIPReroutePriority), fmt.Sprintf("ovn-nbctl --timeout=15 --format=csv --data=bare --no-heading --columns=_uuid,external_ids,match find logical_router_policy priority=100"), fmt.Sprintf("ovn-nbctl --timeout=15 --format=csv --data=bare --no-heading --columns=_uuid,external_ids,logical_ip find nat"), }, @@ -645,6 +648,7 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { fakeOvn.fakeExec.AddFakeCmdsNoOutputNoError( []string{ + fmt.Sprintf("ovn-nbctl --timeout=15 --format=csv --data=bare --no-heading --columns=_uuid find logical_router_policy priority=%s nexthop!=[]", types.EgressIPReroutePriority), fmt.Sprintf("ovn-nbctl --timeout=15 --format=csv --data=bare --no-heading --columns=_uuid,external_ids,match find logical_router_policy priority=100"), fmt.Sprintf("ovn-nbctl --timeout=15 --format=csv --data=bare --no-heading --columns=_uuid,external_ids,logical_ip find nat"), }, @@ -734,6 +738,7 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { fakeOvn.fakeExec.AddFakeCmdsNoOutputNoError( []string{ + fmt.Sprintf("ovn-nbctl --timeout=15 --format=csv --data=bare --no-heading --columns=_uuid find logical_router_policy priority=%s nexthop!=[]", types.EgressIPReroutePriority), fmt.Sprintf("ovn-nbctl --timeout=15 --format=csv --data=bare --no-heading --columns=_uuid,external_ids,match find logical_router_policy priority=100"), fmt.Sprintf("ovn-nbctl --timeout=15 --format=csv --data=bare --no-heading --columns=_uuid,external_ids,logical_ip find nat"), }, @@ -822,6 +827,7 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { fakeOvn.fakeExec.AddFakeCmdsNoOutputNoError( []string{ + fmt.Sprintf("ovn-nbctl --timeout=15 --format=csv --data=bare --no-heading --columns=_uuid find logical_router_policy priority=%s nexthop!=[]", types.EgressIPReroutePriority), fmt.Sprintf("ovn-nbctl --timeout=15 --format=csv --data=bare --no-heading --columns=_uuid,external_ids,match find logical_router_policy priority=100"), fmt.Sprintf("ovn-nbctl --timeout=15 --format=csv --data=bare --no-heading --columns=_uuid,external_ids,logical_ip find nat"), }, @@ -894,6 +900,7 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { fakeOvn.fakeExec.AddFakeCmdsNoOutputNoError( []string{ + fmt.Sprintf("ovn-nbctl --timeout=15 --format=csv --data=bare --no-heading --columns=_uuid find logical_router_policy priority=%s nexthop!=[]", types.EgressIPReroutePriority), fmt.Sprintf("ovn-nbctl --timeout=15 --format=csv --data=bare --no-heading --columns=_uuid,external_ids,match find logical_router_policy priority=100"), fmt.Sprintf("ovn-nbctl --timeout=15 --format=csv --data=bare --no-heading --columns=_uuid,external_ids,logical_ip find nat"), }, @@ -1001,6 +1008,7 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { fakeOvn.fakeExec.AddFakeCmdsNoOutputNoError( []string{ + fmt.Sprintf("ovn-nbctl --timeout=15 --format=csv --data=bare --no-heading --columns=_uuid find logical_router_policy priority=%s nexthop!=[]", types.EgressIPReroutePriority), fmt.Sprintf("ovn-nbctl --timeout=15 --format=csv --data=bare --no-heading --columns=_uuid,external_ids,match find logical_router_policy priority=100"), fmt.Sprintf("ovn-nbctl --timeout=15 --format=csv --data=bare --no-heading --columns=_uuid,external_ids,logical_ip find nat"), }, @@ -1076,6 +1084,7 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { fakeOvn.fakeExec.AddFakeCmdsNoOutputNoError( []string{ + fmt.Sprintf("ovn-nbctl --timeout=15 --format=csv --data=bare --no-heading --columns=_uuid find logical_router_policy priority=%s nexthop!=[]", types.EgressIPReroutePriority), fmt.Sprintf("ovn-nbctl --timeout=15 --format=csv --data=bare --no-heading --columns=_uuid,external_ids,match find logical_router_policy priority=100"), fmt.Sprintf("ovn-nbctl --timeout=15 --format=csv --data=bare --no-heading --columns=_uuid,external_ids,logical_ip find nat"), }, @@ -1210,6 +1219,7 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { fakeOvn.fakeExec.AddFakeCmdsNoOutputNoError( []string{ + fmt.Sprintf("ovn-nbctl --timeout=15 --format=csv --data=bare --no-heading --columns=_uuid find logical_router_policy priority=%s nexthop!=[]", types.EgressIPReroutePriority), fmt.Sprintf("ovn-nbctl --timeout=15 --format=csv --data=bare --no-heading --columns=_uuid,external_ids,match find logical_router_policy priority=100"), fmt.Sprintf("ovn-nbctl --timeout=15 --format=csv --data=bare --no-heading --columns=_uuid,external_ids,logical_ip find nat"), }, @@ -1318,6 +1328,7 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { fakeOvn.start(ctx) fakeOvn.fakeExec.AddFakeCmdsNoOutputNoError( []string{ + fmt.Sprintf("ovn-nbctl --timeout=15 --format=csv --data=bare --no-heading --columns=_uuid find logical_router_policy priority=%s nexthop!=[]", types.EgressIPReroutePriority), fmt.Sprintf("ovn-nbctl --timeout=15 --may-exist lr-policy-add ovn_cluster_router 101 ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14 allow"), fmt.Sprintf("ovn-nbctl --timeout=15 set logical_switch_port etor-GR_node1 options:nat-addresses=router"), fmt.Sprintf("ovn-nbctl --timeout=15 set logical_switch_port etor-GR_node2 options:nat-addresses=router"), @@ -1394,6 +1405,7 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { }) fakeOvn.fakeExec.AddFakeCmdsNoOutputNoError( []string{ + fmt.Sprintf("ovn-nbctl --timeout=15 --format=csv --data=bare --no-heading --columns=_uuid find logical_router_policy priority=%s nexthop!=[]", types.EgressIPReroutePriority), fmt.Sprintf("ovn-nbctl --timeout=15 --may-exist lr-policy-add ovn_cluster_router 101 ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14 allow"), }, ) @@ -1475,6 +1487,7 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { fakeOvn.fakeExec.AddFakeCmdsNoOutputNoError( []string{ + fmt.Sprintf("ovn-nbctl --timeout=15 --format=csv --data=bare --no-heading --columns=_uuid find logical_router_policy priority=%s nexthop!=[]", types.EgressIPReroutePriority), fmt.Sprintf("ovn-nbctl --timeout=15 --may-exist lr-policy-add ovn_cluster_router 101 ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14 allow"), fmt.Sprintf("ovn-nbctl --timeout=15 --may-exist lr-policy-add %s %v ip4.src == 10.128.0.0/14 && ip4.dst == %s allow", types.OVNClusterRouter, types.DefaultNoRereoutePriority, config.Gateway.V4JoinSubnet), }, @@ -1596,6 +1609,7 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { fakeOvn.fakeExec.AddFakeCmdsNoOutputNoError( []string{ + fmt.Sprintf("ovn-nbctl --timeout=15 --format=csv --data=bare --no-heading --columns=_uuid find logical_router_policy priority=%s nexthop!=[]", types.EgressIPReroutePriority), fmt.Sprintf("ovn-nbctl --timeout=15 --may-exist lr-policy-add ovn_cluster_router 101 ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14 allow"), fmt.Sprintf("ovn-nbctl --timeout=15 set logical_switch_port etor-GR_node1 options:nat-addresses=router"), fmt.Sprintf("ovn-nbctl --timeout=15 set logical_switch_port etor-GR_node2 options:nat-addresses=router"), @@ -1692,6 +1706,7 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { fakeOvn.fakeExec.AddFakeCmdsNoOutputNoError( []string{ + fmt.Sprintf("ovn-nbctl --timeout=15 --format=csv --data=bare --no-heading --columns=_uuid find logical_router_policy priority=%s nexthop!=[]", types.EgressIPReroutePriority), fmt.Sprintf("ovn-nbctl --timeout=15 --may-exist lr-policy-add ovn_cluster_router 101 ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14 allow"), fmt.Sprintf("ovn-nbctl --timeout=15 set logical_switch_port etor-GR_node1 options:nat-addresses=router"), }, @@ -1824,6 +1839,7 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { fakeOvn.fakeExec.AddFakeCmdsNoOutputNoError( []string{ + fmt.Sprintf("ovn-nbctl --timeout=15 --format=csv --data=bare --no-heading --columns=_uuid find logical_router_policy priority=%s nexthop!=[]", types.EgressIPReroutePriority), fmt.Sprintf("ovn-nbctl --timeout=15 --may-exist lr-policy-add ovn_cluster_router 101 ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14 allow"), fmt.Sprintf("ovn-nbctl --timeout=15 set logical_switch_port etor-GR_node2 options:nat-addresses=router"), }, @@ -1860,6 +1876,7 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { ) fakeOvn.fakeExec.AddFakeCmdsNoOutputNoError( []string{ + fmt.Sprintf("ovn-nbctl --timeout=15 --format=csv --data=bare --no-heading --columns=_uuid find logical_router_policy priority=%s nexthop!=[]", types.EgressIPReroutePriority), fmt.Sprintf("ovn-nbctl --timeout=15 remove logical_router %s%s nat nat-id", types.GWRouterPrefix, node1.Name), fmt.Sprintf("ovn-nbctl --timeout=15 --format=csv --data=bare --no-heading --columns=_uuid,external_ids,match find logical_router_policy priority=100"), fmt.Sprintf("ovn-nbctl --timeout=15 --format=csv --data=bare --no-heading --columns=_uuid,external_ids,logical_ip find nat"), @@ -1957,6 +1974,7 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { fakeOvn.fakeExec.AddFakeCmdsNoOutputNoError( []string{ + fmt.Sprintf("ovn-nbctl --timeout=15 --format=csv --data=bare --no-heading --columns=_uuid find logical_router_policy priority=%s nexthop!=[]", types.EgressIPReroutePriority), fmt.Sprintf("ovn-nbctl --timeout=15 --may-exist lr-policy-add ovn_cluster_router 101 ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14 allow"), fmt.Sprintf("ovn-nbctl --timeout=15 set logical_switch_port etor-GR_%s options:nat-addresses=router", node1.Name), }, @@ -2071,6 +2089,7 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { fakeOvn.fakeExec.AddFakeCmdsNoOutputNoError( []string{ + fmt.Sprintf("ovn-nbctl --timeout=15 --format=csv --data=bare --no-heading --columns=_uuid find logical_router_policy priority=%s nexthop!=[]", types.EgressIPReroutePriority), fmt.Sprintf("ovn-nbctl --timeout=15 --may-exist lr-policy-add ovn_cluster_router 101 ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14 allow"), }, ) @@ -2206,6 +2225,7 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { fakeOvn.fakeExec.AddFakeCmdsNoOutputNoError( []string{ + fmt.Sprintf("ovn-nbctl --timeout=15 --format=csv --data=bare --no-heading --columns=_uuid find logical_router_policy priority=%s nexthop!=[]", types.EgressIPReroutePriority), fmt.Sprintf("ovn-nbctl --timeout=15 --may-exist lr-policy-add ovn_cluster_router 101 ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14 allow"), fmt.Sprintf("ovn-nbctl --timeout=15 --may-exist lr-policy-add %s %v ip4.src == 10.128.0.0/14 && ip4.dst == %s allow", types.OVNClusterRouter, types.DefaultNoRereoutePriority, config.Gateway.V4JoinSubnet), }, @@ -2337,6 +2357,7 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { fakeOvn.fakeExec.AddFakeCmdsNoOutputNoError( []string{ + fmt.Sprintf("ovn-nbctl --timeout=15 --format=csv --data=bare --no-heading --columns=_uuid find logical_router_policy priority=%s nexthop!=[]", types.EgressIPReroutePriority), fmt.Sprintf("ovn-nbctl --timeout=15 --may-exist lr-policy-add ovn_cluster_router 101 ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14 allow"), fmt.Sprintf("ovn-nbctl --timeout=15 set logical_switch_port etor-GR_node1 options:nat-addresses=router"), fmt.Sprintf("ovn-nbctl --timeout=15 set logical_switch_port etor-GR_node2 options:nat-addresses=router"), @@ -2825,6 +2846,7 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { fakeOvn.fakeExec.AddFakeCmdsNoOutputNoError( []string{ + fmt.Sprintf("ovn-nbctl --timeout=15 --format=csv --data=bare --no-heading --columns=_uuid find logical_router_policy priority=%s nexthop!=[]", types.EgressIPReroutePriority), fmt.Sprintf("ovn-nbctl --timeout=15 --format=csv --data=bare --no-heading --columns=_uuid,external_ids,match find logical_router_policy priority=100"), fmt.Sprintf("ovn-nbctl --timeout=15 --format=csv --data=bare --no-heading --columns=_uuid,external_ids,logical_ip find nat"), }, @@ -2868,6 +2890,7 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { fakeOvn.fakeExec.AddFakeCmdsNoOutputNoError( []string{ + fmt.Sprintf("ovn-nbctl --timeout=15 --format=csv --data=bare --no-heading --columns=_uuid find logical_router_policy priority=%s nexthop!=[]", types.EgressIPReroutePriority), fmt.Sprintf("ovn-nbctl --timeout=15 --format=csv --data=bare --no-heading --columns=_uuid,external_ids,match find logical_router_policy priority=100"), fmt.Sprintf("ovn-nbctl --timeout=15 --format=csv --data=bare --no-heading --columns=_uuid,external_ids,logical_ip find nat"), }, @@ -2912,6 +2935,7 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { fakeOvn.fakeExec.AddFakeCmdsNoOutputNoError( []string{ + fmt.Sprintf("ovn-nbctl --timeout=15 --format=csv --data=bare --no-heading --columns=_uuid find logical_router_policy priority=%s nexthop!=[]", types.EgressIPReroutePriority), fmt.Sprintf("ovn-nbctl --timeout=15 --format=csv --data=bare --no-heading --columns=_uuid,external_ids,match find logical_router_policy priority=100"), fmt.Sprintf("ovn-nbctl --timeout=15 --format=csv --data=bare --no-heading --columns=_uuid,external_ids,logical_ip find nat"), }, @@ -2977,6 +3001,7 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { fakeOvn.fakeExec.AddFakeCmdsNoOutputNoError( []string{ + fmt.Sprintf("ovn-nbctl --timeout=15 --format=csv --data=bare --no-heading --columns=_uuid find logical_router_policy priority=%s nexthop!=[]", types.EgressIPReroutePriority), fmt.Sprintf("ovn-nbctl --timeout=15 --format=csv --data=bare --no-heading --columns=_uuid,external_ids,match find logical_router_policy priority=100"), fmt.Sprintf("ovn-nbctl --timeout=15 --format=csv --data=bare --no-heading --columns=_uuid,external_ids,logical_ip find nat"), }, @@ -3036,6 +3061,7 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { fakeOvn.fakeExec.AddFakeCmdsNoOutputNoError( []string{ + fmt.Sprintf("ovn-nbctl --timeout=15 --format=csv --data=bare --no-heading --columns=_uuid find logical_router_policy priority=%s nexthop!=[]", types.EgressIPReroutePriority), fmt.Sprintf("ovn-nbctl --timeout=15 --format=csv --data=bare --no-heading --columns=_uuid,external_ids,match find logical_router_policy priority=100"), fmt.Sprintf("ovn-nbctl --timeout=15 --format=csv --data=bare --no-heading --columns=_uuid,external_ids,logical_ip find nat"), }, @@ -3090,6 +3116,7 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { fakeOvn.fakeExec.AddFakeCmdsNoOutputNoError( []string{ + fmt.Sprintf("ovn-nbctl --timeout=15 --format=csv --data=bare --no-heading --columns=_uuid find logical_router_policy priority=%s nexthop!=[]", types.EgressIPReroutePriority), fmt.Sprintf("ovn-nbctl --timeout=15 --format=csv --data=bare --no-heading --columns=_uuid,external_ids,match find logical_router_policy priority=100"), fmt.Sprintf("ovn-nbctl --timeout=15 --format=csv --data=bare --no-heading --columns=_uuid,external_ids,logical_ip find nat"), }, @@ -3151,6 +3178,7 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { fakeOvn.fakeExec.AddFakeCmdsNoOutputNoError( []string{ + fmt.Sprintf("ovn-nbctl --timeout=15 --format=csv --data=bare --no-heading --columns=_uuid find logical_router_policy priority=%s nexthop!=[]", types.EgressIPReroutePriority), fmt.Sprintf("ovn-nbctl --timeout=15 --format=csv --data=bare --no-heading --columns=_uuid,external_ids,match find logical_router_policy priority=100"), fmt.Sprintf("ovn-nbctl --timeout=15 --format=csv --data=bare --no-heading --columns=_uuid,external_ids,logical_ip find nat"), }, @@ -3206,6 +3234,7 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { fakeOvn.fakeExec.AddFakeCmdsNoOutputNoError( []string{ + fmt.Sprintf("ovn-nbctl --timeout=15 --format=csv --data=bare --no-heading --columns=_uuid find logical_router_policy priority=%s nexthop!=[]", types.EgressIPReroutePriority), fmt.Sprintf("ovn-nbctl --timeout=15 --format=csv --data=bare --no-heading --columns=_uuid,external_ids,match find logical_router_policy priority=100"), fmt.Sprintf("ovn-nbctl --timeout=15 --format=csv --data=bare --no-heading --columns=_uuid,external_ids,logical_ip find nat"), }, @@ -3259,6 +3288,7 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { fakeOvn.fakeExec.AddFakeCmdsNoOutputNoError( []string{ + fmt.Sprintf("ovn-nbctl --timeout=15 --format=csv --data=bare --no-heading --columns=_uuid find logical_router_policy priority=%s nexthop!=[]", types.EgressIPReroutePriority), fmt.Sprintf("ovn-nbctl --timeout=15 --format=csv --data=bare --no-heading --columns=_uuid,external_ids,match find logical_router_policy priority=100"), fmt.Sprintf("ovn-nbctl --timeout=15 --format=csv --data=bare --no-heading --columns=_uuid,external_ids,logical_ip find nat"), }, @@ -3311,6 +3341,7 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { fakeOvn.fakeExec.AddFakeCmdsNoOutputNoError( []string{ + fmt.Sprintf("ovn-nbctl --timeout=15 --format=csv --data=bare --no-heading --columns=_uuid find logical_router_policy priority=%s nexthop!=[]", types.EgressIPReroutePriority), fmt.Sprintf("ovn-nbctl --timeout=15 --format=csv --data=bare --no-heading --columns=_uuid,external_ids,match find logical_router_policy priority=100"), fmt.Sprintf("ovn-nbctl --timeout=15 --format=csv --data=bare --no-heading --columns=_uuid,external_ids,logical_ip find nat"), }, @@ -3359,6 +3390,7 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { fakeOvn.fakeExec.AddFakeCmdsNoOutputNoError( []string{ + fmt.Sprintf("ovn-nbctl --timeout=15 --format=csv --data=bare --no-heading --columns=_uuid find logical_router_policy priority=%s nexthop!=[]", types.EgressIPReroutePriority), fmt.Sprintf("ovn-nbctl --timeout=15 --format=csv --data=bare --no-heading --columns=_uuid,external_ids,match find logical_router_policy priority=100"), fmt.Sprintf("ovn-nbctl --timeout=15 --format=csv --data=bare --no-heading --columns=_uuid,external_ids,logical_ip find nat"), }, From 57bb0dec50d14efbbe08622399cc22283cbf348e Mon Sep 17 00:00:00 2001 From: Flavio Fernandes Date: Tue, 1 Mar 2022 19:18:16 +0000 Subject: [PATCH 06/10] Enable info logging for successful assignment of egress IP Conflicts: go-controller/pkg/ovn/egressip.go Signed-off-by: Flavio Fernandes (cherry picked from commit fd9fae4589cb966663e1ca670651d1d3dde79314) --- go-controller/pkg/kube/kube.go | 2 +- go-controller/pkg/ovn/egressip.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/go-controller/pkg/kube/kube.go b/go-controller/pkg/kube/kube.go index 578f9e6423..a5515fb412 100644 --- a/go-controller/pkg/kube/kube.go +++ b/go-controller/pkg/kube/kube.go @@ -133,7 +133,7 @@ func (k *Kube) UpdateEgressFirewall(egressfirewall *egressfirewall.EgressFirewal // UpdateEgressIP updates the EgressIP with the provided EgressIP data func (k *Kube) UpdateEgressIP(eIP *egressipv1.EgressIP) error { - klog.Infof("Updating status on EgressIP %s", eIP.Name) + klog.Infof("Updating status on EgressIP %s status %v", eIP.Name, eIP.Status) _, err := k.EIPClient.K8sV1().EgressIPs().Update(context.TODO(), eIP, metav1.UpdateOptions{}) return err } diff --git a/go-controller/pkg/ovn/egressip.go b/go-controller/pkg/ovn/egressip.go index 9970d1bb14..710d29616d 100644 --- a/go-controller/pkg/ovn/egressip.go +++ b/go-controller/pkg/ovn/egressip.go @@ -542,7 +542,7 @@ func (oc *Controller) assignEgressIPs(eIP *egressipv1.EgressIP) error { EgressIP: eIPC.String(), Node: assignableNodes[i].name, }) - klog.V(5).Infof("Successful assignment of egress IP: %s on node: %+v", egressIP, assignableNodes[i]) + klog.Infof("Successful assignment of egress IP: %s on node: %+v", egressIP, assignableNodes[i]) break } } From 4a73da1782553961055f45340f15318a786f3016 Mon Sep 17 00:00:00 2001 From: Flavio Fernandes Date: Tue, 1 Mar 2022 19:02:22 +0000 Subject: [PATCH 07/10] Fix cache building used for removing stale egress IPs Instead of relying on kapi.Pod.Status.PodIPs use the same approach as addPodEgressIPAssignments Conflicts: go-controller/pkg/ovn/egressip.go Signed-off-by: Patryk Diak (cherry picked from commit 8081e22f062635e088641fe2aad92f8600e0c902) --- go-controller/pkg/ovn/egressip.go | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/go-controller/pkg/ovn/egressip.go b/go-controller/pkg/ovn/egressip.go index 710d29616d..8d30014c16 100644 --- a/go-controller/pkg/ovn/egressip.go +++ b/go-controller/pkg/ovn/egressip.go @@ -377,9 +377,13 @@ func (oc *Controller) generatePodIPCacheForEgressIP(eIPs []interface{}) (map[str continue } for _, pod := range pods { - for _, podIP := range pod.Status.PodIPs { - ip := net.ParseIP(podIP.IP) - egressIPToPodIPCache[egressIP.Name].Insert(ip.String()) + podIPs, err := oc.eIPC.getPodIPs(pod) + if err != nil { + klog.Errorf("Unable to retrieve pod %s IPs, err: %v", pod.Name, err) + continue + } + for _, podIP := range podIPs { + egressIPToPodIPCache[egressIP.Name].Insert(podIP.String()) } } } From 9732a40c65df0ff15f4bd5ea577ae59ef4d99226 Mon Sep 17 00:00:00 2001 From: Flavio Fernandes Date: Thu, 17 Mar 2022 21:09:17 +0000 Subject: [PATCH 08/10] Egress IP: Remove legacy logical router policies in bulk Signed-off-by: Flavio Fernandes (cherry picked from commit bf6bcfaa7e5688effab50ade82ea430aa1129097) --- go-controller/pkg/ovn/egressip.go | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/go-controller/pkg/ovn/egressip.go b/go-controller/pkg/ovn/egressip.go index 8d30014c16..2bdb60ab9e 100644 --- a/go-controller/pkg/ovn/egressip.go +++ b/go-controller/pkg/ovn/egressip.go @@ -211,21 +211,26 @@ func (oc *Controller) syncEgressIPs(eIPs []interface{}) { } } policyIDs, err := findLegacyReroutePolicyIDs() - if err != nil { + if err == nil { + numLegacyPolicies := len(policyIDs) + if numLegacyPolicies > 0 { + klog.Infof("Removing %d legacy logical router policies", numLegacyPolicies) + } + } else { klog.Errorf("Unable to clean up legacy egress IP setup, err: %v", err) } + txn := util.NewNBTxn() for _, policyID := range policyIDs { - _, stderr, err := util.RunOVNNbctl( - "remove", - "logical_router", - types.OVNClusterRouter, - "policies", - policyID, - ) + args := []string{"remove", "logical_router", types.OVNClusterRouter, "policies", policyID} + _, stderr, err := txn.AddOrCommit(args) if err != nil { klog.Errorf("Unable to delete legacy logical router policy: %s, stderr: %s, err: %v", policyID, stderr, err) } } + _, stderr, err := txn.Commit() + if err != nil { + klog.Errorf("Unable to delete legacy logical router policies, stderr: %s, err: %v", stderr, err) + } // This part will take of syncing stale data which we might have in OVN if // there's no ovnkube-master running for a while, while there are changes to From 537487f603f44857b6c19f8c97ed8abdf409eb12 Mon Sep 17 00:00:00 2001 From: Flavio Fernandes Date: Sat, 12 Mar 2022 22:37:37 +0000 Subject: [PATCH 09/10] EgressIp sync needs to account for stale nexthops In cases where OVN database for logical router policies or NATs used by EgressIPs have stale nexthops or wrong external_ips, the sync function should remove them, so the proper row/column gets set. This commit backports the changes in a way that libovsdb is unused. Because of that, all libovsdb calls needed to be converted to legacy util.RunOVNNbctl() implementations. Conflicts: go-controller/pkg/ovn/egressip.go Signed-off-by: Flavio Fernandes (cherry picked from commit 39fcdcf5da479ff243f7340759a961dfb20130fc) --- go-controller/pkg/ovn/egressip.go | 113 +++++++++++++++++++++++------- 1 file changed, 88 insertions(+), 25 deletions(-) diff --git a/go-controller/pkg/ovn/egressip.go b/go-controller/pkg/ovn/egressip.go index 2bdb60ab9e..b37e4414af 100644 --- a/go-controller/pkg/ovn/egressip.go +++ b/go-controller/pkg/ovn/egressip.go @@ -121,6 +121,12 @@ func (oc *Controller) isEgressNodeReachable(egressNode *kapi.Node) bool { return false } +type egressIPCacheEntry struct { + podIPs sets.String + gatewayRouterIPs sets.String + egressIPs sets.String +} + func (oc *Controller) syncEgressIPs(eIPs []interface{}) { oc.eIPC.allocatorMutex.Lock() defer oc.eIPC.allocatorMutex.Unlock() @@ -239,18 +245,18 @@ func (oc *Controller) syncEgressIPs(eIPs []interface{}) { // - Egress IPs which have been deleted while ovnkube-master was down // - pods/namespaces which have stopped matching on egress IPs while // ovnkube-master was down - if egressIPToPodIPCache, err := oc.generatePodIPCacheForEgressIP(eIPs); err == nil { - oc.syncStaleEgressReroutePolicy(egressIPToPodIPCache) - oc.syncStaleNATRules(egressIPToPodIPCache) + if egressIPCache, err := oc.generateCacheForEgressIP(eIPs); err == nil { + oc.syncStaleEgressReroutePolicy(egressIPCache) + oc.syncStaleNATRules(egressIPCache) } } -func (oc *Controller) syncStaleEgressReroutePolicy(egressIPToPodIPCache map[string]sets.String) { +func (oc *Controller) syncStaleEgressReroutePolicy(egressIPCache map[string]egressIPCacheEntry) { policyItems, stderr, err := util.RunOVNNbctl( "--format=csv", "--data=bare", "--no-heading", - "--columns=_uuid,external_ids,match", + "--columns=_uuid,external_ids,match,nexthops", "find", "logical_router_policy", fmt.Sprintf("priority=%v", types.EgressIPReroutePriority), @@ -259,6 +265,10 @@ func (oc *Controller) syncStaleEgressReroutePolicy(egressIPToPodIPCache map[stri klog.Errorf("Unable to sync egress IPs, unable to find logical router policies, stderr: %s, err: %v", stderr, err) return } + + // Set of visited podIps per egressIPName, so duplicates can be detected and removed + ovnEgressIPRoutePolicies := make(map[string]sets.String) + for _, policyItem := range strings.Split(policyItems, "\n") { if policyItem == "" { continue @@ -268,6 +278,7 @@ func (oc *Controller) syncStaleEgressReroutePolicy(egressIPToPodIPCache map[stri UUID := policyFields[0] externalID := policyFields[1] match := policyFields[2] + nexthops := sets.NewString(strings.Split(policyFields[3], " ")...) // A match condition for egress IPs will look like: // ip4.src == 10.244.2.5 @@ -278,10 +289,15 @@ func (oc *Controller) syncStaleEgressReroutePolicy(egressIPToPodIPCache map[stri klog.Errorf("Unable to parse logical_ip: %s from match condition: %s", logicalIP, match) continue } + parsedLogicalIPStr := parsedLogicalIP.String() egressIPName := strings.Split(externalID, "=")[1] - podIPCache, exists := egressIPToPodIPCache[egressIPName] - if !exists || !podIPCache.Has(parsedLogicalIP.String()) { + if _, exists := ovnEgressIPRoutePolicies[egressIPName]; !exists { + ovnEgressIPRoutePolicies[egressIPName] = sets.NewString() + } + cacheEntry, exists := egressIPCache[egressIPName] + if !exists || cacheEntry.gatewayRouterIPs.Len() == 0 || !cacheEntry.podIPs.Has(parsedLogicalIPStr) || ovnEgressIPRoutePolicies[egressIPName].Has(parsedLogicalIPStr) { + klog.Infof("syncStaleEgressReroutePolicy will delete %s due to no nexthop or stale or duplicate logical ip: %v", egressIPName, match) _, stderr, err := util.RunOVNNbctl( "remove", "logical_router", @@ -292,16 +308,25 @@ func (oc *Controller) syncStaleEgressReroutePolicy(egressIPToPodIPCache map[stri if err != nil { klog.Errorf("Unable to remove stale logical router policy for EgressIP: %s, stderr: %s, err: %v", egressIPName, stderr, err) } + continue + } + ovnEgressIPRoutePolicies[egressIPName].Insert(parsedLogicalIPStr) + + if !nexthops.Equal(cacheEntry.gatewayRouterIPs) { + klog.Infof("syncStaleEgressReroutePolicy will update nexthops of %s from: %v to: %v", egressIPName, nexthops.List(), cacheEntry.gatewayRouterIPs.List()) + if err = oc.eIPC.updateEgressReroutePolicyNexthops(UUID, cacheEntry.gatewayRouterIPs.List()); err != nil { + klog.Errorf("Unable to update nexthop for logical router policy for EgressIP: %s, err: %v", egressIPName, err) + } } } } -func (oc *Controller) syncStaleNATRules(egressIPToPodIPCache map[string]sets.String) { +func (oc *Controller) syncStaleNATRules(egressIPCache map[string]egressIPCacheEntry) { natItems, stderr, err := util.RunOVNNbctl( "--format=csv", "--data=bare", "--no-heading", - "--columns=_uuid,external_ids,logical_ip", + "--columns=_uuid,external_ids,logical_ip,external_ip", "find", "nat", ) @@ -318,6 +343,7 @@ func (oc *Controller) syncStaleNATRules(egressIPToPodIPCache map[string]sets.Str UUID := natFields[0] externalID := natFields[1] logicalIP := natFields[2] + externalIP := natFields[3] parsedLogicalIP := net.ParseIP(logicalIP).String() if !strings.Contains(externalID, "name") { @@ -325,8 +351,16 @@ func (oc *Controller) syncStaleNATRules(egressIPToPodIPCache map[string]sets.Str } egressIPName := strings.Split(externalID, "=")[1] - podIPCache, exists := egressIPToPodIPCache[egressIPName] - if !exists || !podIPCache.Has(parsedLogicalIP) { + isStaleRow := false + cacheEntry, exists := egressIPCache[egressIPName] + if !exists || !cacheEntry.podIPs.Has(parsedLogicalIP) { + klog.Infof("syncStaleNATRules will delete %s due to logical ip: %v", egressIPName, logicalIP) + isStaleRow = true + } else if !cacheEntry.egressIPs.Has(externalIP) { + klog.Infof("syncStaleNATRules will delete %s due to external ip: %v", egressIPName, externalIP) + isStaleRow = true + } + if isStaleRow { logicalRouters, stderr, err := util.RunOVNNbctl( "--format=csv", "--data=bare", @@ -357,19 +391,34 @@ func (oc *Controller) syncStaleNATRules(egressIPToPodIPCache map[string]sets.Str } } -// generatePodIPCacheForEgressIP builds a cache of egressIP name -> podIPs for fast +// generateCacheForEgressIP builds a cache of egressIP name -> podIPs for fast // access when syncing egress IPs. The Egress IP setup will return a lot of // atomic items with the same general information repeated across most (egressIP // name, logical IP defined for that name), hence use a cache to avoid round // trips to the API server per item. -func (oc *Controller) generatePodIPCacheForEgressIP(eIPs []interface{}) (map[string]sets.String, error) { - egressIPToPodIPCache := make(map[string]sets.String) +func (oc *Controller) generateCacheForEgressIP(eIPs []interface{}) (map[string]egressIPCacheEntry, error) { + egressIPCache := make(map[string]egressIPCacheEntry) for _, eIP := range eIPs { egressIP, ok := eIP.(*egressipv1.EgressIP) if !ok { continue } - egressIPToPodIPCache[egressIP.Name] = sets.NewString() + egressIPCache[egressIP.Name] = egressIPCacheEntry{ + podIPs: sets.NewString(), + gatewayRouterIPs: sets.NewString(), + egressIPs: sets.NewString(), + } + for _, status := range egressIP.Status.Items { + isEgressIPv6 := utilnet.IsIPv6String(status.EgressIP) + gatewayRouterIP, err := oc.eIPC.getGatewayRouterJoinIP(status.Node, isEgressIPv6) + if err != nil { + klog.Errorf("Unable to retrieve gateway IP for node: %s, protocol is IPv6: %v, err: %v", status.Node, isEgressIPv6, err) + continue + } + egressIPCache[egressIP.Name].gatewayRouterIPs.Insert(gatewayRouterIP.String()) + egressIPCache[egressIP.Name].egressIPs.Insert(status.EgressIP) + } + namespaces, err := oc.watchFactory.GetNamespacesBySelector(egressIP.Spec.NamespaceSelector) if err != nil { klog.Errorf("Error building egress IP sync cache, cannot retrieve namespaces for EgressIP: %s, err: %v", egressIP.Name, err) @@ -388,12 +437,12 @@ func (oc *Controller) generatePodIPCacheForEgressIP(eIPs []interface{}) (map[str continue } for _, podIP := range podIPs { - egressIPToPodIPCache[egressIP.Name].Insert(podIP.String()) + egressIPCache[egressIP.Name].podIPs.Insert(podIP.String()) } } } } - return egressIPToPodIPCache, nil + return egressIPCache, nil } func (oc *Controller) isAnyClusterNodeIP(ip net.IP) *egressNode { @@ -1015,7 +1064,7 @@ func (e *egressIPController) deleteEgressReroutePolicy(filterOption, egressIPNam return nil } -func findLegacyReroutePolicyIDs() ([]string, error) { +func findReroutePolicyIDs(filterOption, egressIPName string, gatewayRouterIPs []net.IP) ([]string, error) { policyIDs, stderr, err := util.RunOVNNbctl( "--format=csv", "--data=bare", @@ -1023,11 +1072,13 @@ func findLegacyReroutePolicyIDs() ([]string, error) { "--columns=_uuid", "find", "logical_router_policy", + fmt.Sprintf("match=\"%s\"", filterOption), fmt.Sprintf("priority=%v", types.EgressIPReroutePriority), - "nexthop!=[]", + fmt.Sprintf("external_ids:name=%s", egressIPName), + fmt.Sprintf("nexthops=%q", gatewayRouterIPs), ) if err != nil { - return nil, fmt.Errorf("unable to find legacy logical router policies, stderr: %s, err: %v", stderr, err) + return nil, fmt.Errorf("unable to find logical router policy for EgressIP: %s, stderr: %s, err: %v", egressIPName, stderr, err) } if policyIDs == "" { return nil, nil @@ -1035,7 +1086,21 @@ func findLegacyReroutePolicyIDs() ([]string, error) { return strings.Split(policyIDs, "\n"), nil } -func findReroutePolicyIDs(filterOption, egressIPName string, gatewayRouterIPs []net.IP) ([]string, error) { +func (e *egressIPController) updateEgressReroutePolicyNexthops(policyID string, nexthops []string) error { + nexthopsStr := strings.Join(nexthops, ",") + _, stderr, err := util.RunOVNNbctl( + "set", + "logical_router_policy", + policyID, + fmt.Sprintf("nexthops=[%s]", nexthopsStr), + ) + if err != nil { + return fmt.Errorf("unable to set nexthops to logical router policy %s, stderr: %s, err: %v", policyID, stderr, err) + } + return nil +} + +func findLegacyReroutePolicyIDs() ([]string, error) { policyIDs, stderr, err := util.RunOVNNbctl( "--format=csv", "--data=bare", @@ -1043,13 +1108,11 @@ func findReroutePolicyIDs(filterOption, egressIPName string, gatewayRouterIPs [] "--columns=_uuid", "find", "logical_router_policy", - fmt.Sprintf("match=\"%s\"", filterOption), fmt.Sprintf("priority=%v", types.EgressIPReroutePriority), - fmt.Sprintf("external_ids:name=%s", egressIPName), - fmt.Sprintf("nexthops=%q", gatewayRouterIPs), + "nexthop!=[]", ) if err != nil { - return nil, fmt.Errorf("unable to find logical router policy for EgressIP: %s, stderr: %s, err: %v", egressIPName, stderr, err) + return nil, fmt.Errorf("unable to find legacy logical router policies, stderr: %s, err: %v", stderr, err) } if policyIDs == "" { return nil, nil From fba695a1991829180a90b627698b1fd59733572f Mon Sep 17 00:00:00 2001 From: Flavio Fernandes Date: Wed, 23 Mar 2022 21:55:26 +0000 Subject: [PATCH 10/10] CARRY: fix idempotently updating egress IP route policies Do not use nexhops as part of findReroutePolicyIDs lookup. Ensure nexthops are correct when updating egress ip policies. Signed-off-by: Flavio Fernandes (cherry picked from commit 5852d49b2958ab5eab413d15abfcc79ed57134df) --- go-controller/pkg/ovn/egressip.go | 100 +++++++++++++++++++++--------- 1 file changed, 70 insertions(+), 30 deletions(-) diff --git a/go-controller/pkg/ovn/egressip.go b/go-controller/pkg/ovn/egressip.go index b37e4414af..c21c222b02 100644 --- a/go-controller/pkg/ovn/egressip.go +++ b/go-controller/pkg/ovn/egressip.go @@ -894,7 +894,7 @@ func (e *egressIPController) addPodEgressIP(eIP *egressipv1.EgressIP, pod *kapi. if e.needsRetry(pod) { e.podRetry.Delete(getPodKey(pod)) } - if err := e.handleEgressReroutePolicy(podIPs, eIP.Status.Items, eIP.Name, e.createEgressReroutePolicy); err != nil { + if err := e.handleEgressReroutePolicy(podIPs, eIP.Status.Items, eIP.Name, e.createOrUpdateEgressReroutePolicy); err != nil { return fmt.Errorf("unable to create logical router policy, err: %v", err) } for _, status := range eIP.Status.Items { @@ -973,7 +973,7 @@ func (e *egressIPController) needsRetry(pod *kapi.Pod) bool { return retry } -// createEgressReroutePolicy uses logical router policies to force egress traffic to the egress node, for that we need +// createOrUpdateEgressReroutePolicy uses logical router policies to force egress traffic to the egress node, for that we need // to retrive the internal gateway router IP attached to the egress node. This method handles both the shared and // local gateway mode case func (e *egressIPController) handleEgressReroutePolicy(podIps []net.IP, statuses []egressipv1.EgressIPStatusItem, egressIPName string, cb func(filterOption, egressIPName string, gatewayRouterIPs []net.IP) error) error { @@ -1015,12 +1015,13 @@ func (e *egressIPController) handleEgressReroutePolicy(podIps []net.IP, statuses return nil } -func (e *egressIPController) createEgressReroutePolicy(filterOption, egressIPName string, gatewayRouterIPs []net.IP) error { - policyIDs, err := findReroutePolicyIDs(filterOption, egressIPName, gatewayRouterIPs) +func (e *egressIPController) createOrUpdateEgressReroutePolicy(filterOption, egressIPName string, gatewayRouterIPs []net.IP) error { + logicalRoutePolicies, err := findReroutePolicyIDs(filterOption, egressIPName) if err != nil { return err } - if policyIDs == nil { + logicalRoutePoliciesLen := len(logicalRoutePolicies) + if logicalRoutePoliciesLen == 0 { _, stderr, err := util.RunOVNNbctl( "--id=@lr-policy", "create", @@ -1040,64 +1041,103 @@ func (e *egressIPController) createEgressReroutePolicy(filterOption, egressIPNam if err != nil { return fmt.Errorf("unable to create logical router policy, stderr: %s, err: %v", stderr, err) } + return nil + } + + if logicalRoutePoliciesLen > 1 { + klog.Warningf("Found %d OVN rows for egressIP %s %s. ovnkube-master restart may fix it.", logicalRoutePoliciesLen, egressIPName, filterOption) + } + + // At this point we know there is a policy for egressIPName with the wanted filterOption. Make sure its nexthops are proper. + expectedNexthops := sets.NewString() + for _, gatewayRouterIP := range gatewayRouterIPs { + expectedNexthops.Insert(string(gatewayRouterIP.String())) + } + if !expectedNexthops.Equal(logicalRoutePolicies[0].nexthops) { + nextHopsList := expectedNexthops.List() + klog.Infof("createOrUpdateEgressReroutePolicy will update nexthops of %s from: %v to: %v", egressIPName, logicalRoutePolicies[0].nexthops.List(), nextHopsList) + err = e.updateEgressReroutePolicyNexthops(logicalRoutePolicies[0].policyID, nextHopsList) + if err != nil { + return fmt.Errorf("unable to update nexthops for logical router policy %s, err: %v", egressIPName, err) + } } return nil } func (e *egressIPController) deleteEgressReroutePolicy(filterOption, egressIPName string, gatewayRouterIPs []net.IP) error { - policyIDs, err := findReroutePolicyIDs(filterOption, egressIPName, gatewayRouterIPs) + logicalRoutePolicies, err := findReroutePolicyIDs(filterOption, egressIPName) if err != nil { return err } - for _, policyID := range policyIDs { - _, stderr, err := util.RunOVNNbctl( + txn := util.NewNBTxn() + for _, logicalRoutePolicy := range logicalRoutePolicies { + args := []string{ "remove", "logical_router", types.OVNClusterRouter, "policies", - policyID, - ) + logicalRoutePolicy.policyID, + } + _, stderr, err := txn.AddOrCommit(args) if err != nil { return fmt.Errorf("unable to remove logical router policy, stderr: %s, err: %v", stderr, err) } } + _, stderr, err := txn.Commit() + if err != nil { + return fmt.Errorf("unable to remove logical router policies, stderr: %s, err: %v", stderr, err) + } return nil } -func findReroutePolicyIDs(filterOption, egressIPName string, gatewayRouterIPs []net.IP) ([]string, error) { - policyIDs, stderr, err := util.RunOVNNbctl( +func (e *egressIPController) updateEgressReroutePolicyNexthops(policyID string, nexthops []string) error { + nexthopsStr := strings.Join(nexthops, ",") + _, stderr, err := util.RunOVNNbctl( + "set", + "logical_router_policy", + policyID, + fmt.Sprintf("nexthops=[%s]", nexthopsStr), + ) + if err != nil { + return fmt.Errorf("unable to set nexthops to logical router policy %s, stderr: %s, err: %v", policyID, stderr, err) + } + return nil +} + +type logicalRouterPolicyRow struct { + policyID string + nexthops sets.String +} + +func findReroutePolicyIDs(filterOption, egressIPName string) ([]logicalRouterPolicyRow, error) { + stdout, stderr, err := util.RunOVNNbctl( "--format=csv", "--data=bare", "--no-heading", - "--columns=_uuid", + "--columns=_uuid,nexthops", "find", "logical_router_policy", fmt.Sprintf("match=\"%s\"", filterOption), fmt.Sprintf("priority=%v", types.EgressIPReroutePriority), fmt.Sprintf("external_ids:name=%s", egressIPName), - fmt.Sprintf("nexthops=%q", gatewayRouterIPs), ) if err != nil { return nil, fmt.Errorf("unable to find logical router policy for EgressIP: %s, stderr: %s, err: %v", egressIPName, stderr, err) } - if policyIDs == "" { - return nil, nil - } - return strings.Split(policyIDs, "\n"), nil -} -func (e *egressIPController) updateEgressReroutePolicyNexthops(policyID string, nexthops []string) error { - nexthopsStr := strings.Join(nexthops, ",") - _, stderr, err := util.RunOVNNbctl( - "set", - "logical_router_policy", - policyID, - fmt.Sprintf("nexthops=[%s]", nexthopsStr), - ) - if err != nil { - return fmt.Errorf("unable to set nexthops to logical router policy %s, stderr: %s, err: %v", policyID, stderr, err) + policyItems := strings.Split(stdout, "\n") + rows := make([]logicalRouterPolicyRow, 0, len(policyItems)) + for _, policyItem := range policyItems { + if policyItem == "" { + continue + } + policyFields := strings.Split(policyItem, ",") + rows = append(rows, logicalRouterPolicyRow{ + policyID: policyFields[0], + nexthops: sets.NewString(strings.Split(policyFields[1], " ")...), + }) } - return nil + return rows, nil } func findLegacyReroutePolicyIDs() ([]string, error) {