From 2356819f1bb66dbcf0ed295eb6b22021953d1510 Mon Sep 17 00:00:00 2001 From: Surya Seetharaman Date: Mon, 20 Mar 2023 14:58:00 +0100 Subject: [PATCH 01/73] Add egressip feature enable flag to CM This commit passes the egressip_enabled_flag to cluster manager pod on control plane so that we can configure the feature from CM for IC Signed-off-by: Surya Seetharaman --- dist/images/ovnkube.sh | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/dist/images/ovnkube.sh b/dist/images/ovnkube.sh index 46c85f70a4..e56d4dbf88 100755 --- a/dist/images/ovnkube.sh +++ b/dist/images/ovnkube.sh @@ -1269,6 +1269,17 @@ ovn-cluster-manager() { echo "=============== ovn-cluster-manager (wait for ready_to_start_node) ========== MASTER ONLY" wait_for_event ready_to_start_node + egressip_enabled_flag= + if [[ ${ovn_egressip_enable} == "true" ]]; then + egressip_enabled_flag="--enable-egress-ip" + fi + + egressip_healthcheck_port_flag= + if [[ -n "${ovn_egress_ip_healthcheck_port}" ]]; then + egressip_healthcheck_port_flag="--egressip-node-healthcheck-port=${ovn_egress_ip_healthcheck_port}" + fi + echo "egressip_flags: ${egressip_enabled_flag}, ${egressip_healthcheck_port_flag}" + hybrid_overlay_flags= if [[ ${ovn_hybrid_overlay_enable} == "true" ]]; then hybrid_overlay_flags="--enable-hybrid-overlay" @@ -1329,6 +1340,8 @@ ovn-cluster-manager() { --logfile /var/log/ovn-kubernetes/ovnkube-cluster-manager.log \ ${ovnkube_metrics_tls_opts} \ ${multicast_enabled_flag} \ + ${egressip_enabled_flag} \ + ${egressip_healthcheck_port_flag} \ ${multi_network_enabled_flag} \ --metrics-bind-address ${ovnkube_cluster_manager_metrics_bind_address} \ --host-network-namespace ${ovn_host_network_namespace} & From 79a9b52bd9ea0fc7970653677d4bbcd4bf73f7f7 Mon Sep 17 00:00:00 2001 From: Surya Seetharaman Date: Sat, 25 Mar 2023 17:14:50 +0100 Subject: [PATCH 02/73] Add egressIP factory to cluster-manager This commit does three things: 1) It adds eipFactory to CMWatchFactory which is required for 
EIP 2) It adds EIPClient and CloudNetworkClient to CMClientset CM = cluster manager This plubming is the foundation to be able to move egressIP bits into CM. Signed-off-by: Surya Seetharaman --- go-controller/pkg/factory/factory.go | 23 ++++++++++++++++++++--- go-controller/pkg/util/kube.go | 4 ++++ 2 files changed, 24 insertions(+), 3 deletions(-) diff --git a/go-controller/pkg/factory/factory.go b/go-controller/pkg/factory/factory.go index ac8b3b039c..36264388c6 100644 --- a/go-controller/pkg/factory/factory.go +++ b/go-controller/pkg/factory/factory.go @@ -444,16 +444,33 @@ func NewNodeWatchFactory(ovnClientset *util.OVNNodeClientset, nodeName string) ( // mode process. func NewClusterManagerWatchFactory(ovnClientset *util.OVNClusterManagerClientset) (*WatchFactory, error) { wf := &WatchFactory{ - iFactory: informerfactory.NewSharedInformerFactory(ovnClientset.KubeClient, resyncInterval), - informers: make(map[reflect.Type]*informer), - stopChan: make(chan struct{}), + iFactory: informerfactory.NewSharedInformerFactory(ovnClientset.KubeClient, resyncInterval), + eipFactory: egressipinformerfactory.NewSharedInformerFactory(ovnClientset.EgressIPClient, resyncInterval), + cpipcFactory: ocpcloudnetworkinformerfactory.NewSharedInformerFactory(ovnClientset.CloudNetworkClient, resyncInterval), + informers: make(map[reflect.Type]*informer), + stopChan: make(chan struct{}), } + if err := egressipapi.AddToScheme(egressipscheme.Scheme); err != nil { + return nil, err + } var err error wf.informers[NodeType], err = newInformer(NodeType, wf.iFactory.Core().V1().Nodes().Informer()) if err != nil { return nil, err } + if config.OVNKubernetesFeature.EnableEgressIP { + wf.informers[EgressIPType], err = newInformer(EgressIPType, wf.eipFactory.K8s().V1().EgressIPs().Informer()) + if err != nil { + return nil, err + } + } + if util.PlatformTypeIsEgressIPCloudProvider() { + wf.informers[CloudPrivateIPConfigType], err = newInformer(CloudPrivateIPConfigType, 
wf.cpipcFactory.Cloud().V1().CloudPrivateIPConfigs().Informer()) + if err != nil { + return nil, err + } + } return wf, nil } diff --git a/go-controller/pkg/util/kube.go b/go-controller/pkg/util/kube.go index 511e968843..4423c799d1 100644 --- a/go-controller/pkg/util/kube.go +++ b/go-controller/pkg/util/kube.go @@ -68,6 +68,8 @@ type OVNNodeClientset struct { type OVNClusterManagerClientset struct { KubeClient kubernetes.Interface + EgressIPClient egressipclientset.Interface + CloudNetworkClient ocpcloudnetworkclientset.Interface NetworkAttchDefClient networkattchmentdefclientset.Interface } @@ -86,6 +88,8 @@ func (cs *OVNClientset) GetMasterClientset() *OVNMasterClientset { func (cs *OVNClientset) GetClusterManagerClientset() *OVNClusterManagerClientset { return &OVNClusterManagerClientset{ KubeClient: cs.KubeClient, + EgressIPClient: cs.EgressIPClient, + CloudNetworkClient: cs.CloudNetworkClient, NetworkAttchDefClient: cs.NetworkAttchDefClient, } } From 744f0972ddfdd437207d48f5450e6800c7a54c2f Mon Sep 17 00:00:00 2001 From: Surya Seetharaman Date: Sat, 25 Mar 2023 17:15:37 +0100 Subject: [PATCH 03/73] Add utility DeleteLogicalRouterStaticRoutesWithPredicateOps This commit adds DeleteLogicalRouterStaticRoutesWithPredicateOps to libovsdbops package. Note that we already had DeleteLogicalRouterStaticRoutesWithPredicate and now we want to just have a util that returns the ops so that we can batch this when doing a deletion for EIP. NOTE: This commit only introduces the utility, its use comes down in future commit. 
Signed-off-by: Surya Seetharaman --- go-controller/pkg/libovsdbops/router.go | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/go-controller/pkg/libovsdbops/router.go b/go-controller/pkg/libovsdbops/router.go index bc48f5508a..d34852e4f7 100644 --- a/go-controller/pkg/libovsdbops/router.go +++ b/go-controller/pkg/libovsdbops/router.go @@ -678,6 +678,20 @@ func CreateOrReplaceLogicalRouterStaticRouteWithPredicate(nbClient libovsdbclien // routes from the cache based on a given predicate, deletes them and removes // them from the provided logical router func DeleteLogicalRouterStaticRoutesWithPredicate(nbClient libovsdbclient.Client, routerName string, p logicalRouterStaticRoutePredicate) error { + var ops []libovsdb.Operation + var err error + ops, err = DeleteLogicalRouterStaticRoutesWithPredicateOps(nbClient, ops, routerName, p) + if err != nil { + return err + } + _, err = TransactAndCheck(nbClient, ops) + return err +} + +// DeleteLogicalRouterStaticRoutesWithPredicateOps looks up logical router static +// routes from the cache based on a given predicate, and returns the ops to delete +// them and remove them from the provided logical router +func DeleteLogicalRouterStaticRoutesWithPredicateOps(nbClient libovsdbclient.Client, ops []libovsdb.Operation, routerName string, p logicalRouterStaticRoutePredicate) ([]libovsdb.Operation, error) { router := &nbdb.LogicalRouter{ Name: routerName, } @@ -700,7 +714,7 @@ func DeleteLogicalRouterStaticRoutesWithPredicate(nbClient libovsdbclient.Client } m := newModelClient(nbClient) - return m.Delete(opModels...) + return m.DeleteOps(ops, opModels...) 
} // DeleteLogicalRouterStaticRoutes deletes the logical router static routes and From 665b934620c62c3ca66a9f14a77861021ecd3db8 Mon Sep 17 00:00:00 2001 From: Surya Seetharaman Date: Sat, 25 Mar 2023 17:20:02 +0100 Subject: [PATCH 04/73] Split egressip into global and zone controllers This commit does the following: Cluster Manager side: 1) We create a new egressIPController and call that from CM if the feature is enabled 2) We do only WatchEgressNodes, WatchEgressIP and WatchCloudPrivateIPConfig from CM (namespaces, pods will be taken care of by master) 3) We move all the eip healthcheck bits and node allocator bits to CM 4) Any updates to EIP CRD will always be done by CM; master side will only read the CRD 5) All bits for cloud private IP config was moved to CM from master. Master side: 1) We rename egressIPController to egressIPZoneController - this is responsible for doing NBDB configurations for everything happening in its zone and sometimes for pods in remote zones when necessary 2) We add a localZoneNodes similar to what we have in base which holds the list of local zone nodes 3) master will be responsible for adding snats, reroute policies and static routes for eip pods High level logic: - Do SNATs towards egressIP only if egressNode is local to the zone irrespective of whether the pod is local or remote - Do reroute policies towards transit switch only if pod is local to the zone. NOTE: In IC, nexthop is transit switchIP NOT joinIP - Do static routes towards joinIP only if pod is non-local to the zone but egressnode is local to the zone. NOTE: This is NEW, we never used static routes for EIP before. It is an IC only thing. - Do SNATs towards nodeIP only if pod is local to the zone - Duplicate the code for `IsReachableLegacy` and `IsReachableViaGRPC` functions which will now be in both CM and master - but this is temporary to ensure ESVC works in non-IC setup and until we do ESVC refactor for IC. 
Tests: The commit also fixes unit tests to run in non-ic and ic modes and splits the tests for global controller logic into cluster-manager while keeping the tests to check libovsdb objects in master itself. Signed-off-by: Surya Seetharaman --- .../pkg/clustermanager/clustermanager.go | 15 + .../pkg/clustermanager/egressip_controller.go | 1505 ++ .../egressip_controller_test.go | 2349 +++ .../clustermanager/egressip_event_handler.go | 263 + .../fake_cluster_manager_test.go | 65 + go-controller/pkg/factory/factory.go | 7 - go-controller/pkg/factory/factory_test.go | 15 +- go-controller/pkg/ovn/base_event_handler.go | 8 +- .../egress_services/egress_services_node.go | 85 + .../pkg/ovn/default_network_controller.go | 96 +- go-controller/pkg/ovn/egressip.go | 2300 +-- go-controller/pkg/ovn/egressip_test.go | 12157 +++++++--------- go-controller/pkg/ovn/ovn.go | 16 +- go-controller/pkg/ovn/ovn_test.go | 17 +- go-controller/pkg/syncmap/syncmap.go | 8 + go-controller/pkg/util/kube.go | 2 - test/e2e/egressip.go | 28 +- test/e2e/util.go | 5 + 18 files changed, 10309 insertions(+), 8632 deletions(-) create mode 100644 go-controller/pkg/clustermanager/egressip_controller.go create mode 100644 go-controller/pkg/clustermanager/egressip_controller_test.go create mode 100644 go-controller/pkg/clustermanager/egressip_event_handler.go create mode 100644 go-controller/pkg/clustermanager/fake_cluster_manager_test.go diff --git a/go-controller/pkg/clustermanager/clustermanager.go b/go-controller/pkg/clustermanager/clustermanager.go index 65a53e5e6d..a32abbed75 100644 --- a/go-controller/pkg/clustermanager/clustermanager.go +++ b/go-controller/pkg/clustermanager/clustermanager.go @@ -31,6 +31,9 @@ type ClusterManager struct { wf *factory.WatchFactory wg *sync.WaitGroup secondaryNetClusterManager *secondaryNetworkClusterManager + // Controller used for programming node allocation for egress IP + // The OVN DB setup is handled by egressIPZoneController that runs in ovnkube-controller + 
eIPC *egressIPClusterController // event recorder used to post events to k8s recorder record.EventRecorder @@ -66,6 +69,9 @@ func NewClusterManager(ovnClient *util.OVNClusterManagerClientset, wf *factory.W return nil, err } } + if config.OVNKubernetesFeature.EnableEgressIP { + cm.eIPC = newEgressIPController(ovnClient, wf, recorder) + } return cm, nil } @@ -93,6 +99,12 @@ func (cm *ClusterManager) Start(ctx context.Context) error { } } + if config.OVNKubernetesFeature.EnableEgressIP { + if err := cm.eIPC.Start(); err != nil { + return err + } + } + return nil } @@ -104,5 +116,8 @@ func (cm *ClusterManager) Stop() { if config.OVNKubernetesFeature.EnableMultiNetwork { cm.secondaryNetClusterManager.Stop() } + if config.OVNKubernetesFeature.EnableEgressIP { + cm.eIPC.Stop() + } metrics.UnregisterClusterManagerFunctional() } diff --git a/go-controller/pkg/clustermanager/egressip_controller.go b/go-controller/pkg/clustermanager/egressip_controller.go new file mode 100644 index 0000000000..f2d8d0587f --- /dev/null +++ b/go-controller/pkg/clustermanager/egressip_controller.go @@ -0,0 +1,1505 @@ +package clustermanager + +import ( + "context" + "encoding/hex" + "encoding/json" + "fmt" + "net" + "os" + "reflect" + "sort" + "strings" + "sync" + "syscall" + "time" + + ocpcloudnetworkapi "github.com/openshift/api/cloudnetwork/v1" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config" + egressipv1 "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressip/v1" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/factory" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/kube" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/metrics" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/healthcheck" + objretry "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/retry" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" + v1 "k8s.io/api/core/v1" + apierrors 
"k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + utilerrors "k8s.io/apimachinery/pkg/util/errors" + "k8s.io/apimachinery/pkg/util/sets" + "k8s.io/client-go/tools/record" + "k8s.io/client-go/util/retry" + "k8s.io/klog/v2" + utilnet "k8s.io/utils/net" +) + +const ( + egressIPReachabilityCheckInterval = 5 * time.Second +) + +type egressIPHealthcheckClientAllocator struct{} + +func (hccAlloc *egressIPHealthcheckClientAllocator) allocate(nodeName string) healthcheck.EgressIPHealthClient { + return healthcheck.NewEgressIPHealthClient(nodeName) +} + +func isReachableViaGRPC(mgmtIPs []net.IP, healthClient healthcheck.EgressIPHealthClient, healthCheckPort, totalTimeout int) bool { + dialCtx, dialCancel := context.WithTimeout(context.Background(), time.Duration(totalTimeout)*time.Second) + defer dialCancel() + + if !healthClient.IsConnected() { + // gRPC session is not up. Attempt to connect and if that suceeds, we will declare node as reacheable. + return healthClient.Connect(dialCtx, mgmtIPs, healthCheckPort) + } + + // gRPC session is already established. Send a probe, which will succeed, or close the session. + return healthClient.Probe(dialCtx) +} + +type egressIPDialer interface { + dial(ip net.IP, timeout time.Duration) bool +} + +type egressIPDial struct{} + +var dialer egressIPDialer = &egressIPDial{} + +type healthcheckClientAllocator interface { + allocate(nodeName string) healthcheck.EgressIPHealthClient +} + +// Blantant copy from: https://github.com/openshift/sdn/blob/master/pkg/network/common/egressip.go#L499-L505 +// Ping a node and return whether or not we think it is online. We do this by trying to +// open a TCP connection to the "discard" service (port 9); if the node is offline, the +// attempt will either time out with no response, or else return "no route to host" (and +// we will return false). 
If the node is online then we presumably will get a "connection +// refused" error; but the code below assumes that anything other than timeout or "no +// route" indicates that the node is online. +func (e *egressIPDial) dial(ip net.IP, timeout time.Duration) bool { + conn, err := net.DialTimeout("tcp", net.JoinHostPort(ip.String(), "9"), timeout) + if conn != nil { + conn.Close() + } + if opErr, ok := err.(*net.OpError); ok { + if opErr.Timeout() { + return false + } + if sysErr, ok := opErr.Err.(*os.SyscallError); ok && sysErr.Err == syscall.EHOSTUNREACH { + return false + } + } + return true +} + +var hccAllocator healthcheckClientAllocator = &egressIPHealthcheckClientAllocator{} + +// egressNode is a cache helper used for egress IP assignment, representing an egress node +type egressNode struct { + egressIPConfig *util.ParsedNodeEgressIPConfiguration + mgmtIPs []net.IP + allocations map[string]string + healthClient healthcheck.EgressIPHealthClient + isReady bool + isReachable bool + isEgressAssignable bool + name string +} + +func (e *egressNode) getAllocationCountForEgressIP(name string) (count int) { + for _, egressIPName := range e.allocations { + if egressIPName == name { + count++ + } + } + return +} + +// isAnyClusterNodeIP verifies that the IP is not any node IP. +func (eIPC *egressIPClusterController) isAnyClusterNodeIP(ip net.IP) *egressNode { + for _, eNode := range eIPC.allocator.cache { + if ip.Equal(eNode.egressIPConfig.V6.IP) || ip.Equal(eNode.egressIPConfig.V4.IP) { + return eNode + } + } + return nil +} + +type EgressIPPatchStatus struct { + Op string `json:"op"` + Path string `json:"path"` + Value egressipv1.EgressIPStatus `json:"value"` +} + +// patchReplaceEgressIPStatus performs a replace patch operation of the egress +// IP status by replacing the status with the provided value. This allows us to +// update only the status field, without overwriting any other. 
This is +// important because processing egress IPs can take a while (when running on a +// public cloud and in the worst case), hence we don't want to perform a full +// object update which risks resetting the EgressIP object's fields to the state +// they had when we started processing the change. +func (eIPC *egressIPClusterController) patchReplaceEgressIPStatus(name string, statusItems []egressipv1.EgressIPStatusItem) error { + klog.Infof("Patching status on EgressIP %s: %v", name, statusItems) + return retry.RetryOnConflict(retry.DefaultRetry, func() error { + t := []EgressIPPatchStatus{ + { + Op: "replace", + Path: "/status", + Value: egressipv1.EgressIPStatus{ + Items: statusItems, + }, + }, + } + op, err := json.Marshal(&t) + if err != nil { + return fmt.Errorf("error serializing status patch operation: %+v, err: %v", statusItems, err) + } + return eIPC.kube.PatchEgressIP(name, op) + }) +} + +func (eIPC *egressIPClusterController) getAllocationTotalCount() float64 { + count := 0 + eIPC.allocator.Lock() + defer eIPC.allocator.Unlock() + for _, eNode := range eIPC.allocator.cache { + count += len(eNode.allocations) + } + return float64(count) +} + +type allocator struct { + *sync.Mutex + // A cache used for egress IP assignments containing data for all cluster nodes + // used for egress IP assignments + cache map[string]*egressNode +} + +type cloudPrivateIPConfigOp struct { + toAdd string + toDelete string +} + +// ipStringToCloudPrivateIPConfigName converts the net.IP string representation +// to a CloudPrivateIPConfig compatible name. + +// The string representation of the IPv6 address fc00:f853:ccd:e793::54 will be +// represented as: fc00.f853.0ccd.e793.0000.0000.0000.0054 + +// We thus need to fully expand the IP string and replace every fifth +// character's colon with a dot. 
+func ipStringToCloudPrivateIPConfigName(ipString string) (name string) { + ip := net.ParseIP(ipString) + if ip.To4() != nil { + return ipString + } + dst := make([]byte, hex.EncodedLen(len(ip))) + hex.Encode(dst, ip) + for i := 0; i < len(dst); i += 4 { + if len(dst)-i == 4 { + name += string(dst[i : i+4]) + } else { + name += string(dst[i:i+4]) + "." + } + } + return +} + +func (eIPC *egressIPClusterController) executeCloudPrivateIPConfigOps(egressIPName string, ops map[string]*cloudPrivateIPConfigOp) error { + for egressIP, op := range ops { + cloudPrivateIPConfigName := ipStringToCloudPrivateIPConfigName(egressIP) + cloudPrivateIPConfig, err := eIPC.watchFactory.GetCloudPrivateIPConfig(cloudPrivateIPConfigName) + // toAdd and toDelete is non-empty, this indicates an UPDATE for which + // the object **must** exist, if not: that's an error. + if op.toAdd != "" && op.toDelete != "" { + if err != nil { + return fmt.Errorf("cloud update request failed for CloudPrivateIPConfig: %s, could not get item, err: %v", cloudPrivateIPConfigName, err) + } + // Do not update if object is being deleted + if !cloudPrivateIPConfig.GetDeletionTimestamp().IsZero() { + return fmt.Errorf("cloud update request failed, CloudPrivateIPConfig: %s is being deleted", cloudPrivateIPConfigName) + } + cloudPrivateIPConfig.Spec.Node = op.toAdd + if _, err := eIPC.kube.UpdateCloudPrivateIPConfig(cloudPrivateIPConfig); err != nil { + eIPRef := v1.ObjectReference{ + Kind: "EgressIP", + Name: egressIPName, + } + eIPC.recorder.Eventf(&eIPRef, v1.EventTypeWarning, "CloudUpdateFailed", "egress IP: %s for object EgressIP: %s could not be updated, err: %v", egressIP, egressIPName, err) + return fmt.Errorf("cloud update request failed for CloudPrivateIPConfig: %s, err: %v", cloudPrivateIPConfigName, err) + } + // toAdd is non-empty, this indicates an ADD + // if the object already exists for the specified node that's a no-op + // if the object already exists and the request is for a different node, that's 
an error + } else if op.toAdd != "" { + if err == nil { + if op.toAdd == cloudPrivateIPConfig.Spec.Node { + klog.Infof("CloudPrivateIPConfig: %s already assigned to node: %s", cloudPrivateIPConfigName, cloudPrivateIPConfig.Spec.Node) + continue + } + return fmt.Errorf("cloud create request failed for CloudPrivateIPConfig: %s, err: item exists", cloudPrivateIPConfigName) + } + cloudPrivateIPConfig := ocpcloudnetworkapi.CloudPrivateIPConfig{ + ObjectMeta: metav1.ObjectMeta{ + Name: cloudPrivateIPConfigName, + Annotations: map[string]string{ + util.OVNEgressIPOwnerRefLabel: egressIPName, + }, + }, + Spec: ocpcloudnetworkapi.CloudPrivateIPConfigSpec{ + Node: op.toAdd, + }, + } + if _, err := eIPC.kube.CreateCloudPrivateIPConfig(&cloudPrivateIPConfig); err != nil { + eIPRef := v1.ObjectReference{ + Kind: "EgressIP", + Name: egressIPName, + } + eIPC.recorder.Eventf(&eIPRef, v1.EventTypeWarning, "CloudAssignmentFailed", "egress IP: %s for object EgressIP: %s could not be created, err: %v", egressIP, egressIPName, err) + return fmt.Errorf("cloud add request failed for CloudPrivateIPConfig: %s, err: %v", cloudPrivateIPConfigName, err) + } + // toDelete is non-empty, this indicates a DELETE - if the object does not exist, log an Info message and continue with the next op. + // The reason for why we are not throwing an error here is that desired state (deleted) == isState (object not found). + // If for whatever reason we have a pending toDelete op for a deleted object, then this op should simply be silently ignored. + // Any other error, return an error to trigger a retry. 
+ } else if op.toDelete != "" { + if err != nil { + if apierrors.IsNotFound(err) { + klog.Infof("Cloud deletion request failed for CloudPrivateIPConfig: %s, item already deleted, err: %v", cloudPrivateIPConfigName, err) + continue + } else { + return fmt.Errorf("cloud deletion request failed for CloudPrivateIPConfig: %s, could not get item, err: %v", cloudPrivateIPConfigName, err) + } + } + if err := eIPC.kube.DeleteCloudPrivateIPConfig(cloudPrivateIPConfigName); err != nil { + eIPRef := v1.ObjectReference{ + Kind: "EgressIP", + Name: egressIPName, + } + eIPC.recorder.Eventf(&eIPRef, v1.EventTypeWarning, "CloudDeletionFailed", "egress IP: %s for object EgressIP: %s could not be deleted, err: %v", egressIP, egressIPName, err) + return fmt.Errorf("cloud deletion request failed for CloudPrivateIPConfig: %s, err: %v", cloudPrivateIPConfigName, err) + } + } + } + return nil +} + +// executeCloudPrivateIPConfigChange computes a diff between what needs to be +// assigned/removed and executes the object modification afterwards. 
+// Specifically: if one egress IP is moved from nodeA to nodeB, we actually care +// about an update on the CloudPrivateIPConfig object represented by that egress +// IP, cloudPrivateIPConfigOp is a helper used to determine that sort of +// operations from toAssign/toRemove +func (eIPC *egressIPClusterController) executeCloudPrivateIPConfigChange(egressIPName string, toAssign, toRemove []egressipv1.EgressIPStatusItem) error { + eIPC.pendingCloudPrivateIPConfigsMutex.Lock() + defer eIPC.pendingCloudPrivateIPConfigsMutex.Unlock() + ops := make(map[string]*cloudPrivateIPConfigOp, len(toAssign)+len(toRemove)) + for _, assignment := range toAssign { + ops[assignment.EgressIP] = &cloudPrivateIPConfigOp{ + toAdd: assignment.Node, + } + } + for _, removal := range toRemove { + if op, exists := ops[removal.EgressIP]; exists { + op.toDelete = removal.Node + } else { + ops[removal.EgressIP] = &cloudPrivateIPConfigOp{ + toDelete: removal.Node, + } + } + } + // Merge ops into the existing pendingCloudPrivateIPConfigsOps. + // This allows us to: + // a) execute only the new ops + // b) keep track of any pending changes + if len(ops) > 0 { + if _, ok := eIPC.pendingCloudPrivateIPConfigsOps[egressIPName]; !ok { + // Set all operations for the EgressIP object if none are in the cache currently. + eIPC.pendingCloudPrivateIPConfigsOps[egressIPName] = ops + } else { + for cloudPrivateIP, op := range ops { + if _, ok := eIPC.pendingCloudPrivateIPConfigsOps[egressIPName][cloudPrivateIP]; !ok { + // If this specific EgressIP object's CloudPrivateIPConfig address currently has no + // op, simply set it. + eIPC.pendingCloudPrivateIPConfigsOps[egressIPName][cloudPrivateIP] = op + } else { + // If an existing operation for this CloudPrivateIP exists, then the following logic should + // apply: + // If toDelete is currently set: keep the current toDelete. Theoretically, the oldest toDelete + // is the good one. If toDelete if currently not set, overwrite it with the new value. 
+ // If toAdd is currently set: overwrite with the new toAdd. Theoretically, the newest toAdd is + // the good one. + // Therefore, only replace toAdd over a previously existing op and only replace toDelete if + // it's unset. + if op.toAdd != "" { + eIPC.pendingCloudPrivateIPConfigsOps[egressIPName][cloudPrivateIP].toAdd = op.toAdd + } + if eIPC.pendingCloudPrivateIPConfigsOps[egressIPName][cloudPrivateIP].toDelete == "" { + eIPC.pendingCloudPrivateIPConfigsOps[egressIPName][cloudPrivateIP].toDelete = op.toDelete + } + } + } + } + } + return eIPC.executeCloudPrivateIPConfigOps(egressIPName, ops) +} + +type egressIPClusterController struct { + recorder record.EventRecorder + stopChan chan struct{} + wg *sync.WaitGroup + kube *kube.KubeOVN + // egressIPAssignmentMutex is used to ensure a safe updates between + // concurrent go-routines which could be modifying the egress IP status + // assignment simultaneously. Currently WatchEgressNodes and WatchEgressIP + // run two separate go-routines which do this. + egressIPAssignmentMutex *sync.Mutex + // pendingCloudPrivateIPConfigsMutex is used to ensure synchronized access + // to pendingCloudPrivateIPConfigsOps which is accessed by the egress IP and + // cloudPrivateIPConfig go-routines + pendingCloudPrivateIPConfigsMutex *sync.Mutex + // pendingCloudPrivateIPConfigsOps is a cache of pending + // CloudPrivateIPConfig changes that we are waiting on an answer for. Items + // in this map are only ever removed once the op is fully finished and we've + // been notified of this. That means: + // - On add operations we only delete once we've seen that the + // CloudPrivateIPConfig is fully added. + // - On delete: when it's fully deleted. + // - On update: once we finish processing the add - which comes after the + // delete. 
+ pendingCloudPrivateIPConfigsOps map[string]map[string]*cloudPrivateIPConfigOp + // allocator is a cache of egress IP centric data needed to when both route + // health-checking and tracking allocations made + allocator allocator + // watchFactory watching k8s objects + watchFactory *factory.WatchFactory + // EgressIP Node reachability total timeout configuration + egressIPTotalTimeout int + // reachability check interval + reachabilityCheckInterval time.Duration + // EgressIP Node reachability gRPC port (0 means it should use dial instead) + egressIPNodeHealthCheckPort int + // retry framework for Egress nodes + retryEgressNodes *objretry.RetryFramework + // retry framework for egress IP + retryEgressIPs *objretry.RetryFramework + // retry framework for Cloud private IP config + retryCloudPrivateIPConfig *objretry.RetryFramework + // egressNodes events factory handler + egressNodeHandler *factory.Handler + // egressIP events factory handler + egressIPHandler *factory.Handler + // cloudPrivateIPConfig events factory handler + cloudPrivateIPConfigHandler *factory.Handler +} + +func newEgressIPController(ovnClient *util.OVNClusterManagerClientset, wf *factory.WatchFactory, recorder record.EventRecorder) *egressIPClusterController { + kube := &kube.KubeOVN{ + Kube: kube.Kube{KClient: ovnClient.KubeClient}, + EIPClient: ovnClient.EgressIPClient, + CloudNetworkClient: ovnClient.CloudNetworkClient, + } + wg := &sync.WaitGroup{} + eIPC := &egressIPClusterController{ + kube: kube, + wg: wg, + egressIPAssignmentMutex: &sync.Mutex{}, + pendingCloudPrivateIPConfigsMutex: &sync.Mutex{}, + pendingCloudPrivateIPConfigsOps: make(map[string]map[string]*cloudPrivateIPConfigOp), + allocator: allocator{&sync.Mutex{}, make(map[string]*egressNode)}, + watchFactory: wf, + recorder: recorder, + egressIPTotalTimeout: config.OVNKubernetesFeature.EgressIPReachabiltyTotalTimeout, + reachabilityCheckInterval: egressIPReachabilityCheckInterval, + egressIPNodeHealthCheckPort: 
config.OVNKubernetesFeature.EgressIPNodeHealthCheckPort, + } + eIPC.initRetryFramework() + return eIPC +} + +func (eIPC *egressIPClusterController) initRetryFramework() { + eIPC.retryEgressNodes = eIPC.newRetryFramework(factory.EgressNodeType) + eIPC.retryEgressIPs = eIPC.newRetryFramework(factory.EgressIPType) + if util.PlatformTypeIsEgressIPCloudProvider() { + eIPC.retryCloudPrivateIPConfig = eIPC.newRetryFramework(factory.CloudPrivateIPConfigType) + } +} + +func (eIPC *egressIPClusterController) newRetryFramework(objectType reflect.Type) *objretry.RetryFramework { + eventHandler := &egressIPClusterControllerEventHandler{ + objType: objectType, + eIPC: eIPC, + syncFunc: nil, + } + resourceHandler := &objretry.ResourceHandler{ + HasUpdateFunc: true, // all egressIP types have update func + NeedsUpdateDuringRetry: true, // true for all egressIP types + ObjType: objectType, + EventHandler: eventHandler, + } + return objretry.NewRetryFramework(eIPC.stopChan, eIPC.wg, eIPC.watchFactory, resourceHandler) +} + +func (eIPC *egressIPClusterController) Start() error { + var err error + // In cluster manager, we only need to watch for egressNodes, egressIPs + // and cloudPrivateIPConfig + if eIPC.egressNodeHandler, err = eIPC.WatchEgressNodes(); err != nil { + return fmt.Errorf("unable to watch egress nodes %w", err) + } + if eIPC.egressIPHandler, err = eIPC.WatchEgressIP(); err != nil { + return err + } + if util.PlatformTypeIsEgressIPCloudProvider() { + if eIPC.cloudPrivateIPConfigHandler, err = eIPC.WatchCloudPrivateIPConfig(); err != nil { + return err + } + } + if config.OVNKubernetesFeature.EgressIPReachabiltyTotalTimeout == 0 { + klog.V(2).Infof("EgressIP node reachability check disabled") + } else if config.OVNKubernetesFeature.EgressIPNodeHealthCheckPort != 0 { + klog.Infof("EgressIP node reachability enabled and using gRPC port %d", + config.OVNKubernetesFeature.EgressIPNodeHealthCheckPort) + } + return nil +} + +// WatchEgressNodes starts the watching of egress 
assignable nodes and calls +// back the appropriate handler logic. +func (eIPC *egressIPClusterController) WatchEgressNodes() (*factory.Handler, error) { + return eIPC.retryEgressNodes.WatchResource() +} + +// WatchCloudPrivateIPConfig starts the watching of cloudprivateipconfigs +// resource and calls back the appropriate handler logic. +func (eIPC *egressIPClusterController) WatchCloudPrivateIPConfig() (*factory.Handler, error) { + return eIPC.retryCloudPrivateIPConfig.WatchResource() +} + +// WatchEgressIP starts the watching of egressip resource and calls back the +// appropriate handler logic. It also initiates the other dedicated resource +// handlers for egress IP setup: namespaces, pods. +func (eIPC *egressIPClusterController) WatchEgressIP() (*factory.Handler, error) { + return eIPC.retryEgressIPs.WatchResource() +} + +func (eIPC *egressIPClusterController) Stop() { + close(eIPC.stopChan) + eIPC.wg.Wait() + if eIPC.egressNodeHandler != nil { + eIPC.watchFactory.RemoveNodeHandler(eIPC.egressNodeHandler) + } + if eIPC.egressIPHandler != nil { + eIPC.watchFactory.RemoveEgressIPHandler(eIPC.egressIPHandler) + } + if eIPC.cloudPrivateIPConfigHandler != nil { + eIPC.watchFactory.RemoveCloudPrivateIPConfigHandler(eIPC.cloudPrivateIPConfigHandler) + } +} + +type egressIPNodeStatus struct { + Node string + Name string +} + +// getSortedEgressData returns a sorted slice of all egressNodes based on the +// amount of allocations found in the cache +func (eIPC *egressIPClusterController) getSortedEgressData() ([]*egressNode, map[string]egressIPNodeStatus) { + assignableNodes := []*egressNode{} + allAllocations := make(map[string]egressIPNodeStatus) + for _, eNode := range eIPC.allocator.cache { + if eNode.isEgressAssignable && eNode.isReady && eNode.isReachable { + assignableNodes = append(assignableNodes, eNode) + } + for ip, eipName := range eNode.allocations { + allAllocations[ip] = egressIPNodeStatus{Node: eNode.name, Name: eipName} + } + } + 
sort.Slice(assignableNodes, func(i, j int) bool { + return len(assignableNodes[i].allocations) < len(assignableNodes[j].allocations) + }) + return assignableNodes, allAllocations +} + +func (eIPC *egressIPClusterController) initEgressNodeReachability(nodes []interface{}) error { + go eIPC.checkEgressNodesReachability() + return nil +} + +func (eIPC *egressIPClusterController) setNodeEgressAssignable(nodeName string, isAssignable bool) { + eIPC.allocator.Lock() + defer eIPC.allocator.Unlock() + if eNode, exists := eIPC.allocator.cache[nodeName]; exists { + eNode.isEgressAssignable = isAssignable + // if the node is not assignable/ready/reachable anymore we need to + // empty all of it's allocations from our cache since we'll clear all + // assignments from this node later on, because of this. + if !isAssignable { + eNode.allocations = make(map[string]string) + } + } +} + +func (eIPC *egressIPClusterController) isEgressNodeReady(egressNode *v1.Node) bool { + for _, condition := range egressNode.Status.Conditions { + if condition.Type == v1.NodeReady { + return condition.Status == v1.ConditionTrue + } + } + return false +} + +func isReachableLegacy(node string, mgmtIPs []net.IP, totalTimeout int) bool { + var retryTimeOut, initialRetryTimeOut time.Duration + + numMgmtIPs := len(mgmtIPs) + if numMgmtIPs == 0 { + return false + } + + switch totalTimeout { + // Check if we need to do node reachability check + case 0: + return true + case 1: + // Using time duration for initial retry with 700/numIPs msec and retry of 100/numIPs msec + // to ensure total wait time will be in range with the configured value including a sleep of 100msec between attempts. 
+ initialRetryTimeOut = time.Duration(700/numMgmtIPs) * time.Millisecond + retryTimeOut = time.Duration(100/numMgmtIPs) * time.Millisecond + default: + // Using time duration for initial retry with 900/numIPs msec + // to ensure total wait time will be in range with the configured value including a sleep of 100msec between attempts. + initialRetryTimeOut = time.Duration(900/numMgmtIPs) * time.Millisecond + retryTimeOut = initialRetryTimeOut + } + + timeout := initialRetryTimeOut + endTime := time.Now().Add(time.Second * time.Duration(totalTimeout)) + for time.Now().Before(endTime) { + for _, ip := range mgmtIPs { + if dialer.dial(ip, timeout) { + return true + } + } + time.Sleep(100 * time.Millisecond) + timeout = retryTimeOut + } + klog.Errorf("Failed reachability check for %s", node) + return false +} + +// checkEgressNodesReachability continuously checks if all nodes used for egress +// IP assignment are reachable, and updates the nodes following the result. This +// is important because egress IP is based upon routing traffic to these nodes, +// and if they aren't reachable we shouldn't be using them for egress IP. 
+func (eIPC *egressIPClusterController) checkEgressNodesReachability() { + timer := time.NewTicker(eIPC.reachabilityCheckInterval) + defer timer.Stop() + for { + select { + case <-timer.C: + checkEgressNodesReachabilityIterate(eIPC) + case <-eIPC.stopChan: + klog.V(5).Infof("Stop channel got triggered: will stop checkEgressNodesReachability") + return + } + } +} + +func checkEgressNodesReachabilityIterate(eIPC *egressIPClusterController) { + reAddOrDelete := map[string]bool{} + eIPC.allocator.Lock() + for _, eNode := range eIPC.allocator.cache { + if eNode.isEgressAssignable && eNode.isReady { + wasReachable := eNode.isReachable + isReachable := eIPC.isReachable(eNode.name, eNode.mgmtIPs, eNode.healthClient) + if wasReachable && !isReachable { + reAddOrDelete[eNode.name] = true + } else if !wasReachable && isReachable { + reAddOrDelete[eNode.name] = false + } + eNode.isReachable = isReachable + } else { + // End connection (if there is one). This is important because + // it accounts for cases where node is not labelled with + // egress-assignable, so connection is no longer needed. Calling + // this on a already disconnected node is expected to be cheap. 
+ eNode.healthClient.Disconnect() + } + } + eIPC.allocator.Unlock() + for nodeName, shouldDelete := range reAddOrDelete { + if shouldDelete { + metrics.RecordEgressIPUnreachableNode() + klog.Warningf("Node: %s is detected as unreachable, deleting it from egress assignment", nodeName) + if err := eIPC.deleteEgressNode(nodeName); err != nil { + klog.Errorf("Node: %s is detected as unreachable, but could not re-assign egress IPs, err: %v", nodeName, err) + } + } else { + klog.Infof("Node: %s is detected as reachable and ready again, adding it to egress assignment", nodeName) + if err := eIPC.addEgressNode(nodeName); err != nil { + klog.Errorf("Node: %s is detected as reachable and ready again, but could not re-assign egress IPs, err: %v", nodeName, err) + } + } + } +} + +func (eIPC *egressIPClusterController) isReachable(nodeName string, mgmtIPs []net.IP, healthClient healthcheck.EgressIPHealthClient) bool { + // Check if we need to do node reachability check + if eIPC.egressIPTotalTimeout == 0 { + return true + } + + if eIPC.egressIPNodeHealthCheckPort == 0 { + return isReachableLegacy(nodeName, mgmtIPs, eIPC.egressIPTotalTimeout) + } + return isReachableViaGRPC(mgmtIPs, healthClient, eIPC.egressIPNodeHealthCheckPort, eIPC.egressIPTotalTimeout) +} + +func (eIPC *egressIPClusterController) isEgressNodeReachable(egressNode *v1.Node) bool { + eIPC.allocator.Lock() + defer eIPC.allocator.Unlock() + if eNode, exists := eIPC.allocator.cache[egressNode.Name]; exists { + return eNode.isReachable || eIPC.isReachable(eNode.name, eNode.mgmtIPs, eNode.healthClient) + } + return false +} + +func (eIPC *egressIPClusterController) setNodeEgressReady(nodeName string, isReady bool) { + eIPC.allocator.Lock() + defer eIPC.allocator.Unlock() + if eNode, exists := eIPC.allocator.cache[nodeName]; exists { + eNode.isReady = isReady + // see setNodeEgressAssignable + if !isReady { + eNode.allocations = make(map[string]string) + } + } +} + +func (eIPC *egressIPClusterController) 
setNodeEgressReachable(nodeName string, isReachable bool) { + eIPC.allocator.Lock() + defer eIPC.allocator.Unlock() + if eNode, exists := eIPC.allocator.cache[nodeName]; exists { + eNode.isReachable = isReachable + // see setNodeEgressAssignable + if !isReachable { + eNode.allocations = make(map[string]string) + } + } +} + +func (eIPC *egressIPClusterController) addEgressNode(nodeName string) error { + var errors []error + klog.V(5).Infof("Egress node: %s about to be initialized", nodeName) + + // If a node has been labelled for egress IP we need to check if there are any + // egress IPs which are missing an assignment. If there are, we need to send a + // synthetic update since reconcileEgressIP will then try to assign those IPs to + // this node (if possible) + egressIPs, err := eIPC.kube.GetEgressIPs() + if err != nil { + return fmt.Errorf("unable to list EgressIPs, err: %v", err) + } + for _, egressIP := range egressIPs.Items { + if len(egressIP.Spec.EgressIPs) != len(egressIP.Status.Items) { + // Send a "synthetic update" on all egress IPs which are not fully + // assigned, the reconciliation loop for WatchEgressIP will try to + // assign stuff to this new node. The workqueue's delta FIFO + // implementation will not trigger a watch event for updates on + // objects which have no semantic difference, hence: call the + // reconciliation function directly. + if err := eIPC.reconcileEgressIP(nil, &egressIP); err != nil { + errors = append(errors, fmt.Errorf("synthetic update for EgressIP: %s failed, err: %v", egressIP.Name, err)) + } + } + } + + if len(errors) > 0 { + return utilerrors.NewAggregate(errors) + } + return nil +} + +// deleteNodeForEgress remove the default allow logical router policies for the +// node and removes the node from the allocator cache. 
+func (eIPC *egressIPClusterController) deleteNodeForEgress(node *v1.Node) { + eIPC.allocator.Lock() + if eNode, exists := eIPC.allocator.cache[node.Name]; exists { + eNode.healthClient.Disconnect() + } + delete(eIPC.allocator.cache, node.Name) + eIPC.allocator.Unlock() +} + +func (eIPC *egressIPClusterController) deleteEgressNode(nodeName string) error { + var errorAggregate []error + klog.V(5).Infof("Egress node: %s about to be removed", nodeName) + // Since the node has been labelled as "not usable" for egress IP + // assignments we need to find all egress IPs which have an assignment to + // it, and move them elsewhere. + egressIPs, err := eIPC.kube.GetEgressIPs() + if err != nil { + return fmt.Errorf("unable to list EgressIPs, err: %v", err) + } + for _, egressIP := range egressIPs.Items { + for _, status := range egressIP.Status.Items { + if status.Node == nodeName { + // Send a "synthetic update" on all egress IPs which have an + // assignment to this node. The reconciliation loop for + // WatchEgressIP will see that the current assignment status to + // this node is invalid and try to re-assign elsewhere. The + // workqueue's delta FIFO implementation will not trigger a + // watch event for updates on objects which have no semantic + // difference, hence: call the reconciliation function directly. 
+ if err := eIPC.reconcileEgressIP(nil, &egressIP); err != nil { + errorAggregate = append(errorAggregate, fmt.Errorf("re-assignment for EgressIP: %s failed, unable to update object, err: %v", egressIP.Name, err)) + } + break + } + } + } + if len(errorAggregate) > 0 { + return utilerrors.NewAggregate(errorAggregate) + } + return nil +} + +func (eIPC *egressIPClusterController) initEgressIPAllocator(node *v1.Node) (err error) { + eIPC.allocator.Lock() + defer eIPC.allocator.Unlock() + if _, exists := eIPC.allocator.cache[node.Name]; !exists { + var parsedEgressIPConfig *util.ParsedNodeEgressIPConfiguration + if util.PlatformTypeIsEgressIPCloudProvider() { + parsedEgressIPConfig, err = util.ParseCloudEgressIPConfig(node) + if err != nil { + return fmt.Errorf("unable to use cloud node for egress assignment, err: %v", err) + } + } else { + parsedEgressIPConfig, err = util.ParseNodePrimaryIfAddr(node) + if err != nil { + return fmt.Errorf("unable to use node for egress assignment, err: %v", err) + } + } + nodeSubnets, err := util.ParseNodeHostSubnetAnnotation(node, types.DefaultNetworkName) + if err != nil { + return fmt.Errorf("failed to parse node %s subnets annotation %v", node.Name, err) + } + mgmtIPs := make([]net.IP, len(nodeSubnets)) + for i, subnet := range nodeSubnets { + mgmtIPs[i] = util.GetNodeManagementIfAddr(subnet).IP + } + eIPC.allocator.cache[node.Name] = &egressNode{ + name: node.Name, + egressIPConfig: parsedEgressIPConfig, + mgmtIPs: mgmtIPs, + allocations: make(map[string]string), + healthClient: hccAllocator.allocate(node.Name), + } + } + return nil +} + +// deleteAllocatorEgressIPAssignments deletes the allocation as to keep the +// cache state correct, also see addAllocatorEgressIPAssignments +func (eIPC *egressIPClusterController) deleteAllocatorEgressIPAssignments(statusAssignments []egressipv1.EgressIPStatusItem) { + eIPC.allocator.Lock() + defer eIPC.allocator.Unlock() + for _, status := range statusAssignments { + if eNode, exists := 
eIPC.allocator.cache[status.Node]; exists { + delete(eNode.allocations, status.EgressIP) + } + } +} + +// deleteAllocatorEgressIPAssignmentIfExists deletes egressIP config from node allocations map +// if the entry is available and returns assigned node name, otherwise returns empty string. +func (eIPC *egressIPClusterController) deleteAllocatorEgressIPAssignmentIfExists(name, egressIP string) string { + eIPC.allocator.Lock() + defer eIPC.allocator.Unlock() + for nodeName, eNode := range eIPC.allocator.cache { + if egressIPName, exists := eNode.allocations[egressIP]; exists && egressIPName == name { + delete(eNode.allocations, egressIP) + return nodeName + } + } + return "" +} + +// addAllocatorEgressIPAssignments adds the allocation to the cache, so that +// they are tracked during the life-cycle of ovnkube-master +func (eIPC *egressIPClusterController) addAllocatorEgressIPAssignments(name string, statusAssignments []egressipv1.EgressIPStatusItem) { + eIPC.allocator.Lock() + defer eIPC.allocator.Unlock() + for _, status := range statusAssignments { + if eNode, exists := eIPC.allocator.cache[status.Node]; exists { + eNode.allocations[status.EgressIP] = name + } + } +} + +func (eIPC *egressIPClusterController) reconcileEgressIP(old, new *egressipv1.EgressIP) (err error) { + // Lock the assignment, this is needed because this function can end up + // being called from WatchEgressNodes and WatchEgressIP, i.e: two different + // go-routines and we need to make sure the assignment is safe. + eIPC.egressIPAssignmentMutex.Lock() + defer eIPC.egressIPAssignmentMutex.Unlock() + + name := "" + + // Initialize a status which will be used to compare against + // new.spec.egressIPs and decide on what from the status should get deleted + // or kept. + status := []egressipv1.EgressIPStatusItem{} + + // Initialize an empty objects as to avoid SIGSEGV. The code should play + // nicely with empty objects though. 
+ newEIP := &egressipv1.EgressIP{} + + // Initialize a sets.String which holds egress IPs that were not fully assigned + // but are allocated and they are meant to be removed. + staleEgressIPs := sets.NewString() + if old != nil { + name = old.Name + status = old.Status.Items + staleEgressIPs.Insert(old.Spec.EgressIPs...) + } + if new != nil { + newEIP = new + name = newEIP.Name + status = newEIP.Status.Items + if staleEgressIPs.Len() > 0 { + for _, egressIP := range newEIP.Spec.EgressIPs { + if staleEgressIPs.Has(egressIP) { + staleEgressIPs.Delete(egressIP) + } + } + } + } + + // Validate the spec and use only the valid egress IPs when performing any + // successive operations, theoretically: the user could specify invalid IP + // addresses, which would break us. + validSpecIPs, err := eIPC.validateEgressIPSpec(name, newEIP.Spec.EgressIPs) + if err != nil { + return fmt.Errorf("invalid EgressIP spec, err: %v", err) + } + + // Validate the status, on restart it could be the case that what might have + // been assigned when ovnkube-master last ran is not a valid assignment + // anymore (specifically if ovnkube-master has been crashing for a while). + // Any invalid status at this point in time needs to be removed and assigned + // to a valid node. + validStatus, invalidStatus := eIPC.validateEgressIPStatus(name, status) + for status := range validStatus { + // If the spec has changed and an egress IP has been removed by the + // user: we need to un-assign that egress IP + if !validSpecIPs.Has(status.EgressIP) { + invalidStatus[status] = "" + delete(validStatus, status) + } + } + + invalidStatusLen := len(invalidStatus) + if invalidStatusLen > 0 { + metrics.RecordEgressIPRebalance(invalidStatusLen) + } + + // Add only the diff between what is requested and valid and that which + // isn't already assigned. 
+ ipsToAssign := validSpecIPs + ipsToRemove := sets.New[string]() + statusToAdd := make([]egressipv1.EgressIPStatusItem, 0, len(ipsToAssign)) + statusToKeep := make([]egressipv1.EgressIPStatusItem, 0, len(validStatus)) + for status := range validStatus { + statusToKeep = append(statusToKeep, status) + ipsToAssign.Delete(status.EgressIP) + } + statusToRemove := make([]egressipv1.EgressIPStatusItem, 0, invalidStatusLen) + for status := range invalidStatus { + statusToRemove = append(statusToRemove, status) + ipsToRemove.Insert(status.EgressIP) + } + if ipsToRemove.Len() > 0 { + // The following is added as to ensure that we only add after having + // successfully removed egress IPs. This case is not very important on + // bare-metal (since we execute the add after the remove below, and + // hence have full control of the execution - barring its success), but + // on a cloud: we patch all validStatsuses below, we wait for the status + // on the CloudPrivateIPConfig(s) we create to be set before executing + // anything in the OVN DB (Note that the status will be set by this + // controller in cluster-manager and asynchronously the ovnkube-master + // will read the CRD change and do the necessary plumbing (ADD/UPDATE/DELETE) + // in the OVN DB). + // So, we need to make sure that we delete and + // then add, mainly because if EIP1 is added to nodeX and then EIP2 is + // removed from nodeX, we might remove the setup made for EIP1. The + // add/delete ordering of events is not guaranteed on the cloud where we + // depend on other controllers to execute the work for us however. By + // comparing the spec to the status and applying the following truth + // table we can ensure that order of events. + + // case ID | Egress IP to add | Egress IP to remove | ipsToAssign + // 1 | e1 | e1 | e1 + // 2 | e2 | e1 | - + // 3 | e2 | - | e2 + // 4 | - | e1 | - + + // Case 1 handles updates. Case 2 and 3 makes sure we don't add until we + // successfully delete. 
Case 4 just shows an example of what would + // happen if we don't have anything to add + ipsToAssign = ipsToAssign.Intersection(ipsToRemove) + } + + if !util.PlatformTypeIsEgressIPCloudProvider() { + if len(statusToRemove) > 0 { + // Delete the statusToRemove from the allocator cache. If we don't + // do this we will occupy assignment positions for the ipsToAssign, + // even though statusToRemove will be removed afterwards + eIPC.deleteAllocatorEgressIPAssignments(statusToRemove) + } + if len(ipsToAssign) > 0 { + statusToAdd = eIPC.assignEgressIPs(name, ipsToAssign.UnsortedList()) + statusToKeep = append(statusToKeep, statusToAdd...) + } + // Add all assignments which are to be kept to the allocator cache, + // allowing us to track all assignments which have been performed and + // avoid incorrect future assignments due to a de-synchronized cache. + eIPC.addAllocatorEgressIPAssignments(name, statusToKeep) + // Update the object only on an ADD/UPDATE. If we are processing a + // DELETE, new will be nil and we should not update the object. + if len(statusToAdd) > 0 || (len(statusToRemove) > 0 && new != nil) { + if err := eIPC.patchReplaceEgressIPStatus(name, statusToKeep); err != nil { + return err + } + } + } else { + // Even when running on a public cloud, we must make sure that we unwire EgressIP + // configuration from OVN *before* we instruct the CloudNetworkConfigController + // to remove the CloudPrivateIPConfig object from the cloud. + // CloudPrivateIPConfig objects can be in the "Deleting" state for a long time, + // waiting for the underlying cloud to finish its action and to report success of the + // unattach operation. Some clouds such as Azure will remove the IP address nearly + // immediately, but then they will take a long time (seconds to minutes) to actually report + // success of the removal operation. + if len(statusToRemove) > 0 { + // Delete all assignments that are to be removed from the allocator + // cache. 
If we don't do this we will occupy assignment positions for + // the ipsToAdd, even though statusToRemove will be removed afterwards + eIPC.deleteAllocatorEgressIPAssignments(statusToRemove) + // Before updating the cloud private IP object, we need to remove the OVN configuration + // for these invalid statuses so that traffic is not blackholed to non-existing setup in the + // cloud. Thus we patch the egressIP status with the valid set of statuses which will + // trigger an event for the ovnkube-master to take action upon. + // Note that once we figure out the statusToAdd parts below we will trigger an + // update to cloudPrivateIP object which will trigger another patch for the eIP object. + if err := eIPC.patchReplaceEgressIPStatus(name, statusToKeep); err != nil { + return err + } + } + // When egress IP is not fully assigned to a node, then statusToRemove may not + // have those entries, hence retrieve it from staleEgressIPs for removing + // the item from cloudprivateipconfig. + for _, toRemove := range statusToRemove { + if !staleEgressIPs.Has(toRemove.EgressIP) { + continue + } + staleEgressIPs.Delete(toRemove.EgressIP) + } + for staleEgressIP := range staleEgressIPs { + if nodeName := eIPC.deleteAllocatorEgressIPAssignmentIfExists(name, staleEgressIP); nodeName != "" { + statusToRemove = append(statusToRemove, + egressipv1.EgressIPStatusItem{EgressIP: staleEgressIP, Node: nodeName}) + } + } + // If running on a public cloud we should not program OVN just yet for assignment + // operations. We need confirmation from the cloud-network-config-controller that + // it can assign the IPs. reconcileCloudPrivateIPConfig will take care of + // processing the answer from the requests we make here, and update OVN + // accordingly when we know what the outcome is. + if len(ipsToAssign) > 0 { + statusToAdd = eIPC.assignEgressIPs(name, ipsToAssign.UnsortedList()) + statusToKeep = append(statusToKeep, statusToAdd...) 
+ } + // Same as above: Add all assignments which are to be kept to the + // allocator cache, allowing us to track all assignments which have been + // performed and avoid incorrect future assignments due to a + // de-synchronized cache. + eIPC.addAllocatorEgressIPAssignments(name, statusToKeep) + + // Execute CloudPrivateIPConfig changes for assignments which need to be + // added/removed, assignments which don't change do not require any + // further setup. + if err := eIPC.executeCloudPrivateIPConfigChange(name, statusToAdd, statusToRemove); err != nil { + return err + } + } + + // Record the egress IP allocator count + metrics.RecordEgressIPCount(eIPC.getAllocationTotalCount()) + return nil +} + +// assignEgressIPs is the main assignment algorithm for egress IPs to nodes. +// Specifically we have a couple of hard constraints: a) the subnet of the node +// must be able to host the egress IP b) the egress IP cannot be a node IP c) +// the IP cannot already be assigned and reference by another EgressIP object d) +// no two egress IPs for the same EgressIP object can be assigned to the same +// node e) (for public clouds) the amount of egress IPs assigned to one node +// must respect its assignment capacity. Moreover there is a soft constraint: +// the assignments need to be balanced across all cluster nodes, so that no node +// becomes a bottleneck. The balancing is achieved by sorting the nodes in +// ascending order following their existing amount of allocations, and trying to +// assign the egress IP to the node with the lowest amount of allocations every +// time, this does not guarantee complete balance, but mostly complete. 
+func (eIPC *egressIPClusterController) assignEgressIPs(name string, egressIPs []string) []egressipv1.EgressIPStatusItem { + eIPC.allocator.Lock() + defer eIPC.allocator.Unlock() + assignments := []egressipv1.EgressIPStatusItem{} + assignableNodes, existingAllocations := eIPC.getSortedEgressData() + if len(assignableNodes) == 0 { + eIPRef := v1.ObjectReference{ + Kind: "EgressIP", + Name: name, + } + eIPC.recorder.Eventf(&eIPRef, v1.EventTypeWarning, "NoMatchingNodeFound", "no assignable nodes for EgressIP: %s, please tag at least one node with label: %s", name, util.GetNodeEgressLabel()) + klog.Errorf("No assignable nodes found for EgressIP: %s and requested IPs: %v", name, egressIPs) + return assignments + } + klog.V(5).Infof("Current assignments are: %+v", existingAllocations) + for _, egressIP := range egressIPs { + klog.V(5).Infof("Will attempt assignment for egress IP: %s", egressIP) + eIP := net.ParseIP(egressIP) + if status, exists := existingAllocations[eIP.String()]; exists { + // On public clouds we will re-process assignments for the same IP + // multiple times due to the nature of syncing each individual + // CloudPrivateIPConfig one at a time. This means that we are + // expected to end up in this situation multiple times per sync. Ex: + // Say we an EgressIP is created with IP1, IP2, IP3. We begin by + // assigning them all the first round. Next we get the + // CloudPrivateIPConfig confirming the addition of IP1, leading us + // to re-assign IP2, IP3, but since we've already assigned them + // we'll end up here. This is not an error. 
What would be an error + // is if the user created EIP1 with IP1 and a second EIP2 with IP1 + if name == status.Name { + // IP is already assigned for this EgressIP object + assignments = append(assignments, egressipv1.EgressIPStatusItem{ + Node: status.Node, + EgressIP: eIP.String(), + }) + continue + } else { + klog.Errorf("IP: %q for EgressIP: %s is already allocated for EgressIP: %s on %s", egressIP, name, status.Name, status.Node) + return assignments + } + } + if node := eIPC.isAnyClusterNodeIP(eIP); node != nil { + eIPRef := v1.ObjectReference{ + Kind: "EgressIP", + Name: name, + } + eIPC.recorder.Eventf( + &eIPRef, + v1.EventTypeWarning, + "UnsupportedRequest", + "Egress IP: %v for object EgressIP: %s is the IP address of node: %s, this is unsupported", eIP, name, node.name, + ) + klog.Errorf("Egress IP: %v is the IP address of node: %s", eIP, node.name) + return assignments + } + for _, eNode := range assignableNodes { + klog.V(5).Infof("Attempting assignment on egress node: %+v", eNode) + if eNode.getAllocationCountForEgressIP(name) > 0 { + klog.V(5).Infof("Node: %s is already in use by another egress IP for this EgressIP: %s, trying another node", eNode.name, name) + continue + } + if eNode.egressIPConfig.Capacity.IP < util.UnlimitedNodeCapacity { + if eNode.egressIPConfig.Capacity.IP-len(eNode.allocations) <= 0 { + klog.V(5).Infof("Additional allocation on Node: %s exhausts it's IP capacity, trying another node", eNode.name) + continue + } + } + if eNode.egressIPConfig.Capacity.IPv4 < util.UnlimitedNodeCapacity && utilnet.IsIPv4(eIP) { + if eNode.egressIPConfig.Capacity.IPv4-getIPFamilyAllocationCount(eNode.allocations, false) <= 0 { + klog.V(5).Infof("Additional allocation on Node: %s exhausts it's IPv4 capacity, trying another node", eNode.name) + continue + } + } + if eNode.egressIPConfig.Capacity.IPv6 < util.UnlimitedNodeCapacity && utilnet.IsIPv6(eIP) { + if eNode.egressIPConfig.Capacity.IPv6-getIPFamilyAllocationCount(eNode.allocations, true) <= 0 
{ + klog.V(5).Infof("Additional allocation on Node: %s exhausts it's IPv6 capacity, trying another node", eNode.name) + continue + } + } + if (eNode.egressIPConfig.V6.Net != nil && eNode.egressIPConfig.V6.Net.Contains(eIP)) || + (eNode.egressIPConfig.V4.Net != nil && eNode.egressIPConfig.V4.Net.Contains(eIP)) { + assignments = append(assignments, egressipv1.EgressIPStatusItem{ + Node: eNode.name, + EgressIP: eIP.String(), + }) + klog.Infof("Successful assignment of egress IP: %s on node: %+v", egressIP, eNode) + eNode.allocations[eIP.String()] = name + break + } + } + } + if len(assignments) == 0 { + eIPRef := v1.ObjectReference{ + Kind: "EgressIP", + Name: name, + } + eIPC.recorder.Eventf(&eIPRef, v1.EventTypeWarning, "NoMatchingNodeFound", "No matching nodes found, which can host any of the egress IPs: %v for object EgressIP: %s", egressIPs, name) + klog.Errorf("No matching host found for EgressIP: %s", name) + return assignments + } + if len(assignments) < len(egressIPs) { + eIPRef := v1.ObjectReference{ + Kind: "EgressIP", + Name: name, + } + eIPC.recorder.Eventf(&eIPRef, v1.EventTypeWarning, "UnassignedRequest", "Not all egress IPs for EgressIP: %s could be assigned, please tag more nodes", name) + } + return assignments +} + +func getIPFamilyAllocationCount(allocations map[string]string, isIPv6 bool) (count int) { + for allocation := range allocations { + if utilnet.IsIPv4String(allocation) && !isIPv6 { + count++ + } + if utilnet.IsIPv6String(allocation) && isIPv6 { + count++ + } + } + return +} + +func (eIPC *egressIPClusterController) validateEgressIPSpec(name string, egressIPs []string) (sets.Set[string], error) { + validatedEgressIPs := sets.New[string]() + for _, egressIP := range egressIPs { + ip := net.ParseIP(egressIP) + if ip == nil { + eIPRef := v1.ObjectReference{ + Kind: "EgressIP", + Name: name, + } + eIPC.recorder.Eventf(&eIPRef, v1.EventTypeWarning, "InvalidEgressIP", "egress IP: %s for object EgressIP: %s is not a valid IP address", egressIP, 
name) + return nil, fmt.Errorf("unable to parse provided EgressIP: %s, invalid", egressIP) + } + validatedEgressIPs.Insert(ip.String()) + } + return validatedEgressIPs, nil +} + +// validateEgressIPStatus validates if the statuses are valid given what the +// cache knows about all egress nodes. WatchEgressNodes is initialized before +// any other egress IP handler, so the cache should be warm and correct once we +// start going this. +func (eIPC *egressIPClusterController) validateEgressIPStatus(name string, items []egressipv1.EgressIPStatusItem) (map[egressipv1.EgressIPStatusItem]string, map[egressipv1.EgressIPStatusItem]string) { + eIPC.allocator.Lock() + defer eIPC.allocator.Unlock() + valid, invalid := make(map[egressipv1.EgressIPStatusItem]string), make(map[egressipv1.EgressIPStatusItem]string) + for _, eIPStatus := range items { + validAssignment := true + eNode, exists := eIPC.allocator.cache[eIPStatus.Node] + if !exists { + klog.Errorf("Allocator error: EgressIP: %s claims to have an allocation on a node which is unassignable for egress IP: %s", name, eIPStatus.Node) + validAssignment = false + } else { + if eNode.getAllocationCountForEgressIP(name) > 1 { + klog.Errorf("Allocator error: EgressIP: %s claims multiple egress IPs on same node: %s, will attempt rebalancing", name, eIPStatus.Node) + validAssignment = false + } + if !eNode.isEgressAssignable { + klog.Errorf("Allocator error: EgressIP: %s assigned to node: %s which does not have egress label, will attempt rebalancing", name, eIPStatus.Node) + validAssignment = false + } + if !eNode.isReachable { + klog.Errorf("Allocator error: EgressIP: %s assigned to node: %s which is not reachable, will attempt rebalancing", name, eIPStatus.Node) + validAssignment = false + } + if !eNode.isReady { + klog.Errorf("Allocator error: EgressIP: %s assigned to node: %s which is not ready, will attempt rebalancing", name, eIPStatus.Node) + validAssignment = false + } + ip := net.ParseIP(eIPStatus.EgressIP) + if ip == nil 
{ + klog.Errorf("Allocator error: EgressIP allocation contains unparsable IP address: %s", eIPStatus.EgressIP) + validAssignment = false + } + if node := eIPC.isAnyClusterNodeIP(ip); node != nil { + klog.Errorf("Allocator error: EgressIP allocation: %s is the IP of node: %s ", ip.String(), node.name) + validAssignment = false + } + if utilnet.IsIPv6(ip) && eNode.egressIPConfig.V6.Net != nil { + if !eNode.egressIPConfig.V6.Net.Contains(ip) { + klog.Errorf("Allocator error: EgressIP allocation: %s on subnet: %s which cannot host it", ip.String(), eNode.egressIPConfig.V4.Net.String()) + validAssignment = false + } + } else if !utilnet.IsIPv6(ip) && eNode.egressIPConfig.V4.Net != nil { + if !eNode.egressIPConfig.V4.Net.Contains(ip) { + klog.Errorf("Allocator error: EgressIP allocation: %s on subnet: %s which cannot host it", ip.String(), eNode.egressIPConfig.V4.Net.String()) + validAssignment = false + } + } else { + klog.Errorf("Allocator error: EgressIP allocation on node: %s which does not support its IP protocol version", eIPStatus.Node) + validAssignment = false + } + } + if validAssignment { + valid[eIPStatus] = "" + } else { + invalid[eIPStatus] = "" + } + } + return valid, invalid +} + +func (eIPC *egressIPClusterController) reconcileCloudPrivateIPConfig(old, new *ocpcloudnetworkapi.CloudPrivateIPConfig) error { + oldCloudPrivateIPConfig, newCloudPrivateIPConfig := &ocpcloudnetworkapi.CloudPrivateIPConfig{}, &ocpcloudnetworkapi.CloudPrivateIPConfig{} + shouldDelete, shouldAdd := false, false + nodeToDelete := "" + + if old != nil { + oldCloudPrivateIPConfig = old + // We need to handle three types of deletes, A) object UPDATE where the + // old egress IP <-> node assignment has been removed. This is indicated + // by the old object having a .status.node set and the new object having + // .status.node empty and the condition on the new being successful. B) + // object UPDATE where egress IP <-> node assignment has been updated. 
+ // This is indicated by .status.node being different on old and new + // objects. C) object DELETE, for which new is nil + shouldDelete = oldCloudPrivateIPConfig.Status.Node != "" || new == nil + // On DELETE we need to delete the .spec.node for the old object + nodeToDelete = oldCloudPrivateIPConfig.Spec.Node + } + if new != nil { + newCloudPrivateIPConfig = new + // We should only proceed to setting things up for objects where the new + // object has the same .spec.node and .status.node, and assignment + // condition being true. This is how the cloud-network-config-controller + // indicates a successful cloud assignment. + shouldAdd = newCloudPrivateIPConfig.Status.Node == newCloudPrivateIPConfig.Spec.Node && + ocpcloudnetworkapi.CloudPrivateIPConfigConditionType(newCloudPrivateIPConfig.Status.Conditions[0].Type) == ocpcloudnetworkapi.Assigned && + v1.ConditionStatus(newCloudPrivateIPConfig.Status.Conditions[0].Status) == v1.ConditionTrue + // See above explanation for the delete + shouldDelete = shouldDelete && + (newCloudPrivateIPConfig.Status.Node == "" || newCloudPrivateIPConfig.Status.Node != oldCloudPrivateIPConfig.Status.Node) && + ocpcloudnetworkapi.CloudPrivateIPConfigConditionType(newCloudPrivateIPConfig.Status.Conditions[0].Type) == ocpcloudnetworkapi.Assigned && + v1.ConditionStatus(newCloudPrivateIPConfig.Status.Conditions[0].Status) == v1.ConditionTrue + // On UPDATE we need to delete the old .status.node + if shouldDelete { + nodeToDelete = oldCloudPrivateIPConfig.Status.Node + } + } + + // As opposed to reconcileEgressIP, here we are only interested in changes + // made to the status (since we are the only ones performing the change made + // to the spec). So don't process the object if there is no change made to + // the status. 
+ if reflect.DeepEqual(oldCloudPrivateIPConfig.Status, newCloudPrivateIPConfig.Status) { + return nil + } + + if shouldDelete { + // Get the EgressIP owner reference + egressIPName, exists := oldCloudPrivateIPConfig.Annotations[util.OVNEgressIPOwnerRefLabel] + if !exists { + // If a CloudPrivateIPConfig object does not have an egress IP owner reference annotation upon deletion, + // there is no way that the object will get one after deletion. Hence, simply log a warning message here + // for informative purposes instead of throwing the same error and retrying time and time again. + klog.Warningf("CloudPrivateIPConfig object %q was missing the egress IP owner reference annotation "+ + "upon deletion", oldCloudPrivateIPConfig.Name) + return nil + } + // Check if the egress IP has been deleted or not, if we are processing + // a CloudPrivateIPConfig delete because the EgressIP has been deleted + // then we need to remove the setup made for it, but not update the + // object. + egressIP, err := eIPC.kube.GetEgressIP(egressIPName) + isDeleted := apierrors.IsNotFound(err) + if err != nil && !isDeleted { + return err + } + egressIPString := cloudPrivateIPConfigNameToIPString(oldCloudPrivateIPConfig.Name) + statusItem := egressipv1.EgressIPStatusItem{ + Node: nodeToDelete, + EgressIP: egressIPString, + } + // If we are not processing a delete, update the EgressIP object's + // status assignments + if !isDeleted { + // Deleting a status here means updating the object with the statuses we + // want to keep + updatedStatus := []egressipv1.EgressIPStatusItem{} + for _, status := range egressIP.Status.Items { + if !reflect.DeepEqual(status, statusItem) { + updatedStatus = append(updatedStatus, status) + } + } + if err := eIPC.patchReplaceEgressIPStatus(egressIP.Name, updatedStatus); err != nil { + return err + } + } + resyncEgressIPs, err := eIPC.removePendingOpsAndGetResyncs(egressIPName, egressIPString) + if err != nil { + return err + } + for _, resyncEgressIP := range 
resyncEgressIPs { + if err := eIPC.reconcileEgressIP(nil, &resyncEgressIP); err != nil { + return fmt.Errorf("synthetic update for EgressIP: %s failed, err: %v", egressIP.Name, err) + } + } + } + if shouldAdd { + // Get the EgressIP owner reference + egressIPName, exists := newCloudPrivateIPConfig.Annotations[util.OVNEgressIPOwnerRefLabel] + if !exists { + // If a CloudPrivateIPConfig object does not have an egress IP owner reference annotation upon creation + // then we should simply log this as a warning. We should get an update action later down the road where we + // then take care of the rest. Hence, do not throw an error here to avoid rescheduling. Even though not + // officially supported, think of someone creating a CloudPrivateIPConfig object manually which will never + // get the annotation. + klog.Warningf("CloudPrivateIPConfig object %q is missing the egress IP owner reference annotation. Skipping", + oldCloudPrivateIPConfig.Name) + return nil + } + egressIP, err := eIPC.kube.GetEgressIP(egressIPName) + if err != nil { + return err + } + egressIPString := cloudPrivateIPConfigNameToIPString(newCloudPrivateIPConfig.Name) + statusItem := egressipv1.EgressIPStatusItem{ + Node: newCloudPrivateIPConfig.Status.Node, + EgressIP: egressIPString, + } + // Guard against performing the same assignment twice, which might + // happen when multiple updates come in on the same object. + hasStatus := false + for _, status := range egressIP.Status.Items { + if reflect.DeepEqual(status, statusItem) { + hasStatus = true + break + } + } + if !hasStatus { + statusToKeep := append(egressIP.Status.Items, statusItem) + if err := eIPC.patchReplaceEgressIPStatus(egressIP.Name, statusToKeep); err != nil { + return err + } + } + + eIPC.pendingCloudPrivateIPConfigsMutex.Lock() + defer eIPC.pendingCloudPrivateIPConfigsMutex.Unlock() + // Remove the finished add / update operation from the pending cache. 
We + // never process add and deletes in the same sync, and for updates: + // deletes are always performed before adds, hence we should only ever + // fully delete the item from the pending cache once the add has + // finished. + ops, pending := eIPC.pendingCloudPrivateIPConfigsOps[egressIPName] + if !pending { + // Do not return an error here, it will lead to spurious error + // messages on restart because we will process a bunch of adds for + // all existing objects, for which no CR was issued. + klog.V(5).Infof("No pending operation found for EgressIP: %s while processing created CloudPrivateIPConfig", egressIPName) + return nil + } + op, exists := ops[egressIPString] + if !exists { + klog.V(5).Infof("Pending operations found for EgressIP: %s, but not for the created CloudPrivateIPConfig: %s", egressIPName, egressIPString) + return nil + } + // Process finalized add / updates, hence: (op.toAdd != "" && + // op.toDelete != "") || (op.toAdd != "" && op.toDelete == ""), which is + // equivalent the below. + if op.toAdd != "" { + delete(ops, egressIPString) + } + if len(ops) == 0 { + delete(eIPC.pendingCloudPrivateIPConfigsOps, egressIPName) + } + } + return nil +} + +// cloudPrivateIPConfigNameToIPString converts the resource name to the string +// representation of net.IP. Given a limitation in the Kubernetes API server +// (see: https://github.com/kubernetes/kubernetes/pull/100950) +// CloudPrivateIPConfig.metadata.name cannot represent an IPv6 address. To +// work-around this limitation it was decided that the network plugin creating +// the CR will fully expand the IPv6 address and replace all colons with dots, +// ex: + +// The CloudPrivateIPConfig name fc00.f853.0ccd.e793.0000.0000.0000.0054 will be +// represented as address: fc00:f853:ccd:e793::54 + +// We thus need to replace every fifth character's dot with a colon. +func cloudPrivateIPConfigNameToIPString(name string) string { + // Handle IPv4, which will work fine. 
+ if ip := net.ParseIP(name); ip != nil { + return name + } + // Handle IPv6, for which we want to convert the fully expanded "special + // name" to go's default IP representation + name = strings.ReplaceAll(name, ".", ":") + return net.ParseIP(name).String() +} + +// removePendingOpsAndGetResyncs removes the existing pending CloudPrivateIPConfig operations +// from the cache and returns the EgressIP objects which can be re-synced given +// the new assignment possibilities. +func (eIPC *egressIPClusterController) removePendingOpsAndGetResyncs(egressIPName, egressIP string) ([]egressipv1.EgressIP, error) { + eIPC.pendingCloudPrivateIPConfigsMutex.Lock() + defer eIPC.pendingCloudPrivateIPConfigsMutex.Unlock() + ops, pending := eIPC.pendingCloudPrivateIPConfigsOps[egressIPName] + if !pending { + return nil, fmt.Errorf("no pending operation found for EgressIP: %s", egressIPName) + } + op, exists := ops[egressIP] + if !exists { + return nil, fmt.Errorf("pending operations found for EgressIP: %s, but not for the finalized IP: %s", egressIPName, egressIP) + } + // Make sure we are dealing with a delete operation, since for update + // operations we will still need to process the add afterwards. + if op.toAdd == "" && op.toDelete != "" { + delete(ops, egressIP) + } + if len(ops) == 0 { + delete(eIPC.pendingCloudPrivateIPConfigsOps, egressIPName) + } + + // Some EgressIP objects might not have all of their spec.egressIPs + // assigned because there was no room to assign them. Hence, every time + // we process a final deletion for a CloudPrivateIPConfig: have a look + // at what other EgressIP objects have something un-assigned, and force + // a reconciliation on them by sending a synthetic update. 
+ egressIPs, err := eIPC.kube.GetEgressIPs() + if err != nil { + return nil, fmt.Errorf("unable to list EgressIPs, err: %v", err) + } + resyncs := make([]egressipv1.EgressIP, 0, len(egressIPs.Items)) + for _, egressIP := range egressIPs.Items { + // Do not process the egress IP object which owns the + // CloudPrivateIPConfig for which we are currently processing the + // deletion for. + if egressIP.Name == egressIPName { + continue + } + unassigned := len(egressIP.Spec.EgressIPs) - len(egressIP.Status.Items) + ops, pending := eIPC.pendingCloudPrivateIPConfigsOps[egressIP.Name] + // If the EgressIP was never added to the pending cache to begin + // with, but has un-assigned egress IPs, try it. + if !pending && unassigned > 0 { + resyncs = append(resyncs, egressIP) + continue + } + // If the EgressIP has pending operations, have a look at if the + // unassigned operations superseed the pending ones. It could be + // that it could only execute a couple of assignments at one point. + if pending && unassigned > len(ops) { + resyncs = append(resyncs, egressIP) + } + } + return resyncs, nil +} diff --git a/go-controller/pkg/clustermanager/egressip_controller_test.go b/go-controller/pkg/clustermanager/egressip_controller_test.go new file mode 100644 index 0000000000..88f66e2bfe --- /dev/null +++ b/go-controller/pkg/clustermanager/egressip_controller_test.go @@ -0,0 +1,2349 @@ +package clustermanager + +import ( + "context" + "fmt" + "net" + "time" + + "github.com/onsi/ginkgo" + "github.com/onsi/gomega" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config" + egressipv1 "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressip/v1" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/healthcheck" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" + "github.com/urfave/cli/v2" + v1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + k8stypes "k8s.io/apimachinery/pkg/types" + "k8s.io/apimachinery/pkg/util/sets" + utilnet 
"k8s.io/utils/net" +) + +type fakeEgressIPDialer struct{} + +func (f fakeEgressIPDialer) dial(ip net.IP, timeout time.Duration) bool { + return true +} + +type fakeEgressIPHealthClient struct { + Connected bool + ProbeCount int + FakeProbeFailure bool +} + +func (fehc *fakeEgressIPHealthClient) IsConnected() bool { + return fehc.Connected +} + +func (fehc *fakeEgressIPHealthClient) Connect(dialCtx context.Context, mgmtIPs []net.IP, healthCheckPort int) bool { + if fehc.FakeProbeFailure { + return false + } + fehc.Connected = true + return true +} + +func (fehc *fakeEgressIPHealthClient) Disconnect() { + fehc.Connected = false + fehc.ProbeCount = 0 +} + +func (fehc *fakeEgressIPHealthClient) Probe(dialCtx context.Context) bool { + if fehc.Connected && !fehc.FakeProbeFailure { + fehc.ProbeCount++ + return true + } + return false +} + +type fakeEgressIPHealthClientAllocator struct{} + +func (f *fakeEgressIPHealthClientAllocator) allocate(nodeName string) healthcheck.EgressIPHealthClient { + return &fakeEgressIPHealthClient{} +} + +func newNamespaceMeta(namespace string, additionalLabels map[string]string) metav1.ObjectMeta { + labels := map[string]string{ + "name": namespace, + } + for k, v := range additionalLabels { + labels[k] = v + } + return metav1.ObjectMeta{ + UID: k8stypes.UID(namespace), + Name: namespace, + Labels: labels, + Annotations: map[string]string{}, + } +} + +func newNamespace(namespace string) *v1.Namespace { + return &v1.Namespace{ + ObjectMeta: newNamespaceMeta(namespace, nil), + Spec: v1.NamespaceSpec{}, + Status: v1.NamespaceStatus{}, + } +} + +var egressPodLabel = map[string]string{"egress": "needed"} + +func newEgressIPMeta(name string) metav1.ObjectMeta { + return metav1.ObjectMeta{ + UID: k8stypes.UID(name), + Name: name, + Labels: map[string]string{ + "name": name, + }, + } +} + +func setupNode(nodeName string, ipNets []string, mockAllocationIPs map[string]string) egressNode { + var v4IP, v6IP net.IP + var v4Subnet, v6Subnet *net.IPNet + 
for _, ipNet := range ipNets { + ip, net, _ := net.ParseCIDR(ipNet) + if utilnet.IsIPv6CIDR(net) { + v6Subnet = net + v6IP = ip + } else { + v4Subnet = net + v4IP = ip + } + } + + mockAllcations := map[string]string{} + for mockAllocationIP, egressIPName := range mockAllocationIPs { + mockAllcations[net.ParseIP(mockAllocationIP).String()] = egressIPName + } + + node := egressNode{ + egressIPConfig: &util.ParsedNodeEgressIPConfiguration{ + V4: util.ParsedIFAddr{ + IP: v4IP, + Net: v4Subnet, + }, + V6: util.ParsedIFAddr{ + IP: v6IP, + Net: v6Subnet, + }, + Capacity: util.Capacity{ + IP: util.UnlimitedNodeCapacity, + IPv4: util.UnlimitedNodeCapacity, + IPv6: util.UnlimitedNodeCapacity, + }, + }, + allocations: mockAllcations, + healthClient: hccAllocator.allocate(nodeName), // using fakeEgressIPHealthClientAllocator + name: nodeName, + isReady: true, + isReachable: true, + isEgressAssignable: true, + } + return node +} + +var _ = ginkgo.Describe("OVN cluster-manager EgressIP Operations", func() { + var ( + app *cli.App + fakeClusterManagerOVN *FakeClusterManager + ) + + const ( + node1Name = "node1" + node2Name = "node2" + egressIPName = "egressip" + egressIPName2 = "egressip-2" + namespace = "egressip-namespace" + v4NodeSubnet = "10.128.0.0/24" + v6NodeSubnet = "ae70::66/64" + ) + + dialer = fakeEgressIPDialer{} + hccAllocator = &fakeEgressIPHealthClientAllocator{} + + getEgressIPAllocatorSizeSafely := func() int { + fakeClusterManagerOVN.eIPC.allocator.Lock() + defer fakeClusterManagerOVN.eIPC.allocator.Unlock() + return len(fakeClusterManagerOVN.eIPC.allocator.cache) + } + + getEgressIPStatusLen := func(egressIPName string) func() int { + return func() int { + tmp, err := fakeClusterManagerOVN.fakeClient.EgressIPClient.K8sV1().EgressIPs().Get(context.TODO(), egressIPName, metav1.GetOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + return len(tmp.Status.Items) + } + } + + getEgressIPStatus := func(egressIPName string) ([]string, []string) { + tmp, err 
:= fakeClusterManagerOVN.fakeClient.EgressIPClient.K8sV1().EgressIPs().Get(context.TODO(), egressIPName, metav1.GetOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + var egressIPs, nodes []string + for _, status := range tmp.Status.Items { + egressIPs = append(egressIPs, status.EgressIP) + nodes = append(nodes, status.Node) + } + return egressIPs, nodes + } + + getEgressIPReassignmentCount := func() int { + reAssignmentCount := 0 + egressIPs, err := fakeClusterManagerOVN.fakeClient.EgressIPClient.K8sV1().EgressIPs().List(context.TODO(), metav1.ListOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + for _, egressIP := range egressIPs.Items { + if len(egressIP.Spec.EgressIPs) != len(egressIP.Status.Items) { + reAssignmentCount++ + } + } + return reAssignmentCount + } + + isEgressAssignableNode := func(nodeName string) func() bool { + return func() bool { + fakeClusterManagerOVN.eIPC.allocator.Lock() + defer fakeClusterManagerOVN.eIPC.allocator.Unlock() + if item, exists := fakeClusterManagerOVN.eIPC.allocator.cache[nodeName]; exists { + return item.isEgressAssignable + } + return false + } + } + + nodeSwitch := func() string { + _, nodes := getEgressIPStatus(egressIPName) + if len(nodes) != 1 { + return "" + } + return nodes[0] + } + + ginkgo.BeforeEach(func() { + // Restore global default values before each testcase + gomega.Expect(config.PrepareTestConfig()).To(gomega.Succeed()) + config.OVNKubernetesFeature.EnableEgressIP = true + config.OVNKubernetesFeature.EgressIPNodeHealthCheckPort = 1234 + + app = cli.NewApp() + app.Name = "test" + app.Flags = config.Flags + fakeClusterManagerOVN = NewFakeClusterManagerOVN() + }) + + ginkgo.AfterEach(func() { + fakeClusterManagerOVN.shutdown() + }) + + ginkgo.Context("On node ADD/UPDATE/DELETE", func() { + ginkgo.It("should re-assign EgressIPs and perform proper egressIP allocation changes", func() { + app.Action = func(ctx *cli.Context) error { + egressIP := "192.168.126.101" + node1IPv4 := 
"192.168.126.202/24" + node2IPv4 := "192.168.126.51/24" + egressNamespace := newNamespace(namespace) + node1 := v1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: node1Name, + Annotations: map[string]string{ + "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", node1IPv4, ""), + "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), + }, + Labels: map[string]string{ + "k8s.ovn.org/egress-assignable": "", + }, + }, + Status: v1.NodeStatus{ + Conditions: []v1.NodeCondition{ + { + Type: v1.NodeReady, + Status: v1.ConditionTrue, + }, + }, + }, + } + node2 := v1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: node2Name, + Annotations: map[string]string{ + "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", node2IPv4, ""), + "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), + }, + }, + Status: v1.NodeStatus{ + Conditions: []v1.NodeCondition{ + { + Type: v1.NodeReady, + Status: v1.ConditionTrue, + }, + }, + }, + } + + eIP := egressipv1.EgressIP{ + ObjectMeta: newEgressIPMeta(egressIPName), + Spec: egressipv1.EgressIPSpec{ + EgressIPs: []string{egressIP}, + PodSelector: metav1.LabelSelector{ + MatchLabels: egressPodLabel, + }, + NamespaceSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{ + "name": egressNamespace.Name, + }, + }, + }, + Status: egressipv1.EgressIPStatus{ + Items: []egressipv1.EgressIPStatusItem{}, + }, + } + fakeClusterManagerOVN.start( + &egressipv1.EgressIPList{ + Items: []egressipv1.EgressIP{eIP}, + }, + &v1.NodeList{ + Items: []v1.Node{node1, node2}, + }, + ) + + _, err := fakeClusterManagerOVN.eIPC.WatchEgressNodes() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + _, err = fakeClusterManagerOVN.eIPC.WatchEgressIP() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + gomega.Eventually(getEgressIPAllocatorSizeSafely).Should(gomega.Equal(2)) + 
gomega.Expect(fakeClusterManagerOVN.eIPC.allocator.cache).To(gomega.HaveKey(node1.Name)) + gomega.Expect(fakeClusterManagerOVN.eIPC.allocator.cache).To(gomega.HaveKey(node2.Name)) + gomega.Eventually(isEgressAssignableNode(node1.Name)).Should(gomega.BeTrue()) + gomega.Eventually(isEgressAssignableNode(node2.Name)).Should(gomega.BeFalse()) + + gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) + egressIPs, nodes := getEgressIPStatus(egressIPName) + gomega.Expect(nodes[0]).To(gomega.Equal(node1.Name)) + gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP)) + + node1.Labels = map[string]string{} + node2.Labels = map[string]string{ + "k8s.ovn.org/egress-assignable": "", + } + + _, err = fakeClusterManagerOVN.fakeClient.KubeClient.CoreV1().Nodes().Update(context.TODO(), &node1, metav1.UpdateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + _, err = fakeClusterManagerOVN.fakeClient.KubeClient.CoreV1().Nodes().Update(context.TODO(), &node2, metav1.UpdateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) + gomega.Eventually(nodeSwitch).Should(gomega.Equal(node2.Name)) + egressIPs, _ = getEgressIPStatus(egressIPName) + gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP)) + + return nil + } + + err := app.Run([]string{ + app.Name, + }) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + + ginkgo.It("should re-assign EgressIPs and perform proper egressIP allocation changes during node deletion", func() { + app.Action = func(ctx *cli.Context) error { + + egressIP := "192.168.126.101" + node1IPv4 := "192.168.126.202/24" + node2IPv4 := "192.168.126.51/24" + + egressNamespace := newNamespace(namespace) + + node1 := v1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: node1Name, + Annotations: map[string]string{ + "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", node1IPv4, ""), + "k8s.ovn.org/node-subnets": 
fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), + }, + Labels: map[string]string{ + "k8s.ovn.org/egress-assignable": "", + }, + }, + Status: v1.NodeStatus{ + Conditions: []v1.NodeCondition{ + { + Type: v1.NodeReady, + Status: v1.ConditionTrue, + }, + }, + }, + } + node2 := v1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: node2Name, + Annotations: map[string]string{ + "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", node2IPv4, ""), + "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), + }, + }, + Status: v1.NodeStatus{ + Conditions: []v1.NodeCondition{ + { + Type: v1.NodeReady, + Status: v1.ConditionTrue, + }, + }, + }, + } + + eIP := egressipv1.EgressIP{ + ObjectMeta: newEgressIPMeta(egressIPName), + Spec: egressipv1.EgressIPSpec{ + EgressIPs: []string{egressIP}, + PodSelector: metav1.LabelSelector{ + MatchLabels: egressPodLabel, + }, + NamespaceSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{ + "name": egressNamespace.Name, + }, + }, + }, + Status: egressipv1.EgressIPStatus{ + Items: []egressipv1.EgressIPStatusItem{}, + }, + } + + fakeClusterManagerOVN.start( + &egressipv1.EgressIPList{ + Items: []egressipv1.EgressIP{eIP}, + }, + &v1.NodeList{ + Items: []v1.Node{node1, node2}, + }, + ) + + _, err := fakeClusterManagerOVN.eIPC.WatchEgressNodes() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + _, err = fakeClusterManagerOVN.eIPC.WatchEgressIP() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + gomega.Eventually(getEgressIPAllocatorSizeSafely).Should(gomega.Equal(2)) + gomega.Expect(fakeClusterManagerOVN.eIPC.allocator.cache).To(gomega.HaveKey(node1.Name)) + gomega.Expect(fakeClusterManagerOVN.eIPC.allocator.cache).To(gomega.HaveKey(node2.Name)) + gomega.Eventually(isEgressAssignableNode(node1.Name)).Should(gomega.BeTrue()) + gomega.Eventually(isEgressAssignableNode(node2.Name)).Should(gomega.BeFalse()) + + 
gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) + egressIPs, nodes := getEgressIPStatus(egressIPName) + gomega.Expect(nodes[0]).To(gomega.Equal(node1.Name)) + gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP)) + + node2.Labels = map[string]string{ + "k8s.ovn.org/egress-assignable": "", + } + _, err = fakeClusterManagerOVN.fakeClient.KubeClient.CoreV1().Nodes().Update(context.TODO(), &node2, metav1.UpdateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + err = fakeClusterManagerOVN.fakeClient.KubeClient.CoreV1().Nodes().Delete(context.TODO(), node1Name, metav1.DeleteOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) + gomega.Eventually(nodeSwitch).Should(gomega.Equal(node2.Name)) + egressIPs, _ = getEgressIPStatus(egressIPName) + gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP)) + return nil + } + + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + + }) + + ginkgo.Context("WatchEgressNodes", func() { + + ginkgo.It("should populated egress node data as they are tagged `egress assignable` with variants of IPv4/IPv6", func() { + app.Action = func(ctx *cli.Context) error { + + node1IPv4 := "192.168.128.202/24" + node1IPv6 := "0:0:0:0:0:feff:c0a8:8e0c/64" + node2IPv4 := "192.168.126.51/24" + + node1 := v1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "node1", + Annotations: map[string]string{ + "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", node1IPv4, node1IPv6), + "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":[\"%s\", \"%s\"]}", v4NodeSubnet, v6NodeSubnet), + }, + }, + Status: v1.NodeStatus{ + Conditions: []v1.NodeCondition{ + { + Type: v1.NodeReady, + Status: v1.ConditionTrue, + }, + }, + }, + } + node2 := v1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "node2", + Annotations: map[string]string{ + "k8s.ovn.org/node-primary-ifaddr": 
fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", node2IPv4, ""), + "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), + }, + }, + Status: v1.NodeStatus{ + Conditions: []v1.NodeCondition{ + { + Type: v1.NodeReady, + Status: v1.ConditionTrue, + }, + }, + }, + } + fakeClusterManagerOVN.start( + &v1.NodeList{ + Items: []v1.Node{}, + }, + ) + + _, err := fakeClusterManagerOVN.eIPC.WatchEgressNodes() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + gomega.Eventually(getEgressIPAllocatorSizeSafely).Should(gomega.Equal(0)) + + node1.Labels = map[string]string{ + "k8s.ovn.org/egress-assignable": "", + } + + _, ip1V4Sub, err := net.ParseCIDR(node1IPv4) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + _, ip1V6Sub, err := net.ParseCIDR(node1IPv6) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + _, ip2V4Sub, err := net.ParseCIDR(node2IPv4) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + _, err = fakeClusterManagerOVN.fakeClient.KubeClient.CoreV1().Nodes().Create(context.TODO(), &node1, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Eventually(getEgressIPAllocatorSizeSafely).Should(gomega.Equal(1)) + gomega.Expect(fakeClusterManagerOVN.eIPC.allocator.cache).To(gomega.HaveKey(node1.Name)) + gomega.Expect(fakeClusterManagerOVN.eIPC.allocator.cache[node1.Name].egressIPConfig.V4.Net).To(gomega.Equal(ip1V4Sub)) + gomega.Expect(fakeClusterManagerOVN.eIPC.allocator.cache[node1.Name].egressIPConfig.V6.Net).To(gomega.Equal(ip1V6Sub)) + + node2.Labels = map[string]string{ + "k8s.ovn.org/egress-assignable": "", + } + + _, err = fakeClusterManagerOVN.fakeClient.KubeClient.CoreV1().Nodes().Create(context.TODO(), &node2, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Eventually(getEgressIPAllocatorSizeSafely).Should(gomega.Equal(2)) + gomega.Expect(fakeClusterManagerOVN.eIPC.allocator.cache).To(gomega.HaveKey(node1.Name)) + 
gomega.Expect(fakeClusterManagerOVN.eIPC.allocator.cache).To(gomega.HaveKey(node2.Name)) + gomega.Expect(fakeClusterManagerOVN.eIPC.allocator.cache[node2.Name].egressIPConfig.V4.Net).To(gomega.Equal(ip2V4Sub)) + gomega.Expect(fakeClusterManagerOVN.eIPC.allocator.cache[node1.Name].egressIPConfig.V4.Net).To(gomega.Equal(ip1V4Sub)) + gomega.Expect(fakeClusterManagerOVN.eIPC.allocator.cache[node1.Name].egressIPConfig.V6.Net).To(gomega.Equal(ip1V6Sub)) + return nil + } + + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + + ginkgo.It("using retry to create egress node with forced error followed by an update", func() { + app.Action = func(ctx *cli.Context) error { + nodeIPv4 := "192.168.126.51/24" + nodeIPv6 := "0:0:0:0:0:feff:c0a8:8e0c/64" + node := v1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "node", + Annotations: map[string]string{ + "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", nodeIPv4, nodeIPv6), + "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":[\"%s\", \"%s\"]}", v4NodeSubnet, v6NodeSubnet), + }, + }, + Status: v1.NodeStatus{ + Conditions: []v1.NodeCondition{ + { + Type: v1.NodeReady, + Status: v1.ConditionTrue, + }, + }, + }, + } + fakeClusterManagerOVN.start( + &v1.NodeList{ + Items: []v1.Node{}, + }, + ) + _, err := fakeClusterManagerOVN.eIPC.WatchEgressNodes() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + gomega.Eventually(getEgressIPAllocatorSizeSafely).Should(gomega.Equal(0)) + + _, ipV4Sub, err := net.ParseCIDR(nodeIPv4) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + _, ipV6Sub, err := net.ParseCIDR(nodeIPv6) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + node.Labels = map[string]string{ + "k8s.ovn.org/egress-assignable": "", + } + _, err = fakeClusterManagerOVN.fakeClient.KubeClient.CoreV1().Nodes().Create(context.TODO(), &node, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + node.Labels = map[string]string{} + _, 
err = fakeClusterManagerOVN.fakeClient.KubeClient.CoreV1().Nodes().Update(context.TODO(), &node, metav1.UpdateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + gomega.Eventually(getEgressIPAllocatorSizeSafely).Should(gomega.Equal(1)) + gomega.Expect(fakeClusterManagerOVN.eIPC.allocator.cache).To(gomega.HaveKey(node.Name)) + gomega.Expect(fakeClusterManagerOVN.eIPC.allocator.cache[node.Name].egressIPConfig.V4.Net).To(gomega.Equal(ipV4Sub)) + gomega.Expect(fakeClusterManagerOVN.eIPC.allocator.cache[node.Name].egressIPConfig.V6.Net).To(gomega.Equal(ipV6Sub)) + + return nil + } + + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + + ginkgo.It("ensure only one egressIP is assinged to the given node while rest of the IPs go into pending state", func() { + app.Action = func(ctx *cli.Context) error { + + config.Gateway.DisableSNATMultipleGWs = true + + egressIP1 := "192.168.126.25" + egressIP2 := "192.168.126.30" + egressIP3 := "192.168.126.35" + node1IPv4 := "192.168.126.12/24" + node2IPv4 := "192.168.126.13/24" + + egressNamespace := newNamespace(namespace) + + node1 := v1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: node1Name, + Annotations: map[string]string{ + "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\"}", node1IPv4), + "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), + "k8s.ovn.org/l3-gateway-config": `{"default":{"mode":"local","mac-address":"7e:57:f8:f0:3c:49", "ip-address":"192.168.126.12/24", "next-hop":"192.168.126.1"}}`, + "k8s.ovn.org/node-chassis-id": "79fdcfc4-6fe6-4cd3-8242-c0f85a4668ec", + }, + Labels: map[string]string{ + "k8s.ovn.org/egress-assignable": "", + }, + }, + Status: v1.NodeStatus{ + Conditions: []v1.NodeCondition{ + { + Type: v1.NodeReady, + Status: v1.ConditionTrue, + }, + }, + }, + } + + node2 := v1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: node2Name, + Annotations: map[string]string{ + "k8s.ovn.org/node-primary-ifaddr": 
fmt.Sprintf("{\"ipv4\": \"%s\"}", node2IPv4), + "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), + "k8s.ovn.org/l3-gateway-config": `{"default":{"mode":"local","mac-address":"7e:57:f8:f0:3c:50", "ip-address":"192.168.126.13/24", "next-hop":"192.168.126.1"}}`, + "k8s.ovn.org/node-chassis-id": "79fdcfc4-6fe6-4cd3-8242-c0f85a4668ec", + }, + }, + Status: v1.NodeStatus{ + Conditions: []v1.NodeCondition{ + { + Type: v1.NodeReady, + Status: v1.ConditionTrue, + }, + }, + }, + } + + eIP1 := egressipv1.EgressIP{ + ObjectMeta: newEgressIPMeta(egressIPName), + Spec: egressipv1.EgressIPSpec{ + EgressIPs: []string{egressIP1, egressIP2}, + PodSelector: metav1.LabelSelector{ + MatchLabels: egressPodLabel, + }, + NamespaceSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{ + "name": egressNamespace.Name, + }, + }, + }, + Status: egressipv1.EgressIPStatus{ + Items: []egressipv1.EgressIPStatusItem{}, + }, + } + + eIP2 := egressipv1.EgressIP{ + ObjectMeta: newEgressIPMeta(egressIPName2), + Spec: egressipv1.EgressIPSpec{ + EgressIPs: []string{egressIP3}, + PodSelector: metav1.LabelSelector{ + MatchLabels: egressPodLabel, + }, + NamespaceSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{ + "name": egressNamespace.Name, + }, + }, + }, + Status: egressipv1.EgressIPStatus{ + Items: []egressipv1.EgressIPStatusItem{}, + }, + } + + fakeClusterManagerOVN.start( + &egressipv1.EgressIPList{ + Items: []egressipv1.EgressIP{eIP1, eIP2}, + }, + &v1.NodeList{ + Items: []v1.Node{node1, node2}, + }, + ) + + _, err := fakeClusterManagerOVN.eIPC.WatchEgressNodes() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + _, err = fakeClusterManagerOVN.eIPC.WatchEgressIP() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + // Ensure first egressIP object is assigned, since only node1 is an egressNode, only 1IP will be assigned, other will be pending + gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) + 
gomega.Eventually(getEgressIPReassignmentCount).Should(gomega.Equal(1)) + recordedEvent := <-fakeClusterManagerOVN.fakeRecorder.Events + gomega.Expect(recordedEvent).To(gomega.ContainSubstring("Not all egress IPs for EgressIP: %s could be assigned, please tag more nodes", eIP1.Name)) + egressIPs1, nodes1 := getEgressIPStatus(egressIPName) + gomega.Expect(nodes1[0]).To(gomega.Equal(node1.Name)) + possibleAssignments := sets.NewString(egressIP1, egressIP2) + gomega.Expect(possibleAssignments.Has(egressIPs1[0])).To(gomega.BeTrue()) + + // Ensure second egressIP object is also assigned to node1, but no OVN config will be done for this + gomega.Eventually(getEgressIPStatusLen(egressIPName2)).Should(gomega.Equal(1)) + egressIPs2, nodes2 := getEgressIPStatus(egressIPName2) + gomega.Expect(nodes2[0]).To(gomega.Equal(node1.Name)) + gomega.Expect(egressIPs2[0]).To(gomega.Equal(egressIP3)) + + // Make second node egressIP assignable + node2.Labels = map[string]string{ + "k8s.ovn.org/egress-assignable": "", + } + _, err = fakeClusterManagerOVN.fakeClient.KubeClient.CoreV1().Nodes().Update(context.TODO(), &node2, metav1.UpdateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + // ensure secondIP from first object gets assigned to node2 + gomega.Eventually(isEgressAssignableNode(node2.Name)).Should(gomega.BeTrue()) + gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(2)) + egressIPs1, nodes1 = getEgressIPStatus(egressIPName) + gomega.Expect(nodes1[1]).To(gomega.Equal(node2.Name)) + gomega.Expect(possibleAssignments.Has(egressIPs1[1])).To(gomega.BeTrue()) + + return nil + } + + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + + ginkgo.It("should skip populating egress node data for nodes that have incorrect IP address", func() { + app.Action = func(ctx *cli.Context) error { + config.OVNKubernetesFeature.EnableInterconnect = true // no impact on global eIPC functions + nodeIPv4 := "192.168.126.510/24" + 
nodeIPv6 := "0:0:0:0:0:feff:c0a8:8e0c/64" + node := v1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: node1Name, + Annotations: map[string]string{ + "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", nodeIPv4, nodeIPv6), + "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":[\"%s\", \"%s\"]}", v4NodeSubnet, v6NodeSubnet), + }, + }, + Status: v1.NodeStatus{ + Conditions: []v1.NodeCondition{ + { + Type: v1.NodeReady, + Status: v1.ConditionTrue, + }, + }, + }, + } + fakeClusterManagerOVN.start( + &v1.NodeList{ + Items: []v1.Node{node}, + }, + ) + + allocatorItems := func() int { + return len(fakeClusterManagerOVN.eIPC.allocator.cache) + } + + _, err := fakeClusterManagerOVN.eIPC.WatchEgressNodes() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Eventually(allocatorItems).Should(gomega.Equal(0)) + + node.Labels = map[string]string{ + "k8s.ovn.org/egress-assignable": "", + } + + _, err = fakeClusterManagerOVN.fakeClient.KubeClient.CoreV1().Nodes().Update(context.TODO(), &node, metav1.UpdateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Eventually(allocatorItems).Should(gomega.Equal(0)) + + return nil + } + + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + + ginkgo.It("should probe nodes using grpc", func() { + app.Action = func(ctx *cli.Context) error { + config.OVNKubernetesFeature.EnableInterconnect = false // no impact on global eIPC functions + node1IPv6 := "0:0:0:0:0:feff:c0a8:8e0c/64" + node2IPv4 := "192.168.126.51/24" + + node1 := v1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "node1", + Labels: map[string]string{ + "k8s.ovn.org/egress-assignable": "", + }, + Annotations: map[string]string{ + "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", "", node1IPv6), + "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v6NodeSubnet), + }, + }, + Status: v1.NodeStatus{ + Conditions: []v1.NodeCondition{ + { + 
Type: v1.NodeReady, + Status: v1.ConditionTrue, + }, + }, + }, + } + node2 := v1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "node2", + Labels: map[string]string{ + "k8s.ovn.org/egress-assignable": "", + }, + Annotations: map[string]string{ + "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", node2IPv4, ""), + "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), + }, + }, + Status: v1.NodeStatus{ + Conditions: []v1.NodeCondition{ + { + Type: v1.NodeReady, + Status: v1.ConditionTrue, + }, + }, + }, + } + fakeClusterManagerOVN.start() + _, err := fakeClusterManagerOVN.eIPC.WatchEgressNodes() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Eventually(getEgressIPAllocatorSizeSafely).Should(gomega.Equal(0)) + + _, ip1V6Sub, err := net.ParseCIDR(node1IPv6) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + _, ip2V4Sub, err := net.ParseCIDR(node2IPv4) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + _, err = fakeClusterManagerOVN.fakeClient.KubeClient.CoreV1().Nodes().Create(context.TODO(), &node1, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Eventually(getEgressIPAllocatorSizeSafely).Should(gomega.Equal(1)) + gomega.Expect(fakeClusterManagerOVN.eIPC.allocator.cache).To(gomega.HaveKey(node1.Name)) + gomega.Expect(fakeClusterManagerOVN.eIPC.allocator.cache[node1.Name].egressIPConfig.V6.Net).To(gomega.Equal(ip1V6Sub)) + + _, err = fakeClusterManagerOVN.fakeClient.KubeClient.CoreV1().Nodes().Create(context.TODO(), &node2, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Eventually(getEgressIPAllocatorSizeSafely).Should(gomega.Equal(2)) + gomega.Eventually(isEgressAssignableNode(node1.Name)).Should(gomega.BeTrue()) + gomega.Eventually(isEgressAssignableNode(node2.Name)).Should(gomega.BeTrue()) + gomega.Expect(fakeClusterManagerOVN.eIPC.allocator.cache).To(gomega.HaveKey(node1.Name)) + 
gomega.Expect(fakeClusterManagerOVN.eIPC.allocator.cache).To(gomega.HaveKey(node2.Name)) + + cachedEgressNode1 := fakeClusterManagerOVN.eIPC.allocator.cache[node1.Name] + cachedEgressNode2 := fakeClusterManagerOVN.eIPC.allocator.cache[node2.Name] + gomega.Expect(cachedEgressNode1.egressIPConfig.V6.Net).To(gomega.Equal(ip1V6Sub)) + gomega.Expect(cachedEgressNode2.egressIPConfig.V4.Net).To(gomega.Equal(ip2V4Sub)) + + // Explicitly call check reachability so we need not wait for the slow periodic timer + checkEgressNodesReachabilityIterate(fakeClusterManagerOVN.eIPC) + gomega.Expect(cachedEgressNode1.isReachable).To(gomega.BeTrue()) + gomega.Expect(cachedEgressNode2.isReachable).To(gomega.BeTrue()) + + // The test cases below will manipulate the fakeEgressIPHealthClient used for mocking + // a gRPC session dedicated to monitoring each of the 2 nodes created. It does that + // by setting the probe fail boolean which in turn causes the mocked probe call to + // pretend that the periodic monitor succeeded or not. + tests := []struct { + desc string + node1FailProbes bool + node2FailProbes bool + // This function is an optional and generic function for the test case + // to allow any special pre-conditioning needed before invoking of + // checkEgressNodesReachabilityIterate in the test. 
+ tcPrepareFunc func(hcc1, hcc2 *fakeEgressIPHealthClient) + }{ + { + desc: "disconnect nodes", + node1FailProbes: true, + node2FailProbes: true, + tcPrepareFunc: func(hcc1, hcc2 *fakeEgressIPHealthClient) { + hcc1.Disconnect() + hcc2.Disconnect() + }, + }, + { + desc: "connect node1", + node2FailProbes: true, + }, + { + desc: "node1 connected, connect node2", + }, + { + desc: "node1 and node2 connected, bump only node2 counters", + node1FailProbes: true, + }, + { + desc: "node2 connected, disconnect node1", + node1FailProbes: true, + node2FailProbes: true, + tcPrepareFunc: func(hcc1, hcc2 *fakeEgressIPHealthClient) { + hcc1.Disconnect() + }, + }, + { + desc: "connect node1, disconnect node2", + node2FailProbes: true, + tcPrepareFunc: func(hcc1, hcc2 *fakeEgressIPHealthClient) { + hcc2.Disconnect() + }, + }, + { + desc: "node1 and node2 connected and both counters bump", + tcPrepareFunc: func(hcc1, hcc2 *fakeEgressIPHealthClient) { + // Perform an additional iteration, to make probe counters to bump on second call + checkEgressNodesReachabilityIterate(fakeClusterManagerOVN.eIPC) + }, + }, + } + + // hcc1 and hcc2 are the mocked gRPC client to node1 and node2, respectively. + // They are what we use to manipulate whether probes to the node should fail or + // not, as well as a mechanism for explicitly disconnecting as part of the test. + hcc1 := cachedEgressNode1.healthClient.(*fakeEgressIPHealthClient) + hcc2 := cachedEgressNode2.healthClient.(*fakeEgressIPHealthClient) + + // ttIterCheck is the common function used by each test case. It will check whether + // a client changed its connection state and if the number of probes to the node + // changed as expected. 
+ ttIterCheck := func(hcc *fakeEgressIPHealthClient, prevNodeIsConnected bool, prevProbes int, failProbes bool, desc string) { + currNodeIsConnected := hcc.IsConnected() + gomega.Expect(currNodeIsConnected || failProbes).To(gomega.BeTrue(), desc) + + if !prevNodeIsConnected && !currNodeIsConnected { + // Not connected (before and after): no probes should be successful + gomega.Expect(hcc.ProbeCount).To(gomega.Equal(prevProbes), desc) + } else if prevNodeIsConnected && currNodeIsConnected { + if failProbes { + // Still connected, but no probes should be successful + gomega.Expect(prevProbes).To(gomega.Equal(hcc.ProbeCount), desc) + } else { + // Still connected and probe counters should be going up + gomega.Expect(prevProbes < hcc.ProbeCount).To(gomega.BeTrue(), desc) + } + } + } + + for _, tt := range tests { + hcc1.FakeProbeFailure = tt.node1FailProbes + hcc2.FakeProbeFailure = tt.node2FailProbes + + prevNode1IsConnected := hcc1.IsConnected() + prevNode2IsConnected := hcc2.IsConnected() + prevNode1Probes := hcc1.ProbeCount + prevNode2Probes := hcc2.ProbeCount + + if tt.tcPrepareFunc != nil { + tt.tcPrepareFunc(hcc1, hcc2) + } + + // Perform connect or probing, depending on the state of the connections + checkEgressNodesReachabilityIterate(fakeClusterManagerOVN.eIPC) + + ttIterCheck(hcc1, prevNode1IsConnected, prevNode1Probes, tt.node1FailProbes, tt.desc) + ttIterCheck(hcc2, prevNode2IsConnected, prevNode2Probes, tt.node2FailProbes, tt.desc) + } + + gomega.Expect(hcc1.IsConnected()).To(gomega.BeTrue()) + gomega.Expect(hcc2.IsConnected()).To(gomega.BeTrue()) + + // Lastly, remove egress assignable from node 2 and make sure it disconnects + node2.Labels = map[string]string{} + _, err = fakeClusterManagerOVN.fakeClient.KubeClient.CoreV1().Nodes().Update(context.TODO(), &node2, metav1.UpdateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + gomega.Eventually(isEgressAssignableNode(node1.Name)).Should(gomega.BeTrue()) + 
gomega.Eventually(isEgressAssignableNode(node2.Name)).Should(gomega.BeFalse()) + + // Explicitly call check reachability so we need not wait for the slow periodic timer + checkEgressNodesReachabilityIterate(fakeClusterManagerOVN.eIPC) + + gomega.Expect(hcc1.IsConnected()).To(gomega.BeTrue()) + gomega.Expect(hcc2.IsConnected()).To(gomega.BeFalse()) + return nil + } + + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + }) + + ginkgo.Context("WatchEgressNodes running with WatchEgressIP", func() { + ginkgo.It("should result in error and event if specified egress IP is a cluster node IP", func() { + app.Action = func(ctx *cli.Context) error { + + egressIP := "192.168.126.51" + node1IPv4 := "192.168.128.202/24" + node1IPv6 := "0:0:0:0:0:feff:c0a8:8e0c/64" + node2IPv4 := "192.168.126.51/24" + + node1 := v1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: node1Name, + Labels: map[string]string{ + "k8s.ovn.org/egress-assignable": "", + }, + Annotations: map[string]string{ + "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", node1IPv4, node1IPv6), + "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":[\"%s\", \"%s\"]}", v4NodeSubnet, v6NodeSubnet), + }, + }, + Status: v1.NodeStatus{ + Conditions: []v1.NodeCondition{ + { + Type: v1.NodeReady, + Status: v1.ConditionTrue, + }, + }, + }, + } + node2 := v1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: node2Name, + Labels: map[string]string{ + "k8s.ovn.org/egress-assignable": "", + }, + Annotations: map[string]string{ + "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", node2IPv4, ""), + "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), + }, + }, + Status: v1.NodeStatus{ + Conditions: []v1.NodeCondition{ + { + Type: v1.NodeReady, + Status: v1.ConditionTrue, + }, + }, + }, + } + + eIP := egressipv1.EgressIP{ + ObjectMeta: newEgressIPMeta(egressIPName), + Spec: egressipv1.EgressIPSpec{ + 
EgressIPs: []string{egressIP}, + }, + Status: egressipv1.EgressIPStatus{ + Items: []egressipv1.EgressIPStatusItem{}, + }, + } + + fakeClusterManagerOVN.start( + &egressipv1.EgressIPList{ + Items: []egressipv1.EgressIP{eIP}, + }, + &v1.NodeList{ + Items: []v1.Node{node1, node2}, + }) + + _, err := fakeClusterManagerOVN.eIPC.WatchEgressNodes() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + _, err = fakeClusterManagerOVN.eIPC.WatchEgressIP() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + gomega.Eventually(getEgressIPAllocatorSizeSafely).Should(gomega.Equal(2)) + gomega.Expect(fakeClusterManagerOVN.eIPC.allocator.cache).To(gomega.HaveKey(node1.Name)) + gomega.Expect(fakeClusterManagerOVN.eIPC.allocator.cache).To(gomega.HaveKey(node2.Name)) + + gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(0)) + gomega.Eventually(fakeClusterManagerOVN.fakeRecorder.Events).Should(gomega.HaveLen(3)) + return nil + } + + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + + ginkgo.It("should remove stale EgressIP setup when node label is removed while ovnkube-master is not running and assign to newly labelled node", func() { + app.Action = func(ctx *cli.Context) error { + + egressIP1 := "192.168.126.25" + node1IPv4 := "192.168.126.51/24" + + egressNamespace := newNamespace(namespace) + + node1 := v1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: node1Name, + Annotations: map[string]string{ + "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\"}", node1IPv4), + "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), + }, + }, + Status: v1.NodeStatus{ + Conditions: []v1.NodeCondition{ + { + Type: v1.NodeReady, + Status: v1.ConditionTrue, + }, + }, + }, + } + node2 := v1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: node2Name, + Labels: map[string]string{ + "k8s.ovn.org/egress-assignable": "", + }, + Annotations: map[string]string{ + "k8s.ovn.org/node-primary-ifaddr": 
fmt.Sprintf("{\"ipv4\": \"%s\"}", node1IPv4), + "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), + }, + }, + Status: v1.NodeStatus{ + Conditions: []v1.NodeCondition{ + { + Type: v1.NodeReady, + Status: v1.ConditionTrue, + }, + }, + }, + } + + eIP := egressipv1.EgressIP{ + ObjectMeta: newEgressIPMeta(egressIPName), + Spec: egressipv1.EgressIPSpec{ + EgressIPs: []string{egressIP1}, + PodSelector: metav1.LabelSelector{ + MatchLabels: egressPodLabel, + }, + NamespaceSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{ + "name": egressNamespace.Name, + }, + }, + }, + Status: egressipv1.EgressIPStatus{ + Items: []egressipv1.EgressIPStatusItem{ + { + Node: node1.Name, + EgressIP: egressIP1, + }, + }, + }, + } + + fakeClusterManagerOVN.start( + &egressipv1.EgressIPList{ + Items: []egressipv1.EgressIP{eIP}, + }, + &v1.NodeList{ + Items: []v1.Node{node1, node2}, + }, + ) + + _, err := fakeClusterManagerOVN.eIPC.WatchEgressNodes() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + _, err = fakeClusterManagerOVN.eIPC.WatchEgressIP() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) + gomega.Eventually(getEgressIPReassignmentCount).Should(gomega.Equal(0)) + gomega.Expect(fakeClusterManagerOVN.eIPC.allocator.cache).To(gomega.HaveKey(node2.Name)) + + return nil + } + + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + + ginkgo.It("should only get assigned EgressIPs which matches their subnet when the node is tagged", func() { + app.Action = func(ctx *cli.Context) error { + + egressIP := "192.168.126.101" + node1IPv4 := "192.168.128.202/24" + node1IPv6 := "0:0:0:0:0:feff:c0a8:8e0c/64" + node2IPv4 := "192.168.126.51/24" + + node1 := v1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: node1Name, + Annotations: map[string]string{ + "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", 
node1IPv4, node1IPv6), + "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), + }, + }, + Status: v1.NodeStatus{ + Conditions: []v1.NodeCondition{ + { + Type: v1.NodeReady, + Status: v1.ConditionTrue, + }, + }, + }, + } + node2 := v1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: node2Name, + Annotations: map[string]string{ + "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", node2IPv4, ""), + "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), + }, + }, + Status: v1.NodeStatus{ + Conditions: []v1.NodeCondition{ + { + Type: v1.NodeReady, + Status: v1.ConditionTrue, + }, + }, + }, + } + + eIP := egressipv1.EgressIP{ + ObjectMeta: newEgressIPMeta(egressIPName), + Spec: egressipv1.EgressIPSpec{ + EgressIPs: []string{egressIP}, + }, + Status: egressipv1.EgressIPStatus{ + Items: []egressipv1.EgressIPStatusItem{}, + }, + } + + fakeClusterManagerOVN.start( + &egressipv1.EgressIPList{ + Items: []egressipv1.EgressIP{eIP}, + }, + &v1.NodeList{ + Items: []v1.Node{node1, node2}, + }) + + _, err := fakeClusterManagerOVN.eIPC.WatchEgressNodes() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + _, err = fakeClusterManagerOVN.eIPC.WatchEgressIP() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + _, ip1V4Sub, err := net.ParseCIDR(node1IPv4) + _, ip1V6Sub, err := net.ParseCIDR(node1IPv6) + _, ip2V4Sub, err := net.ParseCIDR(node2IPv4) + + gomega.Eventually(getEgressIPAllocatorSizeSafely).Should(gomega.Equal(2)) + gomega.Expect(fakeClusterManagerOVN.eIPC.allocator.cache).To(gomega.HaveKey(node1.Name)) + gomega.Expect(fakeClusterManagerOVN.eIPC.allocator.cache).To(gomega.HaveKey(node2.Name)) + gomega.Eventually(isEgressAssignableNode(node1.Name)).Should(gomega.BeFalse()) + gomega.Eventually(isEgressAssignableNode(node2.Name)).Should(gomega.BeFalse()) + gomega.Expect(fakeClusterManagerOVN.eIPC.allocator.cache[node1.Name].egressIPConfig.V4.Net).To(gomega.Equal(ip1V4Sub)) + 
gomega.Expect(fakeClusterManagerOVN.eIPC.allocator.cache[node1.Name].egressIPConfig.V6.Net).To(gomega.Equal(ip1V6Sub)) + gomega.Expect(fakeClusterManagerOVN.eIPC.allocator.cache[node2.Name].egressIPConfig.V4.Net).To(gomega.Equal(ip2V4Sub)) + gomega.Eventually(eIP.Status.Items).Should(gomega.HaveLen(0)) + + node1.Labels = map[string]string{ + "k8s.ovn.org/egress-assignable": "", + } + + _, err = fakeClusterManagerOVN.fakeClient.KubeClient.CoreV1().Nodes().Update(context.TODO(), &node1, metav1.UpdateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(0)) + gomega.Eventually(isEgressAssignableNode(node1.Name)).Should(gomega.BeTrue()) + + gomega.Eventually(getEgressIPReassignmentCount).Should(gomega.Equal(1)) + + node2.Labels = map[string]string{ + "k8s.ovn.org/egress-assignable": "", + } + + _, err = fakeClusterManagerOVN.fakeClient.KubeClient.CoreV1().Nodes().Update(context.TODO(), &node2, metav1.UpdateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) + + egressIPs, nodes := getEgressIPStatus(egressIPName) + gomega.Expect(nodes[0]).To(gomega.Equal(node2.Name)) + gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP)) + gomega.Eventually(getEgressIPReassignmentCount).Should(gomega.Equal(0)) + + return nil + } + + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + + ginkgo.It("should re-balance EgressIPs when their node is removed", func() { + app.Action = func(ctx *cli.Context) error { + config.OVNKubernetesFeature.EnableInterconnect = true // no impact on global eIPC functions + egressIP := "192.168.126.101" + node1IPv4 := "192.168.126.12/24" + node1IPv6 := "0:0:0:0:0:feff:c0a8:8e0c/64" + node2IPv4 := "192.168.126.51/24" + + node1 := v1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: node1Name, + Annotations: map[string]string{ + 
"k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", node1IPv4, node1IPv6), + "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":[\"%s\", \"%s\"]}", v4NodeSubnet, v6NodeSubnet), + }, + Labels: map[string]string{ + "k8s.ovn.org/egress-assignable": "", + }, + }, + Status: v1.NodeStatus{ + Conditions: []v1.NodeCondition{ + { + Type: v1.NodeReady, + Status: v1.ConditionTrue, + }, + }, + }, + } + node2 := v1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: node2Name, + Annotations: map[string]string{ + "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", node2IPv4, ""), + "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), + }, + Labels: map[string]string{ + "k8s.ovn.org/egress-assignable": "", + }, + }, + Status: v1.NodeStatus{ + Conditions: []v1.NodeCondition{ + { + Type: v1.NodeReady, + Status: v1.ConditionTrue, + }, + }, + }, + } + + eIP := egressipv1.EgressIP{ + ObjectMeta: newEgressIPMeta(egressIPName), + Spec: egressipv1.EgressIPSpec{ + EgressIPs: []string{egressIP}, + }, + Status: egressipv1.EgressIPStatus{ + Items: []egressipv1.EgressIPStatusItem{}, + }, + } + + fakeClusterManagerOVN.start( + &egressipv1.EgressIPList{ + Items: []egressipv1.EgressIP{eIP}, + }, + &v1.NodeList{ + Items: []v1.Node{node1}, + }) + + _, err := fakeClusterManagerOVN.eIPC.WatchEgressNodes() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + _, err = fakeClusterManagerOVN.eIPC.WatchEgressIP() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + gomega.Eventually(getEgressIPAllocatorSizeSafely).Should(gomega.Equal(1)) + gomega.Expect(fakeClusterManagerOVN.eIPC.allocator.cache).To(gomega.HaveKey(node1.Name)) + gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) + egressIPs, nodes := getEgressIPStatus(egressIPName) + gomega.Expect(nodes[0]).To(gomega.Equal(node1.Name)) + gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP)) + + _, err = 
fakeClusterManagerOVN.fakeClient.KubeClient.CoreV1().Nodes().Create(context.TODO(), &node2, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) + egressIPs, nodes = getEgressIPStatus(egressIPName) + gomega.Expect(nodes[0]).To(gomega.Equal(node1.Name)) + gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP)) + gomega.Eventually(getEgressIPAllocatorSizeSafely).Should(gomega.Equal(2)) + gomega.Expect(fakeClusterManagerOVN.eIPC.allocator.cache).To(gomega.HaveKey(node1.Name)) + gomega.Expect(fakeClusterManagerOVN.eIPC.allocator.cache).To(gomega.HaveKey(node2.Name)) + + err = fakeClusterManagerOVN.fakeClient.KubeClient.CoreV1().Nodes().Delete(context.TODO(), node1.Name, *metav1.NewDeleteOptions(0)) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Eventually(getEgressIPAllocatorSizeSafely).Should(gomega.Equal(1)) + gomega.Expect(fakeClusterManagerOVN.eIPC.allocator.cache).ToNot(gomega.HaveKey(node1.Name)) + gomega.Expect(fakeClusterManagerOVN.eIPC.allocator.cache).To(gomega.HaveKey(node2.Name)) + gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) + + getNewNode := func() string { + _, nodes = getEgressIPStatus(egressIPName) + if len(nodes) > 0 { + return nodes[0] + } + return "" + } + + gomega.Eventually(getNewNode).Should(gomega.Equal(node2.Name)) + egressIPs, _ = getEgressIPStatus(egressIPName) + gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP)) + return nil + } + + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + + ginkgo.It("egress node update should not mark the node as reachable if there was no label/readiness change", func() { + // When an egress node becomes reachable during a node update event and there are no changes to node labels/readiness + // unassigned egress IP should be eventually added by the periodic reachability check. 
+ // Test steps: + // - disable periodic check from running in background, so it can be called directly from the test + // - assign egress IP to an available node + // - make the node unreachable and verify that the egress IP was unassigned + // - make the node reachable and update a node + // - verify that the egress IP was assigned by calling the periodic reachability check + app.Action = func(ctx *cli.Context) error { + config.OVNKubernetesFeature.EnableInterconnect = true // no impact on global eIPC functions + egressIP := "192.168.126.101" + nodeIPv4 := "192.168.126.51/24" + node := v1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: node1Name, + Annotations: map[string]string{ + "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\"}", nodeIPv4), + "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":[\"%s\"]}", v4NodeSubnet), + }, + Labels: map[string]string{ + "k8s.ovn.org/egress-assignable": "", + }, + }, + Status: v1.NodeStatus{ + Conditions: []v1.NodeCondition{ + { + Type: v1.NodeReady, + Status: v1.ConditionTrue, + }, + }, + }, + } + eIP1 := egressipv1.EgressIP{ + ObjectMeta: newEgressIPMeta(egressIPName), + Spec: egressipv1.EgressIPSpec{ + EgressIPs: []string{egressIP}, + }, + } + fakeClusterManagerOVN.start( + &egressipv1.EgressIPList{ + Items: []egressipv1.EgressIP{eIP1}, + }, + &v1.NodeList{ + Items: []v1.Node{node}, + }, + ) + + // Virtually disable background reachability check by using a huge interval + fakeClusterManagerOVN.eIPC.reachabilityCheckInterval = time.Hour + + _, err := fakeClusterManagerOVN.eIPC.WatchEgressNodes() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + gomega.Eventually(getEgressIPStatusLen(eIP1.Name)).Should(gomega.Equal(1)) + egressIPs, _ := getEgressIPStatus(eIP1.Name) + gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP)) + + hcClient := fakeClusterManagerOVN.eIPC.allocator.cache[node.Name].healthClient.(*fakeEgressIPHealthClient) + hcClient.FakeProbeFailure = true + // explicitly call check 
reachability, periodic checker is not active + checkEgressNodesReachabilityIterate(fakeClusterManagerOVN.eIPC) + gomega.Eventually(getEgressIPStatusLen(eIP1.Name)).Should(gomega.Equal(0)) + + hcClient.FakeProbeFailure = false + node.Annotations["test"] = "dummy" + _, err = fakeClusterManagerOVN.fakeClient.KubeClient.CoreV1().Nodes().Update(context.TODO(), &node, metav1.UpdateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Eventually(hcClient.IsConnected()).Should(gomega.Equal(true)) + // the node should not be marked as reachable in the update handler as it is not getting added + gomega.Consistently(func() bool { return fakeClusterManagerOVN.eIPC.allocator.cache[node.Name].isReachable }).Should(gomega.Equal(false)) + + // egress IP should get assigned on the next checkEgressNodesReachabilityIterate call + // explicitly call check reachability, periodic checker is not active + checkEgressNodesReachabilityIterate(fakeClusterManagerOVN.eIPC) + gomega.Eventually(getEgressIPStatusLen(eIP1.Name)).Should(gomega.Equal(1)) + + return nil + } + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + }) + + ginkgo.Context("IPv6 assignment", func() { + + ginkgo.It("should be able to allocate non-conflicting IP on node with lowest amount of allocations", func() { + app.Action = func(ctx *cli.Context) error { + + fakeClusterManagerOVN.start() + + egressIP := "0:0:0:0:0:feff:c0a8:8e0f" + node1 := setupNode(node1Name, []string{"0:0:0:0:0:feff:c0a8:8e0c/64"}, map[string]string{"0:0:0:0:0:feff:c0a8:8e32": "bogus1", "0:0:0:0:0:feff:c0a8:8e1e": "bogus2"}) + node2 := setupNode(node2Name, []string{"0:0:0:0:0:fedf:c0a8:8e0c/64"}, map[string]string{"0:0:0:0:0:feff:c0a8:8e23": "bogus3"}) + + fakeClusterManagerOVN.eIPC.allocator.cache[node1.name] = &node1 + fakeClusterManagerOVN.eIPC.allocator.cache[node2.name] = &node2 + + eIP := egressipv1.EgressIP{ + ObjectMeta: newEgressIPMeta(egressIPName), + Spec: egressipv1.EgressIPSpec{ + 
EgressIPs: []string{egressIP}, + }, + } + assignedStatuses := fakeClusterManagerOVN.eIPC.assignEgressIPs(eIP.Name, eIP.Spec.EgressIPs) + gomega.Expect(assignedStatuses).To(gomega.HaveLen(1)) + gomega.Expect(assignedStatuses[0].Node).To(gomega.Equal(node2.name)) + gomega.Expect(assignedStatuses[0].EgressIP).To(gomega.Equal(net.ParseIP(egressIP).String())) + + return nil + } + + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + + ginkgo.It("should be able to allocate several EgressIPs and avoid the same node", func() { + app.Action = func(ctx *cli.Context) error { + + fakeClusterManagerOVN.start() + + egressIP1 := "0:0:0:0:0:feff:c0a8:8e0d" + egressIP2 := "0:0:0:0:0:feff:c0a8:8e0f" + node1 := setupNode(node1Name, []string{"0:0:0:0:0:feff:c0a8:8e0c/64"}, map[string]string{"0:0:0:0:0:feff:c0a8:8e32": "bogus1", "0:0:0:0:0:feff:c0a8:8e1e": "bogus2"}) + node2 := setupNode(node2Name, []string{"0:0:0:0:0:fedf:c0a8:8e0c/64"}, map[string]string{"0:0:0:0:0:feff:c0a8:8e23": "bogus3"}) + + fakeClusterManagerOVN.eIPC.allocator.cache[node1.name] = &node1 + fakeClusterManagerOVN.eIPC.allocator.cache[node2.name] = &node2 + + eIP := egressipv1.EgressIP{ + ObjectMeta: newEgressIPMeta(egressIPName), + Spec: egressipv1.EgressIPSpec{ + EgressIPs: []string{egressIP1, egressIP2}, + }, + } + assignedStatuses := fakeClusterManagerOVN.eIPC.assignEgressIPs(eIP.Name, eIP.Spec.EgressIPs) + gomega.Expect(assignedStatuses).To(gomega.HaveLen(2)) + gomega.Expect(assignedStatuses[0].Node).To(gomega.Equal(node2.name)) + gomega.Expect(assignedStatuses[0].EgressIP).To(gomega.Equal(net.ParseIP(egressIP1).String())) + gomega.Expect(assignedStatuses[1].Node).To(gomega.Equal(node1.name)) + gomega.Expect(assignedStatuses[1].EgressIP).To(gomega.Equal(net.ParseIP(egressIP2).String())) + return nil + } + + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + + ginkgo.It("should be able to allocate several EgressIPs and avoid the 
same node and leave one un-assigned without error", func() { + app.Action = func(ctx *cli.Context) error { + + fakeClusterManagerOVN.start() + + egressIP1 := "0:0:0:0:0:feff:c0a8:8e0d" + egressIP2 := "0:0:0:0:0:feff:c0a8:8e0e" + egressIP3 := "0:0:0:0:0:feff:c0a8:8e0f" + + node1 := setupNode(node1Name, []string{"0:0:0:0:0:feff:c0a8:8e0c/64"}, map[string]string{"0:0:0:0:0:feff:c0a8:8e32": "bogus1", "0:0:0:0:0:feff:c0a8:8e1e": "bogus2"}) + node2 := setupNode(node2Name, []string{"0:0:0:0:0:fedf:c0a8:8e0c/64"}, map[string]string{"0:0:0:0:0:feff:c0a8:8e23": "bogus3"}) + + fakeClusterManagerOVN.eIPC.allocator.cache[node1.name] = &node1 + fakeClusterManagerOVN.eIPC.allocator.cache[node2.name] = &node2 + + eIP := egressipv1.EgressIP{ + ObjectMeta: newEgressIPMeta(egressIPName), + Spec: egressipv1.EgressIPSpec{ + EgressIPs: []string{egressIP1, egressIP2, egressIP3}, + }, + } + assignedStatuses := fakeClusterManagerOVN.eIPC.assignEgressIPs(eIP.Name, eIP.Spec.EgressIPs) + gomega.Expect(assignedStatuses).To(gomega.HaveLen(2)) + gomega.Expect(assignedStatuses[0].Node).To(gomega.Equal(node2.name)) + gomega.Expect(assignedStatuses[0].EgressIP).To(gomega.Equal(net.ParseIP(egressIP1).String())) + gomega.Expect(assignedStatuses[1].Node).To(gomega.Equal(node1.name)) + gomega.Expect(assignedStatuses[1].EgressIP).To(gomega.Equal(net.ParseIP(egressIP2).String())) + + return nil + } + + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + + ginkgo.It("should return the already allocated IP with the same node if it is allocated again", func() { + app.Action = func(ctx *cli.Context) error { + + fakeClusterManagerOVN.start() + + egressIP := "0:0:0:0:0:feff:c0a8:8e32" + + node1 := setupNode(node1Name, []string{"0:0:0:0:0:feff:c0a8:8e0c/64"}, map[string]string{egressIP: egressIPName, "0:0:0:0:0:feff:c0a8:8e1e": "bogus1"}) + node2 := setupNode(node2Name, []string{"0:0:0:0:0:fedf:c0a8:8e0c/64"}, map[string]string{"0:0:0:0:0:feff:c0a8:8e23": "bogus2"}) + + 
fakeClusterManagerOVN.eIPC.allocator.cache[node1.name] = &node1 + fakeClusterManagerOVN.eIPC.allocator.cache[node2.name] = &node2 + + egressIPs := []string{egressIP} + eIP := egressipv1.EgressIP{ + ObjectMeta: newEgressIPMeta(egressIPName), + Spec: egressipv1.EgressIPSpec{ + EgressIPs: egressIPs, + }, + } + + assignedStatuses := fakeClusterManagerOVN.eIPC.assignEgressIPs(eIP.Name, eIP.Spec.EgressIPs) + gomega.Expect(assignedStatuses).To(gomega.HaveLen(1)) + gomega.Expect(assignedStatuses[0].Node).To(gomega.Equal(node1Name)) + return nil + } + + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + + ginkgo.It("should not be able to allocate node IP", func() { + app.Action = func(ctx *cli.Context) error { + + fakeClusterManagerOVN.start() + + egressIP := "0:0:0:0:0:feff:c0a8:8e0c" + + node1 := setupNode(node1Name, []string{egressIP + "/64"}, map[string]string{"0:0:0:0:0:feff:c0a8:8e32": "bogus1", "0:0:0:0:0:feff:c0a8:8e1e": "bogus2"}) + node2 := setupNode(node2Name, []string{"0:0:0:0:0:fedf:c0a8:8e0c/64"}, map[string]string{"0:0:0:0:0:feff:c0a8:8e23": "bogus3"}) + + fakeClusterManagerOVN.eIPC.allocator.cache[node1.name] = &node1 + fakeClusterManagerOVN.eIPC.allocator.cache[node2.name] = &node2 + + eIP := egressipv1.EgressIP{ + ObjectMeta: newEgressIPMeta(egressIPName), + Spec: egressipv1.EgressIPSpec{ + EgressIPs: []string{egressIP}, + }, + } + assignedStatuses := fakeClusterManagerOVN.eIPC.assignEgressIPs(eIP.Name, eIP.Spec.EgressIPs) + gomega.Expect(assignedStatuses).To(gomega.HaveLen(0)) + + return nil + } + + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + + ginkgo.It("should not be able to allocate conflicting compressed IP", func() { + app.Action = func(ctx *cli.Context) error { + + fakeClusterManagerOVN.start() + + egressIP := "::feff:c0a8:8e32" + + node1 := setupNode(node1Name, []string{"0:0:0:0:0:feff:c0a8:8e0c/64"}, map[string]string{"0:0:0:0:0:feff:c0a8:8e32": "bogus1", 
"0:0:0:0:0:feff:c0a8:8e1e": "bogus2"}) + node2 := setupNode(node2Name, []string{"0:0:0:0:0:fedf:c0a8:8e0c/64"}, map[string]string{"0:0:0:0:0:feff:c0a8:8e23": "bogus3"}) + + fakeClusterManagerOVN.eIPC.allocator.cache[node1.name] = &node1 + fakeClusterManagerOVN.eIPC.allocator.cache[node2.name] = &node2 + + egressIPs := []string{egressIP} + + eIP := egressipv1.EgressIP{ + ObjectMeta: newEgressIPMeta(egressIPName), + Spec: egressipv1.EgressIPSpec{ + EgressIPs: egressIPs, + }, + } + + assignedStatuses := fakeClusterManagerOVN.eIPC.assignEgressIPs(eIP.Name, eIP.Spec.EgressIPs) + gomega.Expect(assignedStatuses).To(gomega.HaveLen(0)) + return nil + } + + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + + ginkgo.It("should not be able to allocate IPv4 IP on nodes which can only host IPv6", func() { + app.Action = func(ctx *cli.Context) error { + + fakeClusterManagerOVN.start() + + egressIP := "192.168.126.16" + + node1 := setupNode(node1Name, []string{"0:0:0:0:0:feff:c0a8:8e0c/64"}, map[string]string{"0:0:0:0:0:feff:c0a8:8e32": "bogus1", "0:0:0:0:0:feff:c0a8:8e1e": "bogus2"}) + node2 := setupNode(node2Name, []string{"0:0:0:0:0:fedf:c0a8:8e0c/64"}, map[string]string{"0:0:0:0:0:feff:c0a8:8e23": "bogus3"}) + + fakeClusterManagerOVN.eIPC.allocator.cache[node1.name] = &node1 + fakeClusterManagerOVN.eIPC.allocator.cache[node2.name] = &node2 + + eIPs := []string{egressIP} + eIP := egressipv1.EgressIP{ + ObjectMeta: newEgressIPMeta(egressIPName), + Spec: egressipv1.EgressIPSpec{ + EgressIPs: eIPs, + }, + } + + assignedStatuses := fakeClusterManagerOVN.eIPC.assignEgressIPs(eIP.Name, eIP.Spec.EgressIPs) + gomega.Expect(assignedStatuses).To(gomega.HaveLen(0)) + return nil + } + + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + + ginkgo.It("should be able to allocate non-conflicting compressed uppercase IP", func() { + app.Action = func(ctx *cli.Context) error { + + fakeClusterManagerOVN.start() + + 
egressIP := "::FEFF:C0A8:8D32" + + node1 := setupNode(node1Name, []string{"0:0:0:0:0:feff:c0a8:8e0c/64"}, map[string]string{"0:0:0:0:0:feff:c0a8:8e32": "bogus1", "0:0:0:0:0:feff:c0a8:8e1e": "bogus2"}) + node2 := setupNode(node2Name, []string{"0:0:0:0:0:fedf:c0a8:8e0c/64"}, map[string]string{"0:0:0:0:0:feff:c0a8:8e23": "bogus3"}) + + fakeClusterManagerOVN.eIPC.allocator.cache[node1.name] = &node1 + fakeClusterManagerOVN.eIPC.allocator.cache[node2.name] = &node2 + + eIP := egressipv1.EgressIP{ + ObjectMeta: newEgressIPMeta(egressIPName), + Spec: egressipv1.EgressIPSpec{ + EgressIPs: []string{egressIP}, + }, + } + assignedStatuses := fakeClusterManagerOVN.eIPC.assignEgressIPs(eIP.Name, eIP.Spec.EgressIPs) + gomega.Expect(assignedStatuses).To(gomega.HaveLen(1)) + gomega.Expect(assignedStatuses[0].Node).To(gomega.Equal(node2.name)) + gomega.Expect(assignedStatuses[0].EgressIP).To(gomega.Equal(net.ParseIP(egressIP).String())) + return nil + } + + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + + ginkgo.It("should not be able to allocate conflicting compressed uppercase IP", func() { + app.Action = func(ctx *cli.Context) error { + + fakeClusterManagerOVN.start() + + egressIP := "::FEFF:C0A8:8E32" + + node1 := setupNode(node1Name, []string{"0:0:0:0:0:feff:c0a8:8e0c/64"}, map[string]string{"0:0:0:0:0:feff:c0a8:8e32": "bogus1", "0:0:0:0:0:feff:c0a8:8e1e": "bogus2"}) + node2 := setupNode(node2Name, []string{"0:0:0:0:0:fedf:c0a8:8e0c/64"}, map[string]string{"0:0:0:0:0:feff:c0a8:8e23": "bogus3"}) + + fakeClusterManagerOVN.eIPC.allocator.cache[node1.name] = &node1 + fakeClusterManagerOVN.eIPC.allocator.cache[node2.name] = &node2 + egressIPs := []string{egressIP} + + eIP := egressipv1.EgressIP{ + ObjectMeta: newEgressIPMeta(egressIPName), + Spec: egressipv1.EgressIPSpec{ + EgressIPs: egressIPs, + }, + } + + assignedStatuses := fakeClusterManagerOVN.eIPC.assignEgressIPs(eIP.Name, eIP.Spec.EgressIPs) + 
gomega.Expect(assignedStatuses).To(gomega.HaveLen(0)) + return nil + } + + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + + ginkgo.It("should not be able to allocate invalid IP", func() { + app.Action = func(ctx *cli.Context) error { + + fakeClusterManagerOVN.start() + + egressIPs := []string{"0:0:0:0:0:feff:c0a8:8e32:5"} + + eIP := egressipv1.EgressIP{ + ObjectMeta: newEgressIPMeta(egressIPName), + Spec: egressipv1.EgressIPSpec{ + EgressIPs: egressIPs, + }, + } + + assignedStatuses, err := fakeClusterManagerOVN.eIPC.validateEgressIPSpec(eIP.Name, eIP.Spec.EgressIPs) + gomega.Expect(err).To(gomega.HaveOccurred()) + gomega.Expect(err.Error()).To(gomega.Equal(fmt.Sprintf("unable to parse provided EgressIP: %s, invalid", egressIPs[0]))) + gomega.Expect(assignedStatuses).To(gomega.HaveLen(0)) + return nil + } + + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + }) + + ginkgo.Context("Dual-stack assignment", func() { + + ginkgo.It("should be able to allocate non-conflicting IPv4 on node which can host it, even if it happens to be the node with more assignments", func() { + app.Action = func(ctx *cli.Context) error { + + fakeClusterManagerOVN.start() + egressIP := "192.168.126.99" + + node1 := setupNode(node1Name, []string{"0:0:0:0:0:feff:c0a8:8e0c/64"}, map[string]string{"0:0:0:0:0:feff:c0a8:8e23": "bogus1"}) + node2 := setupNode(node2Name, []string{"192.168.126.51/24"}, map[string]string{"192.168.126.68": "bogus1", "192.168.126.102": "bogus2"}) + + fakeClusterManagerOVN.eIPC.allocator.cache[node1.name] = &node1 + fakeClusterManagerOVN.eIPC.allocator.cache[node2.name] = &node2 + + eIP := egressipv1.EgressIP{ + ObjectMeta: newEgressIPMeta(egressIPName), + Spec: egressipv1.EgressIPSpec{ + EgressIPs: []string{egressIP}, + }, + } + assignedStatuses := fakeClusterManagerOVN.eIPC.assignEgressIPs(eIP.Name, eIP.Spec.EgressIPs) + gomega.Expect(assignedStatuses).To(gomega.HaveLen(1)) + 
gomega.Expect(assignedStatuses[0].Node).To(gomega.Equal(node2.name)) + gomega.Expect(assignedStatuses[0].EgressIP).To(gomega.Equal(net.ParseIP(egressIP).String())) + + return nil + } + + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + + }) + + ginkgo.Context("IPv4 assignment", func() { + + ginkgo.It("Should not be able to assign egress IP defined in CIDR notation", func() { + app.Action = func(ctx *cli.Context) error { + + fakeClusterManagerOVN.start() + + egressIPs := []string{"192.168.126.99/32"} + + node1 := setupNode(node1Name, []string{"192.168.126.12/24"}, map[string]string{"192.168.126.102": "bogus1", "192.168.126.111": "bogus2"}) + node2 := setupNode(node2Name, []string{"192.168.126.51/24"}, map[string]string{"192.168.126.68": "bogus3"}) + + fakeClusterManagerOVN.eIPC.allocator.cache[node1.name] = &node1 + fakeClusterManagerOVN.eIPC.allocator.cache[node2.name] = &node2 + + eIP := egressipv1.EgressIP{ + ObjectMeta: newEgressIPMeta(egressIPName), + Spec: egressipv1.EgressIPSpec{ + EgressIPs: egressIPs, + }, + } + + validatedIPs, err := fakeClusterManagerOVN.eIPC.validateEgressIPSpec(eIP.Name, eIP.Spec.EgressIPs) + gomega.Expect(err).To(gomega.HaveOccurred()) + gomega.Expect(err.Error()).To(gomega.Equal(fmt.Sprintf("unable to parse provided EgressIP: %s, invalid", egressIPs[0]))) + gomega.Expect(validatedIPs).To(gomega.HaveLen(0)) + return nil + } + + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + + }) + + ginkgo.Context("WatchEgressIP", func() { + + ginkgo.It("should update status correctly for single-stack IPv4", func() { + app.Action = func(ctx *cli.Context) error { + fakeClusterManagerOVN.start() + + egressIP := "192.168.126.10" + node1 := setupNode(node1Name, []string{"192.168.126.12/24"}, map[string]string{"192.168.126.102": "bogus1", "192.168.126.111": "bogus2"}) + node2 := setupNode(node2Name, []string{"192.168.126.51/24"}, map[string]string{"192.168.126.68": 
"bogus3"}) + + fakeClusterManagerOVN.eIPC.allocator.cache[node1.name] = &node1 + fakeClusterManagerOVN.eIPC.allocator.cache[node2.name] = &node2 + + eIP := egressipv1.EgressIP{ + ObjectMeta: newEgressIPMeta(egressIPName), + Spec: egressipv1.EgressIPSpec{ + EgressIPs: []string{egressIP}, + NamespaceSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{ + "name": "does-not-exist", + }, + }, + }, + } + + _, err := fakeClusterManagerOVN.eIPC.WatchEgressIP() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + _, err = fakeClusterManagerOVN.fakeClient.EgressIPClient.K8sV1().EgressIPs().Create(context.TODO(), &eIP, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) + egressIPs, nodes := getEgressIPStatus(egressIPName) + gomega.Expect(nodes[0]).To(gomega.Equal(node2.name)) + gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP)) + + return nil + } + + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + + ginkgo.It("should update status correctly for single-stack IPv6", func() { + app.Action = func(ctx *cli.Context) error { + fakeClusterManagerOVN.start() + + egressIP := "0:0:0:0:0:feff:c0a8:8e0d" + + node1 := setupNode(node1Name, []string{"0:0:0:0:0:feff:c0a8:8e0c/64"}, map[string]string{"0:0:0:0:0:feff:c0a8:8e32": "bogus1", "0:0:0:0:0:feff:c0a8:8e1e": "bogus2"}) + node2 := setupNode(node2Name, []string{"0:0:0:0:0:fedf:c0a8:8e0c/64"}, map[string]string{"0:0:0:0:0:feff:c0a8:8e23": "bogus3"}) + + fakeClusterManagerOVN.eIPC.allocator.cache[node1.name] = &node1 + fakeClusterManagerOVN.eIPC.allocator.cache[node2.name] = &node2 + + eIP := egressipv1.EgressIP{ + ObjectMeta: newEgressIPMeta(egressIPName), + Spec: egressipv1.EgressIPSpec{ + EgressIPs: []string{egressIP}, + }, + } + + _, err := fakeClusterManagerOVN.eIPC.WatchEgressIP() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + _, err = 
fakeClusterManagerOVN.fakeClient.EgressIPClient.K8sV1().EgressIPs().Create(context.TODO(), &eIP, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) + egressIPs, nodes := getEgressIPStatus(egressIPName) + gomega.Expect(nodes[0]).To(gomega.Equal(node2.name)) + gomega.Expect(egressIPs[0]).To(gomega.Equal(net.ParseIP(egressIP).String())) + + return nil + } + + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + + ginkgo.It("should update status correctly for dual-stack", func() { + app.Action = func(ctx *cli.Context) error { + fakeClusterManagerOVN.start() + + egressIPv4 := "192.168.126.101" + egressIPv6 := "0:0:0:0:0:feff:c0a8:8e0d" + + node1 := setupNode(node1Name, []string{"0:0:0:0:0:feff:c0a8:8e0c/64"}, map[string]string{"0:0:0:0:0:feff:c0a8:8e23": "bogus1"}) + node2 := setupNode(node2Name, []string{"192.168.126.51/24"}, map[string]string{"192.168.126.68": "bogus2", "192.168.126.102": "bogus3"}) + + fakeClusterManagerOVN.eIPC.allocator.cache[node1.name] = &node1 + fakeClusterManagerOVN.eIPC.allocator.cache[node2.name] = &node2 + + eIP := egressipv1.EgressIP{ + ObjectMeta: newEgressIPMeta(egressIPName), + Spec: egressipv1.EgressIPSpec{ + EgressIPs: []string{egressIPv4, egressIPv6}, + }, + } + + _, err := fakeClusterManagerOVN.eIPC.WatchEgressIP() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + _, err = fakeClusterManagerOVN.fakeClient.EgressIPClient.K8sV1().EgressIPs().Create(context.TODO(), &eIP, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(2)) + egressIPs, nodes := getEgressIPStatus(egressIPName) + gomega.Expect(nodes).To(gomega.ConsistOf(node2.name, node1.name)) + gomega.Expect(egressIPs).To(gomega.ConsistOf(net.ParseIP(egressIPv6).String(), net.ParseIP(egressIPv4).String())) + return nil + } + + err := 
app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + }) + + ginkgo.Context("syncEgressIP for dual-stack", func() { + + ginkgo.It("should not update valid assignments", func() { + app.Action = func(ctx *cli.Context) error { + + egressIPv4 := "192.168.126.101" + egressIPv6 := "0:0:0:0:0:feff:c0a8:8e0d" + + node1 := setupNode(node1Name, []string{"0:0:0:0:0:feff:c0a8:8e0c/64"}, map[string]string{}) + node2 := setupNode(node2Name, []string{"192.168.126.51/24"}, map[string]string{"192.168.126.102": "bogus3"}) + + eIP := egressipv1.EgressIP{ + ObjectMeta: newEgressIPMeta(egressIPName), + Spec: egressipv1.EgressIPSpec{ + EgressIPs: []string{egressIPv4, egressIPv6}, + }, + Status: egressipv1.EgressIPStatus{ + Items: []egressipv1.EgressIPStatusItem{ + { + EgressIP: egressIPv4, + Node: node2.name, + }, + { + EgressIP: net.ParseIP(egressIPv6).String(), + Node: node1.name, + }, + }, + }, + } + + fakeClusterManagerOVN.start( + &egressipv1.EgressIPList{ + Items: []egressipv1.EgressIP{eIP}, + }, + ) + + fakeClusterManagerOVN.eIPC.allocator.cache[node1.name] = &node1 + fakeClusterManagerOVN.eIPC.allocator.cache[node2.name] = &node2 + + _, err := fakeClusterManagerOVN.eIPC.WatchEgressIP() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(2)) + egressIPs, nodes := getEgressIPStatus(egressIPName) + gomega.Expect(nodes).To(gomega.ConsistOf(eIP.Status.Items[0].Node, eIP.Status.Items[1].Node)) + gomega.Expect(egressIPs).To(gomega.ConsistOf(eIP.Status.Items[0].EgressIP, eIP.Status.Items[1].EgressIP)) + + return nil + } + + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + }) + + ginkgo.Context("syncEgressIP for IPv4", func() { + + ginkgo.It("should update invalid assignments on duplicated node", func() { + app.Action = func(ctx *cli.Context) error { + + egressIP1 := "192.168.126.101" + egressIP2 := "192.168.126.100" + + node1 := 
setupNode(node1Name, []string{"192.168.126.12/24"}, map[string]string{egressIP1: egressIPName, egressIP2: egressIPName}) + node2 := setupNode(node2Name, []string{"192.168.126.51/24"}, map[string]string{"192.168.126.68": "bogus3"}) + + eIP := egressipv1.EgressIP{ + ObjectMeta: newEgressIPMeta(egressIPName), + Spec: egressipv1.EgressIPSpec{ + EgressIPs: []string{egressIP1, egressIP2}, + }, + Status: egressipv1.EgressIPStatus{ + Items: []egressipv1.EgressIPStatusItem{ + { + EgressIP: egressIP1, + Node: node1.name, + }, + { + EgressIP: egressIP2, + Node: node1.name, + }, + }, + }, + } + fakeClusterManagerOVN.start( + &egressipv1.EgressIPList{ + Items: []egressipv1.EgressIP{eIP}, + }, + ) + + fakeClusterManagerOVN.eIPC.allocator.cache[node1.name] = &node1 + fakeClusterManagerOVN.eIPC.allocator.cache[node2.name] = &node2 + + _, err := fakeClusterManagerOVN.eIPC.WatchEgressIP() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(2)) + egressIPs, nodes := getEgressIPStatus(egressIPName) + gomega.Expect(nodes).To(gomega.ConsistOf(node1.name, node2.name)) + gomega.Expect(egressIPs).To(gomega.ConsistOf(eIP.Status.Items[0].EgressIP, eIP.Status.Items[1].EgressIP)) + return nil + } + + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + + ginkgo.It("should update invalid assignments with incorrectly parsed IP", func() { + app.Action = func(ctx *cli.Context) error { + + egressIP1 := "192.168.126.101" + egressIPIncorrect := "192.168.126.1000" + + node1 := setupNode(node1Name, []string{"192.168.126.12/24"}, map[string]string{"192.168.126.102": "bogus1", "192.168.126.111": "bogus2"}) + node2 := setupNode(node2Name, []string{"192.168.126.51/24"}, map[string]string{"192.168.126.68": "bogus3"}) + + eIP := egressipv1.EgressIP{ + ObjectMeta: newEgressIPMeta(egressIPName), + Spec: egressipv1.EgressIPSpec{ + EgressIPs: []string{egressIP1}, + }, + Status: 
egressipv1.EgressIPStatus{ + Items: []egressipv1.EgressIPStatusItem{ + { + EgressIP: egressIPIncorrect, + Node: node1.name, + }, + }, + }, + } + + fakeClusterManagerOVN.start( + &egressipv1.EgressIPList{ + Items: []egressipv1.EgressIP{eIP}, + }, + ) + + fakeClusterManagerOVN.eIPC.allocator.cache[node1.name] = &node1 + fakeClusterManagerOVN.eIPC.allocator.cache[node2.name] = &node2 + + _, err := fakeClusterManagerOVN.eIPC.WatchEgressIP() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) + egressIPs, nodes := getEgressIPStatus(egressIPName) + gomega.Expect(nodes[0]).To(gomega.Equal(node2.name)) + gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP1)) + + return nil + } + + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + + ginkgo.It("should update invalid assignments with unhostable IP on a node", func() { + app.Action = func(ctx *cli.Context) error { + + egressIP1 := "192.168.126.101" + egressIPIncorrect := "192.168.128.100" + + node1 := setupNode(node1Name, []string{"192.168.126.12/24"}, map[string]string{"192.168.126.102": "bogus1", "192.168.126.111": "bogus2"}) + node2 := setupNode(node2Name, []string{"192.168.126.51/24"}, map[string]string{"192.168.126.68": "bogus3"}) + + eIP := egressipv1.EgressIP{ + ObjectMeta: newEgressIPMeta(egressIPName), + Spec: egressipv1.EgressIPSpec{ + EgressIPs: []string{egressIP1}, + }, + Status: egressipv1.EgressIPStatus{ + Items: []egressipv1.EgressIPStatusItem{ + { + EgressIP: egressIPIncorrect, + Node: node1.name, + }, + }, + }, + } + + fakeClusterManagerOVN.start( + &egressipv1.EgressIPList{ + Items: []egressipv1.EgressIP{eIP}, + }, + ) + + fakeClusterManagerOVN.eIPC.allocator.cache[node1.name] = &node1 + fakeClusterManagerOVN.eIPC.allocator.cache[node2.name] = &node2 + + _, err := fakeClusterManagerOVN.eIPC.WatchEgressIP() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + 
gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) + egressIPs, nodes := getEgressIPStatus(egressIPName) + gomega.Expect(nodes[0]).To(gomega.Equal(node2.name)) + gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP1)) + + return nil + } + + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + + ginkgo.It("should not update valid assignment", func() { + app.Action = func(ctx *cli.Context) error { + + egressIP1 := "192.168.126.101" + + node1 := setupNode(node1Name, []string{"192.168.126.12/24"}, map[string]string{"192.168.126.111": "bogus2"}) + node2 := setupNode(node2Name, []string{"192.168.126.51/24"}, map[string]string{"192.168.126.68": "bogus3"}) + + eIP := egressipv1.EgressIP{ + ObjectMeta: newEgressIPMeta(egressIPName), + Spec: egressipv1.EgressIPSpec{ + EgressIPs: []string{egressIP1}, + }, + Status: egressipv1.EgressIPStatus{ + Items: []egressipv1.EgressIPStatusItem{ + { + EgressIP: egressIP1, + Node: node1.name, + }, + }, + }, + } + + fakeClusterManagerOVN.start( + &egressipv1.EgressIPList{ + Items: []egressipv1.EgressIP{eIP}, + }, + ) + + fakeClusterManagerOVN.eIPC.allocator.cache[node1.name] = &node1 + fakeClusterManagerOVN.eIPC.allocator.cache[node2.name] = &node2 + + _, err := fakeClusterManagerOVN.eIPC.WatchEgressIP() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) + egressIPs, nodes := getEgressIPStatus(egressIPName) + gomega.Expect(nodes[0]).To(gomega.Equal(node1.name)) + gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP1)) + + return nil + } + + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + }) + + ginkgo.Context("AddEgressIP for IPv4", func() { + + ginkgo.It("should not create two EgressIPs with same egress IP value", func() { + app.Action = func(ctx *cli.Context) error { + egressIP1 := "192.168.126.101" + + node1 := setupNode(node1Name, 
[]string{"192.168.126.12/24"}, map[string]string{"192.168.126.102": "bogus1", "192.168.126.111": "bogus2"}) + node2 := setupNode(node2Name, []string{"192.168.126.51/24"}, map[string]string{"192.168.126.68": "bogus3"}) + + eIP1 := egressipv1.EgressIP{ + ObjectMeta: newEgressIPMeta("egressip"), + Spec: egressipv1.EgressIPSpec{ + EgressIPs: []string{egressIP1}, + }, + } + eIP2 := egressipv1.EgressIP{ + ObjectMeta: newEgressIPMeta("egressip2"), + Spec: egressipv1.EgressIPSpec{ + EgressIPs: []string{egressIP1}, + }, + } + + fakeClusterManagerOVN.start() + + fakeClusterManagerOVN.eIPC.allocator.cache[node1.name] = &node1 + fakeClusterManagerOVN.eIPC.allocator.cache[node2.name] = &node2 + + _, err := fakeClusterManagerOVN.eIPC.WatchEgressIP() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + _, err = fakeClusterManagerOVN.fakeClient.EgressIPClient.K8sV1().EgressIPs().Create(context.TODO(), &eIP1, metav1.CreateOptions{}) + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + + gomega.Eventually(getEgressIPStatusLen(eIP1.Name)).Should(gomega.Equal(1)) + egressIPs, nodes := getEgressIPStatus(eIP1.Name) + gomega.Expect(nodes[0]).To(gomega.Equal(node2.name)) + gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP1)) + + _, err = fakeClusterManagerOVN.fakeClient.EgressIPClient.K8sV1().EgressIPs().Create(context.TODO(), &eIP2, metav1.CreateOptions{}) + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + + gomega.Eventually(getEgressIPStatusLen(eIP2.Name)).Should(gomega.Equal(0)) + + return nil + } + + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + + }) + + ginkgo.Context("UpdateEgressIP for IPv4", func() { + + ginkgo.It("should perform re-assingment of EgressIPs", func() { + app.Action = func(ctx *cli.Context) error { + + egressIP := "192.168.126.101" + updateEgressIP := "192.168.126.10" + + node1 := setupNode(node1Name, []string{"192.168.126.41/24"}, map[string]string{"192.168.126.102": "bogus1", "192.168.126.111": "bogus2"}) + node2 
:= setupNode(node2Name, []string{"192.168.126.51/24"}, map[string]string{"192.168.126.68": "bogus3"}) + + eIP1 := egressipv1.EgressIP{ + ObjectMeta: newEgressIPMeta(egressIPName), + Spec: egressipv1.EgressIPSpec{ + EgressIPs: []string{egressIP}, + }, + } + fakeClusterManagerOVN.start() + + fakeClusterManagerOVN.eIPC.allocator.cache[node1.name] = &node1 + fakeClusterManagerOVN.eIPC.allocator.cache[node2.name] = &node2 + _, err := fakeClusterManagerOVN.eIPC.WatchEgressIP() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + _, err = fakeClusterManagerOVN.fakeClient.EgressIPClient.K8sV1().EgressIPs().Create(context.TODO(), &eIP1, metav1.CreateOptions{}) + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + + gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) + egressIPs, nodes := getEgressIPStatus(egressIPName) + gomega.Expect(nodes[0]).To(gomega.Equal(node2.name)) + gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP)) + + eIPToUpdate, err := fakeClusterManagerOVN.fakeClient.EgressIPClient.K8sV1().EgressIPs().Get(context.TODO(), eIP1.Name, metav1.GetOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + eIPToUpdate.Spec.EgressIPs = []string{updateEgressIP} + + _, err = fakeClusterManagerOVN.fakeClient.EgressIPClient.K8sV1().EgressIPs().Update(context.TODO(), eIPToUpdate, metav1.UpdateOptions{}) + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + + getEgressIP := func() string { + egressIPs, _ = getEgressIPStatus(egressIPName) + if len(egressIPs) == 0 { + return "try again" + } + return egressIPs[0] + } + + gomega.Eventually(getEgressIP).Should(gomega.Equal(updateEgressIP)) + _, nodes = getEgressIPStatus(egressIPName) + gomega.Expect(nodes[0]).To(gomega.Equal(node2.name)) + return nil + } + + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + }) +}) diff --git a/go-controller/pkg/clustermanager/egressip_event_handler.go b/go-controller/pkg/clustermanager/egressip_event_handler.go new file 
mode 100644 index 0000000000..e1ff8142fb --- /dev/null +++ b/go-controller/pkg/clustermanager/egressip_event_handler.go @@ -0,0 +1,263 @@ +package clustermanager + +import ( + "fmt" + "reflect" + + ocpcloudnetworkapi "github.com/openshift/api/cloudnetwork/v1" + egressipv1 "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressip/v1" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/factory" + objretry "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/retry" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" + v1 "k8s.io/api/core/v1" + cache "k8s.io/client-go/tools/cache" + "k8s.io/klog/v2" +) + +// egressIPClusterControllerEventHandler object handles the events +// from retry framework for the egressIPClusterController. +type egressIPClusterControllerEventHandler struct { + objretry.EventHandler + objType reflect.Type + eIPC *egressIPClusterController + syncFunc func([]interface{}) error +} + +// egressIPClusterControllerEventHandler functions + +// AddResource adds the specified object to the cluster according to its type and +// returns the error, if any, yielded during object creation. +func (h *egressIPClusterControllerEventHandler) AddResource(obj interface{}, fromRetryLoop bool) error { + switch h.objType { + case factory.EgressNodeType: + node := obj.(*v1.Node) + // Initialize the allocator on every update, + // ovnkube-node/cloud-network-config-controller will make sure to + // annotate the node with the egressIPConfig, but that might have + // happened after we processed the ADD for that object, hence keep + // retrying for all UPDATEs. 
+ if err := h.eIPC.initEgressIPAllocator(node); err != nil { + klog.Warningf("Egress node initialization error: %v", err) + } + nodeEgressLabel := util.GetNodeEgressLabel() + nodeLabels := node.GetLabels() + _, hasEgressLabel := nodeLabels[nodeEgressLabel] + if hasEgressLabel { + h.eIPC.setNodeEgressAssignable(node.Name, true) + } + isReady := h.eIPC.isEgressNodeReady(node) + if isReady { + h.eIPC.setNodeEgressReady(node.Name, true) + } + isReachable := h.eIPC.isEgressNodeReachable(node) + if hasEgressLabel && isReachable && isReady { + h.eIPC.setNodeEgressReachable(node.Name, true) + if err := h.eIPC.addEgressNode(node.Name); err != nil { + return err + } + } + case factory.EgressIPType: + eIP := obj.(*egressipv1.EgressIP) + return h.eIPC.reconcileEgressIP(nil, eIP) + case factory.CloudPrivateIPConfigType: + cloudPrivateIPConfig := obj.(*ocpcloudnetworkapi.CloudPrivateIPConfig) + return h.eIPC.reconcileCloudPrivateIPConfig(nil, cloudPrivateIPConfig) + default: + return fmt.Errorf("no add function for object type %s", h.objType) + } + return nil +} + +// UpdateResource updates the specified object in the cluster to its version in newObj according +// to its type and returns the error, if any, yielded during the object update. +// The inRetryCache boolean argument is to indicate if the given resource is in the retryCache or not. 
+func (h *egressIPClusterControllerEventHandler) UpdateResource(oldObj, newObj interface{}, inRetryCache bool) error { + switch h.objType { + case factory.EgressIPType: + oldEIP := oldObj.(*egressipv1.EgressIP) + newEIP := newObj.(*egressipv1.EgressIP) + return h.eIPC.reconcileEgressIP(oldEIP, newEIP) + case factory.EgressNodeType: + oldNode := oldObj.(*v1.Node) + newNode := newObj.(*v1.Node) + // Initialize the allocator on every update, + // ovnkube-node/cloud-network-config-controller will make sure to + // annotate the node with the egressIPConfig, but that might have + // happened after we processed the ADD for that object, hence keep + // retrying for all UPDATEs. + if err := h.eIPC.initEgressIPAllocator(newNode); err != nil { + klog.Warningf("Egress node initialization error: %v", err) + } + nodeEgressLabel := util.GetNodeEgressLabel() + oldLabels := oldNode.GetLabels() + newLabels := newNode.GetLabels() + _, oldHadEgressLabel := oldLabels[nodeEgressLabel] + _, newHasEgressLabel := newLabels[nodeEgressLabel] + // If the node is not labeled for egress assignment, just return + // directly, we don't really need to set the ready / reachable + // status on this node if the user doesn't care about using it. 
+ if !oldHadEgressLabel && !newHasEgressLabel { + return nil + } + h.eIPC.setNodeEgressAssignable(newNode.Name, newHasEgressLabel) + if oldHadEgressLabel && !newHasEgressLabel { + klog.Infof("Node: %s has been un-labeled, deleting it from egress assignment", newNode.Name) + return h.eIPC.deleteEgressNode(oldNode.Name) + } + isOldReady := h.eIPC.isEgressNodeReady(oldNode) + isNewReady := h.eIPC.isEgressNodeReady(newNode) + isNewReachable := h.eIPC.isEgressNodeReachable(newNode) + h.eIPC.setNodeEgressReady(newNode.Name, isNewReady) + if !oldHadEgressLabel && newHasEgressLabel { + klog.Infof("Node: %s has been labeled, adding it for egress assignment", newNode.Name) + if isNewReady && isNewReachable { + h.eIPC.setNodeEgressReachable(newNode.Name, isNewReachable) + if err := h.eIPC.addEgressNode(newNode.Name); err != nil { + return err + } + } else { + klog.Warningf("Node: %s has been labeled, but node is not ready"+ + " and reachable, cannot use it for egress assignment", newNode.Name) + } + return nil + } + if isOldReady == isNewReady { + return nil + } + if !isNewReady { + klog.Warningf("Node: %s is not ready, deleting it from egress assignment", newNode.Name) + if err := h.eIPC.deleteEgressNode(newNode.Name); err != nil { + return err + } + } else if isNewReady && isNewReachable { + klog.Infof("Node: %s is ready and reachable, adding it for egress assignment", newNode.Name) + h.eIPC.setNodeEgressReachable(newNode.Name, isNewReachable) + if err := h.eIPC.addEgressNode(newNode.Name); err != nil { + return err + } + } + return nil + case factory.CloudPrivateIPConfigType: + oldCloudPrivateIPConfig := oldObj.(*ocpcloudnetworkapi.CloudPrivateIPConfig) + newCloudPrivateIPConfig := newObj.(*ocpcloudnetworkapi.CloudPrivateIPConfig) + return h.eIPC.reconcileCloudPrivateIPConfig(oldCloudPrivateIPConfig, newCloudPrivateIPConfig) + default: + return fmt.Errorf("no update function for object type %s", h.objType) + } +} + +// DeleteResource deletes the object from the cluster 
according to the delete logic of its resource type. +// cachedObj is the internal cache entry for this object, used for now for pods and network policies. +func (h *egressIPClusterControllerEventHandler) DeleteResource(obj, cachedObj interface{}) error { + switch h.objType { + case factory.EgressIPType: + eIP := obj.(*egressipv1.EgressIP) + return h.eIPC.reconcileEgressIP(eIP, nil) + case factory.EgressNodeType: + node := obj.(*v1.Node) + h.eIPC.deleteNodeForEgress(node) + nodeEgressLabel := util.GetNodeEgressLabel() + nodeLabels := node.GetLabels() + _, hasEgressLabel := nodeLabels[nodeEgressLabel] + if hasEgressLabel { + if err := h.eIPC.deleteEgressNode(node.Name); err != nil { + return err + } + } + return nil + case factory.CloudPrivateIPConfigType: + cloudPrivateIPConfig := obj.(*ocpcloudnetworkapi.CloudPrivateIPConfig) + return h.eIPC.reconcileCloudPrivateIPConfig(cloudPrivateIPConfig, nil) + default: + return fmt.Errorf("no delete function for object type %s", h.objType) + } +} + +func (h *egressIPClusterControllerEventHandler) SyncFunc(objs []interface{}) error { + var syncFunc func([]interface{}) error + + if h.syncFunc != nil { + // syncFunc was provided explicitly + syncFunc = h.syncFunc + } else { + switch h.objType { + case factory.EgressNodeType: + syncFunc = h.eIPC.initEgressNodeReachability + case factory.EgressIPType, + factory.CloudPrivateIPConfigType: + syncFunc = nil + + default: + return fmt.Errorf("no sync function for object type %s", h.objType) + } + } + if syncFunc == nil { + return nil + } + return syncFunc(objs) +} + +// RecordAddEvent records the add event on this object. Not used here. +func (h *egressIPClusterControllerEventHandler) RecordAddEvent(obj interface{}) { +} + +// RecordUpdateEvent records the update event on this object. Not used here. +func (h *egressIPClusterControllerEventHandler) RecordUpdateEvent(obj interface{}) { +} + +// RecordDeleteEvent records the delete event on this object. Not used here. 
+func (h *egressIPClusterControllerEventHandler) RecordDeleteEvent(obj interface{}) { +} + +func (h *egressIPClusterControllerEventHandler) RecordSuccessEvent(obj interface{}) { +} + +// RecordErrorEvent records an error event on this object. Not used here. +func (h *egressIPClusterControllerEventHandler) RecordErrorEvent(obj interface{}, reason string, err error) { +} + +// IsResourceScheduled returns true if the object has been scheduled. Always returns true. +func (h *egressIPClusterControllerEventHandler) IsResourceScheduled(obj interface{}) bool { + return true +} + +// IsObjectInTerminalState returns true if the object is in a terminal state. Always returns false. +func (h *egressIPClusterControllerEventHandler) IsObjectInTerminalState(obj interface{}) bool { + return false +} + +func (h *egressIPClusterControllerEventHandler) AreResourcesEqual(obj1, obj2 interface{}) (bool, error) { + return false, nil +} + +// GetInternalCacheEntry returns the internal cache entry for this object +func (h *egressIPClusterControllerEventHandler) GetInternalCacheEntry(obj interface{}) interface{} { + return nil +} + +// GetResourceFromInformerCache returns the latest state of the object from the informers cache +// given an object key and its type +func (h *egressIPClusterControllerEventHandler) GetResourceFromInformerCache(key string) (interface{}, error) { + var obj interface{} + var name string + var err error + + _, name, err = cache.SplitMetaNamespaceKey(key) + if err != nil { + return nil, fmt.Errorf("failed to split key %s: %v", key, err) + } + + switch h.objType { + case factory.EgressNodeType: + obj, err = h.eIPC.watchFactory.GetNode(name) + case factory.CloudPrivateIPConfigType: + obj, err = h.eIPC.watchFactory.GetCloudPrivateIPConfig(name) + case factory.EgressIPType: + obj, err = h.eIPC.watchFactory.GetEgressIP(name) + + default: + err = fmt.Errorf("object type %s not supported, cannot retrieve it from informers cache", + h.objType) + } + return obj, err +} diff
--git a/go-controller/pkg/clustermanager/fake_cluster_manager_test.go b/go-controller/pkg/clustermanager/fake_cluster_manager_test.go new file mode 100644 index 0000000000..805f9d181e --- /dev/null +++ b/go-controller/pkg/clustermanager/fake_cluster_manager_test.go @@ -0,0 +1,65 @@ +package clustermanager + +import ( + "sync" + + "github.com/onsi/gomega" + egressip "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressip/v1" + egressipfake "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressip/v1/apis/clientset/versioned/fake" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/factory" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/client-go/kubernetes/fake" + "k8s.io/client-go/tools/record" +) + +type FakeClusterManager struct { + fakeClient *util.OVNClusterManagerClientset + watcher *factory.WatchFactory + eIPC *egressIPClusterController + stopChan chan struct{} + wg *sync.WaitGroup + fakeRecorder *record.FakeRecorder +} + +func NewFakeClusterManagerOVN() *FakeClusterManager { + return &FakeClusterManager{ + fakeRecorder: record.NewFakeRecorder(10), + } +} + +func (o *FakeClusterManager) start(objects ...runtime.Object) { + egressIPObjects := []runtime.Object{} + v1Objects := []runtime.Object{} + for _, object := range objects { + if _, isEgressIPObject := object.(*egressip.EgressIPList); isEgressIPObject { + egressIPObjects = append(egressIPObjects, object) + } else { + v1Objects = append(v1Objects, object) + } + } + o.fakeClient = &util.OVNClusterManagerClientset{ + KubeClient: fake.NewSimpleClientset(v1Objects...), + EgressIPClient: egressipfake.NewSimpleClientset(egressIPObjects...), + } + o.init() +} + +func (o *FakeClusterManager) init() { + var err error + o.watcher, err = factory.NewClusterManagerWatchFactory(o.fakeClient) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = o.watcher.Start() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + o.stopChan = make(chan 
struct{}) + o.wg = &sync.WaitGroup{} + o.eIPC = newEgressIPController(o.fakeClient, o.watcher, o.fakeRecorder) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) +} + +func (o *FakeClusterManager) shutdown() { + o.watcher.Shutdown() + close(o.stopChan) + o.wg.Wait() +} diff --git a/go-controller/pkg/factory/factory.go b/go-controller/pkg/factory/factory.go index 36264388c6..04472d3c2f 100644 --- a/go-controller/pkg/factory/factory.go +++ b/go-controller/pkg/factory/factory.go @@ -166,7 +166,6 @@ func NewMasterWatchFactory(ovnClientset *util.OVNMasterClientset) (*WatchFactory iFactory: informerfactory.NewSharedInformerFactory(ovnClientset.KubeClient, resyncInterval), eipFactory: egressipinformerfactory.NewSharedInformerFactory(ovnClientset.EgressIPClient, resyncInterval), efFactory: egressfirewallinformerfactory.NewSharedInformerFactory(ovnClientset.EgressFirewallClient, resyncInterval), - cpipcFactory: ocpcloudnetworkinformerfactory.NewSharedInformerFactory(ovnClientset.CloudNetworkClient, resyncInterval), egressQoSFactory: egressqosinformerfactory.NewSharedInformerFactory(ovnClientset.EgressQoSClient, resyncInterval), mnpFactory: mnpinformerfactory.NewSharedInformerFactory(ovnClientset.MultiNetworkPolicyClient, resyncInterval), egressServiceFactory: egressserviceinformerfactory.NewSharedInformerFactory(ovnClientset.EgressServiceClient, resyncInterval), @@ -256,12 +255,6 @@ func NewMasterWatchFactory(ovnClientset *util.OVNMasterClientset) (*WatchFactory return nil, err } } - if util.PlatformTypeIsEgressIPCloudProvider() { - wf.informers[CloudPrivateIPConfigType], err = newInformer(CloudPrivateIPConfigType, wf.cpipcFactory.Cloud().V1().CloudPrivateIPConfigs().Informer()) - if err != nil { - return nil, err - } - } if config.OVNKubernetesFeature.EnableEgressQoS { wf.informers[EgressQoSType], err = newInformer(EgressQoSType, wf.egressQoSFactory.K8s().V1().EgressQoSes().Informer()) if err != nil { diff --git a/go-controller/pkg/factory/factory_test.go 
b/go-controller/pkg/factory/factory_test.go index 43c91bb58a..5810119e46 100644 --- a/go-controller/pkg/factory/factory_test.go +++ b/go-controller/pkg/factory/factory_test.go @@ -253,6 +253,7 @@ func (c *handlerCalls) getDeleted() int { var _ = Describe("Watch Factory Operations", func() { var ( ovnClientset *util.OVNMasterClientset + ovnCMClientset *util.OVNClusterManagerClientset fakeClient *fake.Clientset egressIPFakeClient *egressipfake.Clientset egressFirewallFakeClient *egressfirewallfake.Clientset @@ -307,10 +308,14 @@ var _ = Describe("Watch Factory Operations", func() { KubeClient: fakeClient, EgressIPClient: egressIPFakeClient, EgressFirewallClient: egressFirewallFakeClient, - CloudNetworkClient: cloudNetworkFakeClient, EgressQoSClient: egressQoSFakeClient, EgressServiceClient: egressServiceFakeClient, } + ovnCMClientset = &util.OVNClusterManagerClientset{ + KubeClient: fakeClient, + EgressIPClient: egressIPFakeClient, + CloudNetworkClient: cloudNetworkFakeClient, + } pods = make([]*v1.Pod, 0) podWatch = objSetup(fakeClient, "pods", func(core.Action) (bool, runtime.Object, error) { @@ -420,6 +425,8 @@ var _ = Describe("Watch Factory Operations", func() { testExisting := func(objType reflect.Type, namespace string, sel labels.Selector, priority int) { if objType == EndpointSliceType { wf, err = NewNodeWatchFactory(ovnClientset.GetNodeClientset(), nodeName) + } else if objType == CloudPrivateIPConfigType { + wf, err = NewClusterManagerWatchFactory(ovnCMClientset) } else { wf, err = NewMasterWatchFactory(ovnClientset) } @@ -442,6 +449,8 @@ var _ = Describe("Watch Factory Operations", func() { testExistingFilteredHandler := func(objType reflect.Type, realObj reflect.Type, namespace string, sel labels.Selector, priority int) { if objType == EndpointSliceType { wf, err = NewNodeWatchFactory(ovnClientset.GetNodeClientset(), nodeName) + } else if objType == CloudPrivateIPConfigType { + wf, err = NewClusterManagerWatchFactory(ovnCMClientset) } else { wf, err = 
NewMasterWatchFactory(ovnClientset) } @@ -572,6 +581,8 @@ var _ = Describe("Watch Factory Operations", func() { testExisting := func(objType reflect.Type) { if objType == EndpointSliceType { wf, err = NewNodeWatchFactory(ovnClientset.GetNodeClientset(), nodeName) + } else if objType == CloudPrivateIPConfigType { + wf, err = NewClusterManagerWatchFactory(ovnCMClientset) } else { wf, err = NewMasterWatchFactory(ovnClientset) } @@ -1648,7 +1659,7 @@ var _ = Describe("Watch Factory Operations", func() { wf.RemoveEgressIPHandler(h) }) It("responds to cloudPrivateIPConfig add/update/delete events", func() { - wf, err = NewMasterWatchFactory(ovnClientset) + wf, err = NewClusterManagerWatchFactory(ovnCMClientset) Expect(err).NotTo(HaveOccurred()) err = wf.Start() Expect(err).NotTo(HaveOccurred()) diff --git a/go-controller/pkg/ovn/base_event_handler.go b/go-controller/pkg/ovn/base_event_handler.go index 2d25b31157..874d773d08 100644 --- a/go-controller/pkg/ovn/base_event_handler.go +++ b/go-controller/pkg/ovn/base_event_handler.go @@ -29,7 +29,6 @@ func hasResourceAnUpdateFunc(objType reflect.Type) bool { factory.EgressIPPodType, factory.EgressNodeType, factory.EgressFwNodeType, - factory.CloudPrivateIPConfigType, factory.LocalPodSelectorType, factory.NamespaceType, factory.MultiNetworkPolicyType: @@ -92,8 +91,7 @@ func (h *baseNetworkControllerEventHandler) areResourcesEqual(objType reflect.Ty case factory.EgressIPType, factory.EgressIPNamespaceType, - factory.EgressNodeType, - factory.CloudPrivateIPConfigType: + factory.EgressNodeType: // force update path for EgressIP resource. 
return false, nil @@ -167,9 +165,6 @@ func (h *baseNetworkControllerEventHandler) getResourceFromInformerCache(objType case factory.EgressIPType: obj, err = watchFactory.GetEgressIP(name) - case factory.CloudPrivateIPConfigType: - obj, err = watchFactory.GetCloudPrivateIPConfig(name) - case factory.MultiNetworkPolicyType: obj, err = watchFactory.GetMultiNetworkPolicy(namespace, name) @@ -199,7 +194,6 @@ func needsUpdateDuringRetry(objType reflect.Type) bool { factory.EgressIPType, factory.EgressIPPodType, factory.EgressIPNamespaceType, - factory.CloudPrivateIPConfigType, factory.MultiNetworkPolicyType: return true } diff --git a/go-controller/pkg/ovn/controller/egress_services/egress_services_node.go b/go-controller/pkg/ovn/controller/egress_services/egress_services_node.go index 439a770b59..eb161051ee 100644 --- a/go-controller/pkg/ovn/controller/egress_services/egress_services_node.go +++ b/go-controller/pkg/ovn/controller/egress_services/egress_services_node.go @@ -5,8 +5,10 @@ import ( "encoding/json" "fmt" "net" + "os" "sort" "sync" + "syscall" "time" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/healthcheck" @@ -80,6 +82,89 @@ func (c *Controller) CheckNodesReachabilityIterate() { } } +type egressSVCDialer interface { + dial(ip net.IP, timeout time.Duration) bool +} + +var dialer egressSVCDialer = &egressSVCDial{} + +type egressSVCDial struct{} + +// Blatant copy from: https://github.com/openshift/sdn/blob/master/pkg/network/common/egressip.go#L499-L505 +// Ping a node and return whether or not we think it is online. We do this by trying to +// open a TCP connection to the "discard" service (port 9); if the node is offline, the +// attempt will either time out with no response, or else return "no route to host" (and +// we will return false). If the node is online then we presumably will get a "connection +// refused" error; but the code below assumes that anything other than timeout or "no +// route" indicates that the node is online. 
+func (e *egressSVCDial) dial(ip net.IP, timeout time.Duration) bool { + conn, err := net.DialTimeout("tcp", net.JoinHostPort(ip.String(), "9"), timeout) + if conn != nil { + conn.Close() + } + if opErr, ok := err.(*net.OpError); ok { + if opErr.Timeout() { + return false + } + if sysErr, ok := opErr.Err.(*os.SyscallError); ok && sysErr.Err == syscall.EHOSTUNREACH { + return false + } + } + return true +} + +func IsReachableViaGRPC(mgmtIPs []net.IP, healthClient healthcheck.EgressIPHealthClient, healthCheckPort, totalTimeout int) bool { + dialCtx, dialCancel := context.WithTimeout(context.Background(), time.Duration(totalTimeout)*time.Second) + defer dialCancel() + + if !healthClient.IsConnected() { + // gRPC session is not up. Attempt to connect and if that succeeds, we will declare node as reachable. + return healthClient.Connect(dialCtx, mgmtIPs, healthCheckPort) + } + + // gRPC session is already established. Send a probe, which will succeed, or close the session. + return healthClient.Probe(dialCtx) +} + +func IsReachableLegacy(node string, mgmtIPs []net.IP, totalTimeout int) bool { + var retryTimeOut, initialRetryTimeOut time.Duration + + numMgmtIPs := len(mgmtIPs) + if numMgmtIPs == 0 { + return false + } + + switch totalTimeout { + // Check if we need to do node reachability check + case 0: + return true + case 1: + // Using time duration for initial retry with 700/numIPs msec and retry of 100/numIPs msec + // to ensure total wait time will be in range with the configured value including a sleep of 100msec between attempts. + initialRetryTimeOut = time.Duration(700/numMgmtIPs) * time.Millisecond + retryTimeOut = time.Duration(100/numMgmtIPs) * time.Millisecond + default: + // Using time duration for initial retry with 900/numIPs msec + // to ensure total wait time will be in range with the configured value including a sleep of 100msec between attempts. 
+ initialRetryTimeOut = time.Duration(900/numMgmtIPs) * time.Millisecond + retryTimeOut = initialRetryTimeOut + } + + timeout := initialRetryTimeOut + endTime := time.Now().Add(time.Second * time.Duration(totalTimeout)) + for time.Now().Before(endTime) { + for _, ip := range mgmtIPs { + if dialer.dial(ip, timeout) { + return true + } + } + time.Sleep(100 * time.Millisecond) + timeout = retryTimeOut + } + klog.Errorf("Failed reachability check for %s", node) + return false +} + func (c *Controller) onNodeAdd(obj interface{}) { key, err := cache.MetaNamespaceKeyFunc(obj) if err != nil { diff --git a/go-controller/pkg/ovn/default_network_controller.go b/go-controller/pkg/ovn/default_network_controller.go index a77dc7f008..6018f5c75f 100644 --- a/go-controller/pkg/ovn/default_network_controller.go +++ b/go-controller/pkg/ovn/default_network_controller.go @@ -8,7 +8,6 @@ import ( "sync" "time" - ocpcloudnetworkapi "github.com/openshift/api/cloudnetwork/v1" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config" egressfirewall "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressfirewall/v1" egressipv1 "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressip/v1" @@ -88,7 +87,7 @@ type DefaultNetworkController struct { defaultCOPPUUID string // Controller used for programming OVN for egress IP - eIPC egressIPController + eIPC egressIPZoneController // Controller used to handle services svcController *svccontroller.Controller @@ -121,9 +120,6 @@ type DefaultNetworkController struct { hybridOverlayFailed sync.Map syncZoneICFailed sync.Map - // retry framework for Cloud private IP config - retryCloudPrivateIPConfig *retry.RetryFramework - // variable to determine if all pods present on the node during startup have been processed // updated atomically allInitialPodsProcessed uint32 @@ -187,19 +183,13 @@ func newDefaultNetworkControllerCommon(cnci *CommonNetworkControllerInfo, }, externalGWCache: make(map[ktypes.NamespacedName]*externalRouteInfo), 
exGWCacheMutex: sync.RWMutex{}, - eIPC: egressIPController{ - egressIPAssignmentMutex: &sync.Mutex{}, - podAssignmentMutex: &sync.Mutex{}, - nodeIPUpdateMutex: &sync.Mutex{}, - podAssignment: make(map[string]*podAssignmentState), - pendingCloudPrivateIPConfigsMutex: &sync.Mutex{}, - pendingCloudPrivateIPConfigsOps: make(map[string]map[string]*cloudPrivateIPConfigOp), - allocator: allocator{&sync.Mutex{}, make(map[string]*egressNode)}, - nbClient: cnci.nbClient, - watchFactory: cnci.watchFactory, - egressIPTotalTimeout: config.OVNKubernetesFeature.EgressIPReachabiltyTotalTimeout, - reachabilityCheckInterval: egressIPReachabilityCheckInterval, - egressIPNodeHealthCheckPort: config.OVNKubernetesFeature.EgressIPNodeHealthCheckPort, + eIPC: egressIPZoneController{ + nodeIPUpdateMutex: &sync.Mutex{}, + podAssignmentMutex: &sync.Mutex{}, + podAssignment: make(map[string]*podAssignmentState), + nbClient: cnci.nbClient, + watchFactory: cnci.watchFactory, + nodeZoneState: syncmap.NewSyncMap[bool](), }, loadbalancerClusterCache: make(map[kapi.Protocol]string), clusterLoadBalancerGroupUUID: "", @@ -235,7 +225,6 @@ func (oc *DefaultNetworkController) initRetryFramework() { oc.retryEgressIPPods = oc.newRetryFramework(factory.EgressIPPodType) oc.retryEgressNodes = oc.newRetryFramework(factory.EgressNodeType) oc.retryEgressFwNodes = oc.newRetryFramework(factory.EgressFwNodeType) - oc.retryCloudPrivateIPConfig = oc.newRetryFramework(factory.CloudPrivateIPConfigType) oc.retryNamespaces = oc.newRetryFramework(factory.NamespaceType) oc.retryNetworkPolicies = oc.newRetryFramework(factory.PolicyType) } @@ -455,11 +444,6 @@ func (oc *DefaultNetworkController) Run(ctx context.Context) error { if err := WithSyncDurationMetric("egress ip", oc.WatchEgressIP); err != nil { return err } - if util.PlatformTypeIsEgressIPCloudProvider() { - if err := WithSyncDurationMetric("could private ip config", oc.WatchCloudPrivateIPConfig); err != nil { - return err - } - } if 
config.OVNKubernetesFeature.EgressIPReachabiltyTotalTimeout == 0 { klog.V(2).Infof("EgressIP node reachability check disabled") } else if config.OVNKubernetesFeature.EgressIPNodeHealthCheckPort != 0 { @@ -772,7 +756,23 @@ func (h *defaultNetworkControllerEventHandler) AddResource(obj interface{}, from case factory.EgressNodeType: node := obj.(*kapi.Node) - return h.oc.reconcileNodeForEgressIP(nil, node) + // Update node in zone cache; value will be true if node is local + // to this zone and false if its not + h.oc.eIPC.nodeZoneState.LockKey(node.Name) + h.oc.eIPC.nodeZoneState.Store(node.Name, h.oc.isLocalZoneNode(node)) + h.oc.eIPC.nodeZoneState.UnlockKey(node.Name) + // add the nodeIP to the default LRP (102 priority) destination address-set + err := h.oc.ensureDefaultNoRerouteNodePolicies() + if err != nil { + return err + } + // add the GARP configuration for all the new nodes we get + // since we use the "exclude-lb-vips-from-garp": "true" + // we shouldn't have scale issues + // NOTE: Adding GARP needs to be done only during node add + // It is a one time operation and doesn't need to be done during + // node updates. 
It needs to be done only for nodes local to this zone + return h.oc.addEgressNode(node) case factory.EgressFwNodeType: node := obj.(*kapi.Node) @@ -782,10 +782,6 @@ return err } - case factory.CloudPrivateIPConfigType: - cloudPrivateIPConfig := obj.(*ocpcloudnetworkapi.CloudPrivateIPConfig) - return h.oc.reconcileCloudPrivateIPConfig(nil, cloudPrivateIPConfig) - case factory.NamespaceType: ns, ok := obj.(*kapi.Namespace) if !ok { @@ -896,18 +892,26 @@ func (h *defaultNetworkControllerEventHandler) UpdateResource(oldObj, newObj int case factory.EgressNodeType: oldNode := oldObj.(*kapi.Node) newNode := newObj.(*kapi.Node) - return h.oc.reconcileNodeForEgressIP(oldNode, newNode) + // Update node in zone cache; value will be true if node is local + // to this zone and false if it's not + h.oc.eIPC.nodeZoneState.LockKey(newNode.Name) + h.oc.eIPC.nodeZoneState.Store(newNode.Name, h.oc.isLocalZoneNode(newNode)) + h.oc.eIPC.nodeZoneState.UnlockKey(newNode.Name) + // update the nodeIP in the default-reRoute (102 priority) destination address-set + if util.NodeHostAddressesAnnotationChanged(oldNode, newNode) { + klog.Infof("Egress IP detected IP address change for node %s. 
Updating no re-route policies", newNode.Name) + err := h.oc.ensureDefaultNoRerouteNodePolicies() + if err != nil { + return err + } + } + return nil case factory.EgressFwNodeType: oldNode := oldObj.(*kapi.Node) newNode := newObj.(*kapi.Node) return h.oc.updateEgressFirewallForNode(oldNode, newNode) - case factory.CloudPrivateIPConfigType: - oldCloudPrivateIPConfig := oldObj.(*ocpcloudnetworkapi.CloudPrivateIPConfig) - newCloudPrivateIPConfig := newObj.(*ocpcloudnetworkapi.CloudPrivateIPConfig) - return h.oc.reconcileCloudPrivateIPConfig(oldCloudPrivateIPConfig, newCloudPrivateIPConfig) - case factory.NamespaceType: oldNs, newNs := oldObj.(*kapi.Namespace), newObj.(*kapi.Namespace) return h.oc.updateNamespace(oldNs, newNs) @@ -966,7 +970,20 @@ func (h *defaultNetworkControllerEventHandler) DeleteResource(obj, cachedObj int case factory.EgressNodeType: node := obj.(*kapi.Node) - return h.oc.reconcileNodeForEgressIP(node, nil) + // remove the GARP setup for the node + if err := h.oc.deleteEgressNode(node); err != nil { + return err + } + // remove the IPs from the destination address-set of the default LRP (102) + err := h.oc.ensureDefaultNoRerouteNodePolicies() + if err != nil { + return err + } + // Update node in zone cache; remove the node key since node has been deleted. 
+ h.oc.eIPC.nodeZoneState.LockKey(node.Name) + h.oc.eIPC.nodeZoneState.Delete(node.Name) + h.oc.eIPC.nodeZoneState.UnlockKey(node.Name) + return nil case factory.EgressFwNodeType: node, ok := obj.(*kapi.Node) @@ -975,10 +992,6 @@ func (h *defaultNetworkControllerEventHandler) DeleteResource(obj, cachedObj int } return h.oc.updateEgressFirewallForNode(node, nil) - case factory.CloudPrivateIPConfigType: - cloudPrivateIPConfig := obj.(*ocpcloudnetworkapi.CloudPrivateIPConfig) - return h.oc.reconcileCloudPrivateIPConfig(cloudPrivateIPConfig, nil) - case factory.NamespaceType: ns := obj.(*kapi.Namespace) return h.oc.deleteNamespace(ns) @@ -1018,8 +1031,7 @@ func (h *defaultNetworkControllerEventHandler) SyncFunc(objs []interface{}) erro syncFunc = nil case factory.EgressIPPodType, - factory.EgressIPType, - factory.CloudPrivateIPConfigType: + factory.EgressIPType: syncFunc = nil case factory.NamespaceType: diff --git a/go-controller/pkg/ovn/egressip.go b/go-controller/pkg/ovn/egressip.go index ec43526802..72f62577fe 100644 --- a/go-controller/pkg/ovn/egressip.go +++ b/go-controller/pkg/ovn/egressip.go @@ -1,21 +1,15 @@ package ovn import ( - "context" - "encoding/hex" "encoding/json" "errors" "fmt" "net" - "os" "reflect" - "sort" "strings" "sync" - "syscall" "time" - ocpcloudnetworkapi "github.com/openshift/api/cloudnetwork/v1" libovsdbclient "github.com/ovn-org/libovsdb/client" "github.com/ovn-org/libovsdb/ovsdb" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config" @@ -26,7 +20,7 @@ import ( "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/nbdb" addressset "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/address_set" egresssvc "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/controller/egress_services" - "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/healthcheck" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/syncmap" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" 
"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" @@ -57,256 +51,103 @@ func getEgressIPAddrSetDbIDs(name egressIpAddrSetName, controller string) *libov }) } -type egressIPDialer interface { - dial(ip net.IP, timeout time.Duration) bool -} - -var dialer egressIPDialer = &egressIPDial{} - -type healthcheckClientAllocator interface { - allocate(nodeName string) healthcheck.EgressIPHealthClient -} - -var hccAllocator healthcheckClientAllocator = &egressIPHealthcheckClientAllocator{} - +// main reconcile functions begin here + +// reconcileEgressIP reconciles the database configuration +// setup in nbdb based on the received egressIP objects +// CASE 1: if old == nil && new != nil {add event, we do a full setup for all statuses} +// CASE 2: if old != nil && new == nil {delete event, we do a full teardown for all statuses} +// CASE 3: if old != nil && new != nil {update event, +// CASE 3.1: we calculate based on difference between old and new statuses +// which ones need teardown and which ones need setup +// this ensures there is no disruption for things that did not change +// CASE 3.2: Only Namespace selectors on Spec changed +// CASE 3.3: Only Pod Selectors on Spec changed +// CASE 3.4: Both Namespace && Pod Selectors on Spec changed +// } +// NOTE: `Spec.EgressIPs`` updates for EIP object are not processed here, that is the job of cluster manager +// We only care about `Spec.NamespaceSelector`, `Spec.PodSelector` and `Status` field func (oc *DefaultNetworkController) reconcileEgressIP(old, new *egressipv1.EgressIP) (err error) { - // Lock the assignment, this is needed because this function can end up - // being called from WatchEgressNodes and WatchEgressIP, i.e: two different - // go-routines and we need to make sure the assignment is safe. - oc.eIPC.egressIPAssignmentMutex.Lock() - defer oc.eIPC.egressIPAssignmentMutex.Unlock() - - // Initialize an empty name which is filled depending on the operation - // (ADD/UPDATE/DELETE) we are performing. 
This is done as to be able to - // delete the NB DB set up correctly when searching the DB based on the - // name. - name := "" - - // Initialize a status which will be used to compare against - // new.spec.egressIPs and decide on what from the status should get deleted - // or kept. - status := []egressipv1.EgressIPStatusItem{} - - // Initialize two empty objects as to avoid SIGSEGV. The code should play - // nicely with empty objects though. - oldEIP, newEIP := &egressipv1.EgressIP{}, &egressipv1.EgressIP{} - - // Initialize two "nothing" selectors. Nothing selector are semantically - // opposed to "empty" selectors, i.e: they select and match nothing, while - // an empty one matches everything. If old/new are nil, and we don't do - // this: we would have an empty EgressIP object which would result in two - // empty selectors, matching everything, whereas we would mean the inverse - newNamespaceSelector, _ := metav1.LabelSelectorAsSelector(nil) - oldNamespaceSelector, _ := metav1.LabelSelectorAsSelector(nil) - // Initialize a sets.String which holds egress IPs that were not fully assigned - // but are allocated and they are meant to be removed. - staleEgressIPs := sets.NewString() - if old != nil { - oldEIP = old - oldNamespaceSelector, err = metav1.LabelSelectorAsSelector(&oldEIP.Spec.NamespaceSelector) - if err != nil { - return fmt.Errorf("invalid old namespaceSelector, err: %v", err) - } - name = oldEIP.Name - status = oldEIP.Status.Items - staleEgressIPs.Insert(oldEIP.Spec.EgressIPs...) 
- } - if new != nil { - newEIP = new - newNamespaceSelector, err = metav1.LabelSelectorAsSelector(&newEIP.Spec.NamespaceSelector) - if err != nil { - return fmt.Errorf("invalid new namespaceSelector, err: %v", err) - } - name = newEIP.Name - status = newEIP.Status.Items - if staleEgressIPs.Len() > 0 { - for _, egressIP := range newEIP.Spec.EgressIPs { - if staleEgressIPs.Has(egressIP) { - staleEgressIPs.Delete(egressIP) - } + // CASE 1: EIP object deletion, we need to teardown database configuration for all the statuses + if old != nil && new == nil { + removeStatus := old.Status.Items + if len(removeStatus) > 0 { + if err := oc.deleteEgressIPAssignments(old.Name, removeStatus); err != nil { + return err } } } - - // We do not initialize a nothing selector for the podSelector, because - // these are allowed to be empty (i.e: matching all pods in a namespace), as - // supposed to the namespaceSelector - newPodSelector, err := metav1.LabelSelectorAsSelector(&newEIP.Spec.PodSelector) - if err != nil { - return fmt.Errorf("invalid new podSelector, err: %v", err) - } - oldPodSelector, err := metav1.LabelSelectorAsSelector(&oldEIP.Spec.PodSelector) - if err != nil { - return fmt.Errorf("invalid old podSelector, err: %v", err) - } - - // Validate the spec and use only the valid egress IPs when performing any - // successive operations, theoretically: the user could specify invalid IP - // addresses, which would break us. - validSpecIPs, err := oc.validateEgressIPSpec(name, newEIP.Spec.EgressIPs) - if err != nil { - return fmt.Errorf("invalid EgressIP spec, err: %v", err) - } - - // Validate the status, on restart it could be the case that what might have - // been assigned when ovnkube-master last ran is not a valid assignment - // anymore (specifically if ovnkube-master has been crashing for a while). - // Any invalid status at this point in time needs to be removed and assigned - // to a valid node. 
- validStatus, invalidStatus := oc.validateEgressIPStatus(name, status) - for status := range validStatus { - // If the spec has changed and an egress IP has been removed by the - // user: we need to un-assign that egress IP - if !validSpecIPs.Has(status.EgressIP) { - invalidStatus[status] = "" - delete(validStatus, status) - } - } - - invalidStatusLen := len(invalidStatus) - if invalidStatusLen > 0 { - metrics.RecordEgressIPRebalance(invalidStatusLen) - } - - // Add only the diff between what is requested and valid and that which - // isn't already assigned. - ipsToAssign := validSpecIPs - ipsToRemove := sets.New[string]() - statusToAdd := make([]egressipv1.EgressIPStatusItem, 0, len(ipsToAssign)) - statusToKeep := make([]egressipv1.EgressIPStatusItem, 0, len(validStatus)) - for status := range validStatus { - statusToKeep = append(statusToKeep, status) - ipsToAssign.Delete(status.EgressIP) - } - statusToRemove := make([]egressipv1.EgressIPStatusItem, 0, invalidStatusLen) - for status := range invalidStatus { - statusToRemove = append(statusToRemove, status) - ipsToRemove.Insert(status.EgressIP) - } - if ipsToRemove.Len() > 0 { - // The following is added as to ensure that we only add after having - // successfully removed egress IPs. This case is not very important on - // bare-metal (since we execute the add after the remove below, and - // hence have full control of the execution - barring its success), but - // on a cloud: we don't execute anything below, we wait for the status - // on the CloudPrivateIPConfig(s) we create to be set before executing - // anything in the OVN DB. So, we need to make sure that we delete and - // then add, mainly because if EIP1 is added to nodeX and then EIP2 is - // removed from nodeX, we might remove the setup made for EIP1. The - // add/delete ordering of events is not guaranteed on the cloud where we - // depend on other controllers to execute the work for us however. 
By - // comparing the spec to the status and applying the following truth - // table we can ensure that order of events. - - // case ID | Egress IP to add | Egress IP to remove | ipsToAssign - // 1 | e1 | e1 | e1 - // 2 | e2 | e1 | - - // 3 | e2 | - | e2 - // 4 | - | e1 | - - - // Case 1 handles updates. Case 2 and 3 makes sure we don't add until we - // successfully delete. Case 4 just shows an example of what would - // happen if we don't have anything to add - ipsToAssign = ipsToAssign.Intersection(ipsToRemove) - } - - if !util.PlatformTypeIsEgressIPCloudProvider() { - if len(statusToRemove) > 0 { - // Delete the statusToRemove from the allocator cache. If we don't - // do this we will occupy assignment positions for the ipsToAssign, - // even though statusToRemove will be removed afterwards - oc.deleteAllocatorEgressIPAssignments(statusToRemove) - if err := oc.deleteEgressIPAssignments(name, statusToRemove); err != nil { + // CASE 2: EIP object addition, we need to setup database configuration for all the statuses + if old == nil && new != nil { + addStatus := new.Status.Items + if len(addStatus) > 0 { + if err := oc.addEgressIPAssignments(new.Name, addStatus, new.Spec.NamespaceSelector, new.Spec.PodSelector); err != nil { return err } } - if len(ipsToAssign) > 0 { - statusToAdd = oc.assignEgressIPs(name, ipsToAssign.UnsortedList()) - statusToKeep = append(statusToKeep, statusToAdd...) - } - // Assign all statusToKeep, we need to warm up the podAssignment cache - // on restart. We won't perform any additional transactions to the NB DB - // for things which exists because the libovsdb operations use - // modelClient which is idempotent. 
- if err := oc.addEgressIPAssignments(name, statusToKeep, newEIP.Spec.NamespaceSelector, newEIP.Spec.PodSelector); err != nil { - return err - } - // Add all assignments which are to be kept to the allocator cache, - // allowing us to track all assignments which have been performed and - // avoid incorrect future assignments due to a de-synchronized cache. - oc.addAllocatorEgressIPAssignments(name, statusToKeep) - // Update the object only on an ADD/UPDATE. If we are processing a - // DELETE, new will be nil and we should not update the object. - if len(statusToAdd) > 0 || (len(statusToRemove) > 0 && new != nil) { - if err := oc.patchReplaceEgressIPStatus(name, statusToKeep); err != nil { - return err + } + // CASE 3: EIP object update + if old != nil && new != nil { + oldEIP := old + newEIP := new + // CASE 3.1: we need to see which statuses + // 1) need teardown + // 2) need setup + // 3) need no-op + if !reflect.DeepEqual(oldEIP.Status.Items, newEIP.Status.Items) { + statusToRemove := make(map[string]egressipv1.EgressIPStatusItem, 0) + statusToKeep := make(map[string]egressipv1.EgressIPStatusItem, 0) + for _, status := range oldEIP.Status.Items { + statusToRemove[status.EgressIP] = status + } + for _, status := range newEIP.Status.Items { + statusToKeep[status.EgressIP] = status + } + // only delete items that were in the oldSpec but cannot be found in the newSpec + statusToDelete := make([]egressipv1.EgressIPStatusItem, 0) + for eIP, oldStatus := range statusToRemove { + if newStatus, ok := statusToKeep[eIP]; ok && newStatus.Node == oldStatus.Node { + continue + } + statusToDelete = append(statusToDelete, oldStatus) } - } - } else { - // Even when running on a public cloud, we must make sure that we unwire EgressIP - // configuration from OVN *before* we instruct the CloudNetworkConfigController - // to remove the CloudPrivateIPConfig object from the cloud. 
- // CloudPrivateIPConfig objects can be in the "Deleting" state for a long time, - // waiting for the underlying cloud to finish its action and to report success of the - // unattach operation. Some clouds such as Azure will remove the IP address nearly - // immediately, but then they will take a long time (seconds to minutes) to actually report - // success of the removal operation. - if len(statusToRemove) > 0 { - // Delete all assignments that are to be removed from the allocator - // cache. If we don't do this we will occupy assignment positions for - // the ipsToAdd, even though statusToRemove will be removed afterwards - oc.deleteAllocatorEgressIPAssignments(statusToRemove) - if err := oc.deleteEgressIPAssignments(name, statusToRemove); err != nil { - return err + if len(statusToDelete) > 0 { + if err := oc.deleteEgressIPAssignments(old.Name, statusToDelete); err != nil { + return err + } } - } - // When egress IP is not fully assigned to a node, then statusToRemove may not - // have those entries, hence retrieve it from staleEgressIPs for removing - // the item from cloudprivateipconfig. 
- for _, toRemove := range statusToRemove { - if !staleEgressIPs.Has(toRemove.EgressIP) { - continue + // only add items that were NOT in the oldSpec but can be found in the newSpec + statusToAdd := make([]egressipv1.EgressIPStatusItem, 0) + for eIP, newStatus := range statusToKeep { + if oldStatus, ok := statusToRemove[eIP]; ok && oldStatus.Node == newStatus.Node { + continue + } + statusToAdd = append(statusToAdd, newStatus) } - staleEgressIPs.Delete(toRemove.EgressIP) - } - for staleEgressIP := range staleEgressIPs { - if nodeName := oc.deleteAllocatorEgressIPAssignmentIfExists(name, staleEgressIP); nodeName != "" { - statusToRemove = append(statusToRemove, - egressipv1.EgressIPStatusItem{EgressIP: staleEgressIP, Node: nodeName}) + if len(statusToAdd) > 0 { + if err := oc.addEgressIPAssignments(new.Name, statusToAdd, new.Spec.NamespaceSelector, new.Spec.PodSelector); err != nil { + return err + } } } - // If running on a public cloud we should not program OVN just yet for assignment - // operations. We need confirmation from the cloud-network-config-controller that - // it can assign the IPs. reconcileCloudPrivateIPConfig will take care of - // processing the answer from the requests we make here, and update OVN - // accordingly when we know what the outcome is. - if len(ipsToAssign) > 0 { - statusToAdd = oc.assignEgressIPs(name, ipsToAssign.UnsortedList()) - statusToKeep = append(statusToKeep, statusToAdd...) - } - // Same as above: Add all assignments which are to be kept to the - // allocator cache, allowing us to track all assignments which have been - // performed and avoid incorrect future assignments due to a - // de-synchronized cache. - oc.addAllocatorEgressIPAssignments(name, statusToKeep) - - // Execute CloudPrivateIPConfig changes for assignments which need to be - // added/removed, assignments which don't change do not require any - // further setup. 
- if err := oc.executeCloudPrivateIPConfigChange(name, statusToAdd, statusToRemove); err != nil { - return err - } - } - - // Record the egress IP allocator count - metrics.RecordEgressIPCount(getEgressIPAllocationTotalCount(oc.eIPC.allocator)) - // If nothing has changed for what concerns the assignments, then check if - // the namespaceSelector and podSelector have changed. If they have changed - // then remove the setup for all pods which matched the old and add - // everything for all pods which match the new. - if len(ipsToAssign) == 0 && - len(statusToRemove) == 0 { + oldNamespaceSelector, err := metav1.LabelSelectorAsSelector(&oldEIP.Spec.NamespaceSelector) + if err != nil { + return fmt.Errorf("invalid old namespaceSelector, err: %v", err) + } + newNamespaceSelector, err := metav1.LabelSelectorAsSelector(&newEIP.Spec.NamespaceSelector) + if err != nil { + return fmt.Errorf("invalid new namespaceSelector, err: %v", err) + } + oldPodSelector, err := metav1.LabelSelectorAsSelector(&oldEIP.Spec.PodSelector) + if err != nil { + return fmt.Errorf("invalid old podSelector, err: %v", err) + } + newPodSelector, err := metav1.LabelSelectorAsSelector(&newEIP.Spec.PodSelector) + if err != nil { + return fmt.Errorf("invalid new podSelector, err: %v", err) + } + // CASE 3.2: Only Namespace selectors on Spec changed // Only the namespace selector changed: remove the setup for all pods // matching the old and not matching the new, and add setup for the pod // matching the new and which didn't match the old. 
@@ -318,16 +159,17 @@ func (oc *DefaultNetworkController) reconcileEgressIP(old, new *egressipv1.Egres for _, namespace := range namespaces { namespaceLabels := labels.Set(namespace.Labels) if !newNamespaceSelector.Matches(namespaceLabels) && oldNamespaceSelector.Matches(namespaceLabels) { - if err := oc.deleteNamespaceEgressIPAssignment(name, oldEIP.Status.Items, namespace, oldEIP.Spec.PodSelector); err != nil { + if err := oc.deleteNamespaceEgressIPAssignment(oldEIP.Name, oldEIP.Status.Items, namespace, oldEIP.Spec.PodSelector); err != nil { return err } } if newNamespaceSelector.Matches(namespaceLabels) && !oldNamespaceSelector.Matches(namespaceLabels) { - if err := oc.addNamespaceEgressIPAssignments(name, newEIP.Status.Items, namespace, newEIP.Spec.PodSelector); err != nil { + if err := oc.addNamespaceEgressIPAssignments(newEIP.Name, newEIP.Status.Items, namespace, newEIP.Spec.PodSelector); err != nil { return err } } } + // CASE 3.3: Only Pod Selectors on Spec changed // Only the pod selector changed: remove the setup for all pods // matching the old and not matching the new, and add setup for the pod // matching the new and which didn't match the old. 
@@ -344,7 +186,7 @@ func (oc *DefaultNetworkController) reconcileEgressIP(old, new *egressipv1.Egres for _, pod := range pods { podLabels := labels.Set(pod.Labels) if !newPodSelector.Matches(podLabels) && oldPodSelector.Matches(podLabels) { - if err := oc.deletePodEgressIPAssignments(name, oldEIP.Status.Items, pod); err != nil { + if err := oc.deletePodEgressIPAssignments(oldEIP.Name, oldEIP.Status.Items, pod); err != nil { return err } } @@ -352,12 +194,13 @@ func (oc *DefaultNetworkController) reconcileEgressIP(old, new *egressipv1.Egres continue } if newPodSelector.Matches(podLabels) && !oldPodSelector.Matches(podLabels) { - if err := oc.addPodEgressIPAssignmentsWithLock(name, newEIP.Status.Items, pod); err != nil { + if err := oc.addPodEgressIPAssignmentsWithLock(newEIP.Name, newEIP.Status.Items, pod); err != nil { return err } } } } + // CASE 3.4: Both Namespace && Pod Selectors on Spec changed // Both selectors changed: remove the setup for pods matching the // old ones and not matching the new ones, and add setup for all // matching the new ones but which didn't match the old ones. @@ -371,7 +214,7 @@ func (oc *DefaultNetworkController) reconcileEgressIP(old, new *egressipv1.Egres // If the namespace does not match anymore then there's no // reason to look at the pod selector. 
if !newNamespaceSelector.Matches(namespaceLabels) && oldNamespaceSelector.Matches(namespaceLabels) { - if err := oc.deleteNamespaceEgressIPAssignment(name, oldEIP.Status.Items, namespace, oldEIP.Spec.PodSelector); err != nil { + if err := oc.deleteNamespaceEgressIPAssignment(oldEIP.Name, oldEIP.Status.Items, namespace, oldEIP.Spec.PodSelector); err != nil { return err } } @@ -387,7 +230,7 @@ func (oc *DefaultNetworkController) reconcileEgressIP(old, new *egressipv1.Egres for _, pod := range pods { podLabels := labels.Set(pod.Labels) if newPodSelector.Matches(podLabels) { - if err := oc.addPodEgressIPAssignmentsWithLock(name, newEIP.Status.Items, pod); err != nil { + if err := oc.addPodEgressIPAssignmentsWithLock(newEIP.Name, newEIP.Status.Items, pod); err != nil { return err } } @@ -403,7 +246,7 @@ func (oc *DefaultNetworkController) reconcileEgressIP(old, new *egressipv1.Egres for _, pod := range pods { podLabels := labels.Set(pod.Labels) if !newPodSelector.Matches(podLabels) && oldPodSelector.Matches(podLabels) { - if err := oc.deletePodEgressIPAssignments(name, oldEIP.Status.Items, pod); err != nil { + if err := oc.deletePodEgressIPAssignments(oldEIP.Name, oldEIP.Status.Items, pod); err != nil { return err } } @@ -411,7 +254,7 @@ func (oc *DefaultNetworkController) reconcileEgressIP(old, new *egressipv1.Egres continue } if newPodSelector.Matches(podLabels) && !oldPodSelector.Matches(podLabels) { - if err := oc.addPodEgressIPAssignmentsWithLock(name, newEIP.Status.Items, pod); err != nil { + if err := oc.addPodEgressIPAssignmentsWithLock(newEIP.Name, newEIP.Status.Items, pod); err != nil { return err } } @@ -423,6 +266,9 @@ func (oc *DefaultNetworkController) reconcileEgressIP(old, new *egressipv1.Egres return nil } +// reconcileEgressIPNamespace reconciles the database configuration setup in nbdb +// based on received namespace objects. 
+// NOTE: we only care about namespace label updates func (oc *DefaultNetworkController) reconcileEgressIPNamespace(old, new *v1.Namespace) error { // Same as for reconcileEgressIP: labels play nicely with empty object, not // nil ones. @@ -468,6 +314,9 @@ func (oc *DefaultNetworkController) reconcileEgressIPNamespace(old, new *v1.Name return nil } +// reconcileEgressIPPod reconciles the database configuration setup in nbdb +// based on received pod objects. +// NOTE: we only care about pod label updates func (oc *DefaultNetworkController) reconcileEgressIPPod(old, new *v1.Pod) (err error) { oldPod, newPod := &v1.Pod{}, &v1.Pod{} namespace := &v1.Namespace{} @@ -569,479 +418,7 @@ func (oc *DefaultNetworkController) reconcileEgressIPPod(old, new *v1.Pod) (err return nil } -func (oc *DefaultNetworkController) reconcileCloudPrivateIPConfig(old, new *ocpcloudnetworkapi.CloudPrivateIPConfig) error { - oldCloudPrivateIPConfig, newCloudPrivateIPConfig := &ocpcloudnetworkapi.CloudPrivateIPConfig{}, &ocpcloudnetworkapi.CloudPrivateIPConfig{} - shouldDelete, shouldAdd := false, false - nodeToDelete := "" - - if old != nil { - oldCloudPrivateIPConfig = old - // We need to handle three types of deletes, A) object UPDATE where the - // old egress IP <-> node assignment has been removed. This is indicated - // by the old object having a .status.node set and the new object having - // .status.node empty and the condition on the new being successful. B) - // object UPDATE where egress IP <-> node assignment has been updated. - // This is indicated by .status.node being different on old and new - // objects. 
C) object DELETE, for which new is nil - shouldDelete = oldCloudPrivateIPConfig.Status.Node != "" || new == nil - // On DELETE we need to delete the .spec.node for the old object - nodeToDelete = oldCloudPrivateIPConfig.Spec.Node - } - if new != nil { - newCloudPrivateIPConfig = new - // We should only proceed to setting things up for objects where the new - // object has the same .spec.node and .status.node, and assignment - // condition being true. This is how the cloud-network-config-controller - // indicates a successful cloud assignment. - shouldAdd = newCloudPrivateIPConfig.Status.Node == newCloudPrivateIPConfig.Spec.Node && - ocpcloudnetworkapi.CloudPrivateIPConfigConditionType(newCloudPrivateIPConfig.Status.Conditions[0].Type) == ocpcloudnetworkapi.Assigned && - kapi.ConditionStatus(newCloudPrivateIPConfig.Status.Conditions[0].Status) == kapi.ConditionTrue - // See above explanation for the delete - shouldDelete = shouldDelete && - (newCloudPrivateIPConfig.Status.Node == "" || newCloudPrivateIPConfig.Status.Node != oldCloudPrivateIPConfig.Status.Node) && - ocpcloudnetworkapi.CloudPrivateIPConfigConditionType(newCloudPrivateIPConfig.Status.Conditions[0].Type) == ocpcloudnetworkapi.Assigned && - kapi.ConditionStatus(newCloudPrivateIPConfig.Status.Conditions[0].Status) == kapi.ConditionTrue - // On UPDATE we need to delete the old .status.node - if shouldDelete { - nodeToDelete = oldCloudPrivateIPConfig.Status.Node - } - } - - // As opposed to reconcileEgressIP, here we are only interested in changes - // made to the status (since we are the only ones performing the change made - // to the spec). So don't process the object if there is no change made to - // the status. 
- if reflect.DeepEqual(oldCloudPrivateIPConfig.Status, newCloudPrivateIPConfig.Status) { - return nil - } - - if shouldDelete { - // Get the EgressIP owner reference - egressIPName, exists := oldCloudPrivateIPConfig.Annotations[util.OVNEgressIPOwnerRefLabel] - if !exists { - // If a CloudPrivateIPConfig object does not have an egress IP owner reference annotation upon deletion, - // there is no way that the object will get one after deletion. Hence, simply log a warning message here - // for informative purposes instead of throwing the same error and retrying time and time again. - klog.Warningf("CloudPrivateIPConfig object %q was missing the egress IP owner reference annotation "+ - "upon deletion", oldCloudPrivateIPConfig.Name) - return nil - } - // Check if the egress IP has been deleted or not, if we are processing - // a CloudPrivateIPConfig delete because the EgressIP has been deleted - // then we need to remove the setup made for it, but not update the - // object. - egressIP, err := oc.kube.GetEgressIP(egressIPName) - isDeleted := apierrors.IsNotFound(err) - if err != nil && !isDeleted { - return err - } - egressIPString := cloudPrivateIPConfigNameToIPString(oldCloudPrivateIPConfig.Name) - statusItem := egressipv1.EgressIPStatusItem{ - Node: nodeToDelete, - EgressIP: egressIPString, - } - // In many cases, this here is likely redundant as we already run this inside - // reconcileEgressIP before instructing the CloudPrivateIP reconciler to delete - // it again. But running oc.deleteEgressIPAssignments twice shouldn't hurt, and - // this is also needed if someone manually deletes the CloudPrivateIP, but keeps - // the EgressIP. Therefore, for safe measure, better delete the flows twice. In - // the future, let's possibly reevaluate if this is needed. 
- if err := oc.deleteEgressIPAssignments(egressIPName, []egressipv1.EgressIPStatusItem{statusItem}); err != nil { - return err - } - // If we are not processing a delete, update the EgressIP object's - // status assignments - if !isDeleted { - // Deleting a status here means updating the object with the statuses we - // want to keep - updatedStatus := []egressipv1.EgressIPStatusItem{} - for _, status := range egressIP.Status.Items { - if !reflect.DeepEqual(status, statusItem) { - updatedStatus = append(updatedStatus, status) - } - } - if err := oc.patchReplaceEgressIPStatus(egressIP.Name, updatedStatus); err != nil { - return err - } - } - resyncEgressIPs, err := oc.removePendingOpsAndGetResyncs(egressIPName, egressIPString) - if err != nil { - return err - } - for _, resyncEgressIP := range resyncEgressIPs { - if err := oc.reconcileEgressIP(nil, &resyncEgressIP); err != nil { - return fmt.Errorf("synthetic update for EgressIP: %s failed, err: %v", egressIP.Name, err) - } - } - } - if shouldAdd { - // Get the EgressIP owner reference - egressIPName, exists := newCloudPrivateIPConfig.Annotations[util.OVNEgressIPOwnerRefLabel] - if !exists { - // If a CloudPrivateIPConfig object does not have an egress IP owner reference annotation upon creation - // then we should simply log this as a warning. We should get an update action later down the road where we - // then take care of the rest. Hence, do not throw an error here to avoid rescheduling. Even though not - // officially supported, think of someone creating a CloudPrivateIPConfig object manually which will never - // get the annotation. - klog.Warningf("CloudPrivateIPConfig object %q is missing the egress IP owner reference annotation. 
Skipping", - oldCloudPrivateIPConfig.Name) - return nil - } - egressIP, err := oc.kube.GetEgressIP(egressIPName) - if err != nil { - return err - } - egressIPString := cloudPrivateIPConfigNameToIPString(newCloudPrivateIPConfig.Name) - statusItem := egressipv1.EgressIPStatusItem{ - Node: newCloudPrivateIPConfig.Status.Node, - EgressIP: egressIPString, - } - if err := oc.addEgressIPAssignments(egressIP.Name, []egressipv1.EgressIPStatusItem{statusItem}, egressIP.Spec.NamespaceSelector, egressIP.Spec.PodSelector); err != nil { - return err - } - // Guard against performing the same assignment twice, which might - // happen when multiple updates come in on the same object. - hasStatus := false - for _, status := range egressIP.Status.Items { - if reflect.DeepEqual(status, statusItem) { - hasStatus = true - break - } - } - if !hasStatus { - statusToKeep := append(egressIP.Status.Items, statusItem) - if err := oc.patchReplaceEgressIPStatus(egressIP.Name, statusToKeep); err != nil { - return err - } - } - - oc.eIPC.pendingCloudPrivateIPConfigsMutex.Lock() - defer oc.eIPC.pendingCloudPrivateIPConfigsMutex.Unlock() - // Remove the finished add / update operation from the pending cache. We - // never process add and deletes in the same sync, and for updates: - // deletes are always performed before adds, hence we should only ever - // fully delete the item from the pending cache once the add has - // finished. - ops, pending := oc.eIPC.pendingCloudPrivateIPConfigsOps[egressIPName] - if !pending { - // Do not return an error here, it will lead to spurious error - // messages on restart because we will process a bunch of adds for - // all existing objects, for which no CR was issued. 
- klog.V(5).Infof("No pending operation found for EgressIP: %s while processing created CloudPrivateIPConfig", egressIPName) - return nil - } - op, exists := ops[egressIPString] - if !exists { - klog.V(5).Infof("Pending operations found for EgressIP: %s, but not for the created CloudPrivateIPConfig: %s", egressIPName, egressIPString) - return nil - } - // Process finalized add / updates, hence: (op.toAdd != "" && - // op.toDelete != "") || (op.toAdd != "" && op.toDelete == ""), which is - // equivalent the below. - if op.toAdd != "" { - delete(ops, egressIPString) - } - if len(ops) == 0 { - delete(oc.eIPC.pendingCloudPrivateIPConfigsOps, egressIPName) - } - } - return nil -} - -// removePendingOps removes the existing pending CloudPrivateIPConfig operations -// from the cache and returns the EgressIP object which can be re-synced given -// the new assignment possibilities. -func (oc *DefaultNetworkController) removePendingOpsAndGetResyncs(egressIPName, egressIP string) ([]egressipv1.EgressIP, error) { - oc.eIPC.pendingCloudPrivateIPConfigsMutex.Lock() - defer oc.eIPC.pendingCloudPrivateIPConfigsMutex.Unlock() - ops, pending := oc.eIPC.pendingCloudPrivateIPConfigsOps[egressIPName] - if !pending { - return nil, fmt.Errorf("no pending operation found for EgressIP: %s", egressIPName) - } - op, exists := ops[egressIP] - if !exists { - return nil, fmt.Errorf("pending operations found for EgressIP: %s, but not for the finalized IP: %s", egressIPName, egressIP) - } - // Make sure we are dealing with a delete operation, since for update - // operations will still need to process the add afterwards. - if op.toAdd == "" && op.toDelete != "" { - delete(ops, egressIP) - } - if len(ops) == 0 { - delete(oc.eIPC.pendingCloudPrivateIPConfigsOps, egressIPName) - } - - // Some EgressIP objects might not have all of their spec.egressIPs - // assigned because there was no room to assign them. 
Hence, every time - // we process a final deletion for a CloudPrivateIPConfig: have a look - // at what other EgressIP objects have something un-assigned, and force - // a reconciliation on them by sending a synthetic update. - egressIPs, err := oc.kube.GetEgressIPs() - if err != nil { - return nil, fmt.Errorf("unable to list EgressIPs, err: %v", err) - } - resyncs := make([]egressipv1.EgressIP, 0, len(egressIPs.Items)) - for _, egressIP := range egressIPs.Items { - // Do not process the egress IP object which owns the - // CloudPrivateIPConfig for which we are currently processing the - // deletion for. - if egressIP.Name == egressIPName { - continue - } - unassigned := len(egressIP.Spec.EgressIPs) - len(egressIP.Status.Items) - ops, pending := oc.eIPC.pendingCloudPrivateIPConfigsOps[egressIP.Name] - // If the EgressIP was never added to the pending cache to begin - // with, but has un-assigned egress IPs, try it. - if !pending && unassigned > 0 { - resyncs = append(resyncs, egressIP) - continue - } - // If the EgressIP has pending operations, have a look at if the - // unassigned operations superseed the pending ones. It could be - // that it could only execute a couple of assignments at one point. - if pending && unassigned > len(ops) { - resyncs = append(resyncs, egressIP) - } - } - return resyncs, nil -} - -type cloudPrivateIPConfigOp struct { - toAdd string - toDelete string -} - -// executeCloudPrivateIPConfigChange computes a diff between what needs to be -// assigned/removed and executes the object modification afterwards. 
-// Specifically: if one egress IP is moved from nodeA to nodeB, we actually care -// about an update on the CloudPrivateIPConfig object represented by that egress -// IP, cloudPrivateIPConfigOp is a helper used to determine that sort of -// operations from toAssign/toRemove -func (oc *DefaultNetworkController) executeCloudPrivateIPConfigChange(egressIPName string, toAssign, toRemove []egressipv1.EgressIPStatusItem) error { - oc.eIPC.pendingCloudPrivateIPConfigsMutex.Lock() - defer oc.eIPC.pendingCloudPrivateIPConfigsMutex.Unlock() - ops := make(map[string]*cloudPrivateIPConfigOp, len(toAssign)+len(toRemove)) - for _, assignment := range toAssign { - ops[assignment.EgressIP] = &cloudPrivateIPConfigOp{ - toAdd: assignment.Node, - } - } - for _, removal := range toRemove { - if op, exists := ops[removal.EgressIP]; exists { - op.toDelete = removal.Node - } else { - ops[removal.EgressIP] = &cloudPrivateIPConfigOp{ - toDelete: removal.Node, - } - } - } - // Merge ops into the existing pendingCloudPrivateIPConfigsOps. - // This allows us to: - // a) execute only the new ops - // b) keep track of any pending changes - if len(ops) > 0 { - if _, ok := oc.eIPC.pendingCloudPrivateIPConfigsOps[egressIPName]; !ok { - // Set all operations for the EgressIP object if none are in the cache currently. - oc.eIPC.pendingCloudPrivateIPConfigsOps[egressIPName] = ops - } else { - for cloudPrivateIP, op := range ops { - if _, ok := oc.eIPC.pendingCloudPrivateIPConfigsOps[egressIPName][cloudPrivateIP]; !ok { - // If this specific EgressIP object's CloudPrivateIPConfig address currently has no - // op, simply set it. - oc.eIPC.pendingCloudPrivateIPConfigsOps[egressIPName][cloudPrivateIP] = op - } else { - // If an existing operation for this CloudPrivateIP exists, then the following logic should - // apply: - // If toDelete is currently set: keep the current toDelete. Theoretically, the oldest toDelete - // is the good one. If toDelete if currently not set, overwrite it with the new value. 
- // If toAdd is currently set: overwrite with the new toAdd. Theoretically, the newest toAdd is - // the good one. - // Therefore, only replace toAdd over a previously existing op and only replace toDelete if - // it's unset. - if op.toAdd != "" { - oc.eIPC.pendingCloudPrivateIPConfigsOps[egressIPName][cloudPrivateIP].toAdd = op.toAdd - } - if oc.eIPC.pendingCloudPrivateIPConfigsOps[egressIPName][cloudPrivateIP].toDelete == "" { - oc.eIPC.pendingCloudPrivateIPConfigsOps[egressIPName][cloudPrivateIP].toDelete = op.toDelete - } - } - } - } - } - return oc.executeCloudPrivateIPConfigOps(egressIPName, ops) -} - -func (oc *DefaultNetworkController) executeCloudPrivateIPConfigOps(egressIPName string, ops map[string]*cloudPrivateIPConfigOp) error { - for egressIP, op := range ops { - cloudPrivateIPConfigName := ipStringToCloudPrivateIPConfigName(egressIP) - cloudPrivateIPConfig, err := oc.watchFactory.GetCloudPrivateIPConfig(cloudPrivateIPConfigName) - // toAdd and toDelete is non-empty, this indicates an UPDATE for which - // the object **must** exist, if not: that's an error. 
- if op.toAdd != "" && op.toDelete != "" { - if err != nil { - return fmt.Errorf("cloud update request failed for CloudPrivateIPConfig: %s, could not get item, err: %v", cloudPrivateIPConfigName, err) - } - // Do not update if object is being deleted - if !cloudPrivateIPConfig.GetDeletionTimestamp().IsZero() { - return fmt.Errorf("cloud update request failed, CloudPrivateIPConfig: %s is being deleted", cloudPrivateIPConfigName) - } - cloudPrivateIPConfig.Spec.Node = op.toAdd - if _, err := oc.kube.UpdateCloudPrivateIPConfig(cloudPrivateIPConfig); err != nil { - eIPRef := kapi.ObjectReference{ - Kind: "EgressIP", - Name: egressIPName, - } - oc.recorder.Eventf(&eIPRef, kapi.EventTypeWarning, "CloudUpdateFailed", "egress IP: %s for object EgressIP: %s could not be updated, err: %v", egressIP, egressIPName, err) - return fmt.Errorf("cloud update request failed for CloudPrivateIPConfig: %s, err: %v", cloudPrivateIPConfigName, err) - } - // toAdd is non-empty, this indicates an ADD - // if the object already exists for the specified node that's a no-op - // if the object already exists and the request is for a different node, that's an error - } else if op.toAdd != "" { - if err == nil { - if op.toAdd == cloudPrivateIPConfig.Spec.Node { - klog.Infof("CloudPrivateIPConfig: %s already assigned to node: %s", cloudPrivateIPConfigName, cloudPrivateIPConfig.Spec.Node) - continue - } - return fmt.Errorf("cloud create request failed for CloudPrivateIPConfig: %s, err: item exists", cloudPrivateIPConfigName) - } - cloudPrivateIPConfig := ocpcloudnetworkapi.CloudPrivateIPConfig{ - ObjectMeta: metav1.ObjectMeta{ - Name: cloudPrivateIPConfigName, - Annotations: map[string]string{ - util.OVNEgressIPOwnerRefLabel: egressIPName, - }, - }, - Spec: ocpcloudnetworkapi.CloudPrivateIPConfigSpec{ - Node: op.toAdd, - }, - } - if _, err := oc.kube.CreateCloudPrivateIPConfig(&cloudPrivateIPConfig); err != nil { - eIPRef := kapi.ObjectReference{ - Kind: "EgressIP", - Name: egressIPName, - } - 
oc.recorder.Eventf(&eIPRef, kapi.EventTypeWarning, "CloudAssignmentFailed", "egress IP: %s for object EgressIP: %s could not be created, err: %v", egressIP, egressIPName, err) - return fmt.Errorf("cloud add request failed for CloudPrivateIPConfig: %s, err: %v", cloudPrivateIPConfigName, err) - } - // toDelete is non-empty, this indicates a DELETE - if the object does not exist, log an Info message and continue with the next op. - // The reason for why we are not throwing an error here is that desired state (deleted) == isState (object not found). - // If for whatever reason we have a pending toDelete op for a deleted object, then this op should simply be silently ignored. - // Any other error, return an error to trigger a retry. - } else if op.toDelete != "" { - if err != nil { - if apierrors.IsNotFound(err) { - klog.Infof("Cloud deletion request failed for CloudPrivateIPConfig: %s, item already deleted, err: %v", cloudPrivateIPConfigName, err) - continue - } else { - return fmt.Errorf("cloud deletion request failed for CloudPrivateIPConfig: %s, could not get item, err: %v", cloudPrivateIPConfigName, err) - } - } - if err := oc.kube.DeleteCloudPrivateIPConfig(cloudPrivateIPConfigName); err != nil { - eIPRef := kapi.ObjectReference{ - Kind: "EgressIP", - Name: egressIPName, - } - oc.recorder.Eventf(&eIPRef, kapi.EventTypeWarning, "CloudDeletionFailed", "egress IP: %s for object EgressIP: %s could not be deleted, err: %v", egressIP, egressIPName, err) - return fmt.Errorf("cloud deletion request failed for CloudPrivateIPConfig: %s, err: %v", cloudPrivateIPConfigName, err) - } - } - } - return nil -} - -func (oc *DefaultNetworkController) validateEgressIPSpec(name string, egressIPs []string) (sets.Set[string], error) { - validatedEgressIPs := sets.New[string]() - for _, egressIP := range egressIPs { - ip := net.ParseIP(egressIP) - if ip == nil { - eIPRef := kapi.ObjectReference{ - Kind: "EgressIP", - Name: name, - } - oc.recorder.Eventf(&eIPRef, kapi.EventTypeWarning, 
"InvalidEgressIP", "egress IP: %s for object EgressIP: %s is not a valid IP address", egressIP, name) - return nil, fmt.Errorf("unable to parse provided EgressIP: %s, invalid", egressIP) - } - validatedEgressIPs.Insert(ip.String()) - } - return validatedEgressIPs, nil -} - -// validateEgressIPStatus validates if the statuses are valid given what the -// cache knows about all egress nodes. WatchEgressNodes is initialized before -// any other egress IP handler, so te cache should be warm and correct once we -// start going this. -func (oc *DefaultNetworkController) validateEgressIPStatus(name string, items []egressipv1.EgressIPStatusItem) (map[egressipv1.EgressIPStatusItem]string, map[egressipv1.EgressIPStatusItem]string) { - oc.eIPC.allocator.Lock() - defer oc.eIPC.allocator.Unlock() - valid, invalid := make(map[egressipv1.EgressIPStatusItem]string), make(map[egressipv1.EgressIPStatusItem]string) - for _, eIPStatus := range items { - validAssignment := true - eNode, exists := oc.eIPC.allocator.cache[eIPStatus.Node] - if !exists { - klog.Errorf("Allocator error: EgressIP: %s claims to have an allocation on a node which is unassignable for egress IP: %s", name, eIPStatus.Node) - validAssignment = false - } else { - if eNode.getAllocationCountForEgressIP(name) > 1 { - klog.Errorf("Allocator error: EgressIP: %s claims multiple egress IPs on same node: %s, will attempt rebalancing", name, eIPStatus.Node) - validAssignment = false - } - if !eNode.isEgressAssignable { - klog.Errorf("Allocator error: EgressIP: %s assigned to node: %s which does not have egress label, will attempt rebalancing", name, eIPStatus.Node) - validAssignment = false - } - if !eNode.isReachable { - klog.Errorf("Allocator error: EgressIP: %s assigned to node: %s which is not reachable, will attempt rebalancing", name, eIPStatus.Node) - validAssignment = false - } - if !eNode.isReady { - klog.Errorf("Allocator error: EgressIP: %s assigned to node: %s which is not ready, will attempt rebalancing", name, 
eIPStatus.Node) - validAssignment = false - } - ip := net.ParseIP(eIPStatus.EgressIP) - if ip == nil { - klog.Errorf("Allocator error: EgressIP allocation contains unparsable IP address: %s", eIPStatus.EgressIP) - validAssignment = false - } - if node := oc.isAnyClusterNodeIP(ip); node != nil { - klog.Errorf("Allocator error: EgressIP allocation: %s is the IP of node: %s ", ip.String(), node.name) - validAssignment = false - } - if utilnet.IsIPv6(ip) && eNode.egressIPConfig.V6.Net != nil { - if !eNode.egressIPConfig.V6.Net.Contains(ip) { - klog.Errorf("Allocator error: EgressIP allocation: %s on subnet: %s which cannot host it", ip.String(), eNode.egressIPConfig.V4.Net.String()) - validAssignment = false - } - } else if !utilnet.IsIPv6(ip) && eNode.egressIPConfig.V4.Net != nil { - if !eNode.egressIPConfig.V4.Net.Contains(ip) { - klog.Errorf("Allocator error: EgressIP allocation: %s on subnet: %s which cannot host it", ip.String(), eNode.egressIPConfig.V4.Net.String()) - validAssignment = false - } - } else { - klog.Errorf("Allocator error: EgressIP allocation on node: %s which does not support its IP protocol version", eIPStatus.Node) - validAssignment = false - } - } - if validAssignment { - valid[eIPStatus] = "" - } else { - invalid[eIPStatus] = "" - } - } - return valid, invalid -} - -// addAllocatorEgressIPAssignments adds the allocation to the cache, so that -// they are tracked during the life-cycle of ovnkube-master -func (oc *DefaultNetworkController) addAllocatorEgressIPAssignments(name string, statusAssignments []egressipv1.EgressIPStatusItem) { - oc.eIPC.allocator.Lock() - defer oc.eIPC.allocator.Unlock() - for _, status := range statusAssignments { - if eNode, exists := oc.eIPC.allocator.cache[status.Node]; exists { - eNode.allocations[status.EgressIP] = name - } - } -} +// main reconcile functions end here and local zone controller functions begin func (oc *DefaultNetworkController) addEgressIPAssignments(name string, statusAssignments 
[]egressipv1.EgressIPStatusItem, namespaceSelector, podSelector metav1.LabelSelector) error { namespaces, err := oc.watchFactory.GetNamespacesBySelector(namespaceSelector) @@ -1102,26 +479,54 @@ func (oc *DefaultNetworkController) addPodEgressIPAssignments(name string, statu if len(statusAssignments) == 0 { return nil } - var remainingAssignments []egressipv1.EgressIPStatusItem - // Retrieve the pod's networking configuration from the - // logicalPortCache. The reason for doing this: a) only normal network - // pods are placed in this cache, b) once the pod is placed here we know - // addLogicalPort has finished successfully setting up networking for - // the pod, so we can proceed with retrieving its IP and deleting the - // external GW configuration created in addLogicalPort for the pod. - logicalPort, err := oc.logicalPortCache.get(pod, types.DefaultNetworkName) - if err != nil { - return nil + // We need to proceed with add only under two conditions + // 1) egressNode present in at least one status is local to this zone + // (NOTE: The relation between egressIPName and nodeName is 1:1 i.e in the same object the given node will be present only in one status) + // 2) the pod being added is local to this zone + proceed := false + for _, status := range statusAssignments { + oc.eIPC.nodeZoneState.LockKey(status.Node) + isLocalZoneEgressNode, loadedEgressNode := oc.eIPC.nodeZoneState.Load(status.Node) + if loadedEgressNode && isLocalZoneEgressNode { + proceed = true + oc.eIPC.nodeZoneState.UnlockKey(status.Node) + break + } + oc.eIPC.nodeZoneState.UnlockKey(status.Node) } - // Since the logical switch port cache removes entries only 60 seconds - // after deletion, its possible that when pod is recreated with the same name - // within the 60seconds timer, stale info gets used to create SNATs and reroutes - // for the eip pods. Checking if the expiry is set for the port or not can indicate - // if the port is scheduled for deletion. 
- if !logicalPort.expires.IsZero() { - klog.Warningf("Stale LSP %s for pod %s found in cache refetching", - logicalPort.name, podKey) - return nil + if !proceed && !oc.isPodScheduledinLocalZone(pod) { + return nil // nothing to do if none of the status nodes are local to this master and pod is also remote + } + var remainingAssignments []egressipv1.EgressIPStatusItem + var podIPs []*net.IPNet + var err error + if oc.isPodScheduledinLocalZone(pod) { + // Retrieve the pod's networking configuration from the + // logicalPortCache. The reason for doing this: a) only normal network + // pods are placed in this cache, b) once the pod is placed here we know + // addLogicalPort has finished successfully setting up networking for + // the pod, so we can proceed with retrieving its IP and deleting the + // external GW configuration created in addLogicalPort for the pod. + logicalPort, err := oc.logicalPortCache.get(pod, types.DefaultNetworkName) + if err != nil { + return nil + } + // Since the logical switch port cache removes entries only 60 seconds + // after deletion, its possible that when pod is recreated with the same name + // within the 60seconds timer, stale info gets used to create SNATs and reroutes + // for the eip pods. Checking if the expiry is set for the port or not can indicate + // if the port is scheduled for deletion. 
+ if !logicalPort.expires.IsZero() { + klog.Warningf("Stale LSP %s for pod %s found in cache refetching", + logicalPort.name, podKey) + return nil + } + podIPs = logicalPort.ips + } else { // means this is egress node's local master + podIPs, err = util.GetPodCIDRsWithFullMask(pod, oc.NetInfo) + if err != nil { + return err + } } podState, exists := oc.eIPC.podAssignment[podKey] if !exists { @@ -1160,48 +565,41 @@ func (oc *DefaultNetworkController) addPodEgressIPAssignments(name string, statu return nil } for _, status := range remainingAssignments { - klog.V(2).Infof("Adding pod egress IP status: %v for EgressIP: %s and pod: %s/%s", status, name, pod.Name, pod.Namespace) - if err := oc.eIPC.addPodEgressIPAssignment(name, status, pod, logicalPort.ips); err != nil { + klog.V(2).Infof("Adding pod egress IP status: %v for EgressIP: %s and pod: %s/%s/%v", status, name, pod.Namespace, pod.Name, podIPs) + err = oc.eIPC.nodeZoneState.DoWithLock(status.Node, func(key string) error { + if status.Node == pod.Spec.NodeName { + // we are safe, no need to grab lock again + if err := oc.eIPC.addPodEgressIPAssignment(name, status, pod, podIPs); err != nil { + return fmt.Errorf("unable to create egressip configuration for pod %s/%s/%v, err: %w", pod.Namespace, pod.Name, podIPs, err) + } + podState.egressStatuses[status] = "" + return nil + } + return oc.eIPC.nodeZoneState.DoWithLock(pod.Spec.NodeName, func(key string) error { + // we need to grab lock again for pod's node + if err := oc.eIPC.addPodEgressIPAssignment(name, status, pod, podIPs); err != nil { + return fmt.Errorf("unable to create egressip configuration for pod %s/%s/%v, err: %w", pod.Namespace, pod.Name, podIPs, err) + } + podState.egressStatuses[status] = "" + return nil + }) + }) + if err != nil { return err } - podState.egressStatuses[status] = "" } - // add the podIP to the global egressIP address set - addrSetIPs := make([]net.IP, len(logicalPort.ips)) - for i, podIP := range logicalPort.ips { - copyPodIP := 
*podIP - addrSetIPs[i] = copyPodIP.IP - } - if err := oc.addPodIPsToAddressSet(addrSetIPs); err != nil { - return fmt.Errorf("cannot add egressPodIPs for the pod %s/%s to the address set: err: %v", pod.Namespace, pod.Name, err) - } - return nil -} - -// deleteAllocatorEgressIPAssignmentIfExists deletes egressIP config from node allocations map -// if the entry is available and returns assigned node name, otherwise returns empty string. -func (oc *DefaultNetworkController) deleteAllocatorEgressIPAssignmentIfExists(name, egressIP string) string { - oc.eIPC.allocator.Lock() - defer oc.eIPC.allocator.Unlock() - for nodeName, eNode := range oc.eIPC.allocator.cache { - if egressIPName, exists := eNode.allocations[egressIP]; exists && egressIPName == name { - delete(eNode.allocations, egressIP) - return nodeName + if oc.isPodScheduledinLocalZone(pod) { + // add the podIP to the global egressIP address set + addrSetIPs := make([]net.IP, len(podIPs)) + for i, podIP := range podIPs { + copyPodIP := *podIP + addrSetIPs[i] = copyPodIP.IP } - } - return "" -} - -// deleteAllocatorEgressIPAssignments deletes the allocation as to keep the -// cache state correct, also see addAllocatorEgressIPAssignments -func (oc *DefaultNetworkController) deleteAllocatorEgressIPAssignments(statusAssignments []egressipv1.EgressIPStatusItem) { - oc.eIPC.allocator.Lock() - defer oc.eIPC.allocator.Unlock() - for _, status := range statusAssignments { - if eNode, exists := oc.eIPC.allocator.cache[status.Node]; exists { - delete(eNode.allocations, status.EgressIP) + if err := oc.addPodIPsToAddressSet(addrSetIPs); err != nil { + return fmt.Errorf("cannot add egressPodIPs for the pod %s/%s to the address set: err: %v", pod.Namespace, pod.Name, err) } } + return nil } // deleteEgressIPAssignments performs a full egress IP setup deletion on a per @@ -1215,10 +613,7 @@ func (oc *DefaultNetworkController) deleteEgressIPAssignments(name string, statu var podIPs []net.IP var err error for _, statusToRemove := 
range statusesToRemove { - klog.V(2).Infof("Deleting pod egress IP status: %v for EgressIP: %s", statusToRemove, name) - if podIPs, err = oc.eIPC.deleteEgressIPStatusSetup(name, statusToRemove); err != nil { - return err - } + removed := false for podKey, podStatus := range oc.eIPC.podAssignment { if podStatus.egressIPName != name { // we can continue here since this pod was not managed by this EIP object @@ -1229,18 +624,34 @@ func (oc *DefaultNetworkController) deleteEgressIPAssignments(name string, statu // we can continue here since this pod was not managed by this statusToRemove continue } - // this pod was managed by statusToRemove.EgressIP; we need to try and add its SNAT back towards nodeIP - podNamespace, podName := getPodNamespaceAndNameFromKey(podKey) - if err = oc.eIPC.addExternalGWPodSNAT(podNamespace, podName, statusToRemove); err != nil { + err = oc.eIPC.nodeZoneState.DoWithLock(statusToRemove.Node, func(key string) error { + // this statusToRemove was managing at least one pod, hence let's tear down the setup for this status + if !removed { + klog.V(2).Infof("Deleting pod egress IP status: %v for EgressIP: %s", statusToRemove, name) + if podIPs, err = oc.eIPC.deleteEgressIPStatusSetup(name, statusToRemove); err != nil { + return err + } + removed = true // we should only tear down once and not per pod since tear down is based on externalIDs + } + // this pod was managed by statusToRemove.EgressIP; we need to try and add its SNAT back towards nodeIP + podNamespace, podName := getPodNamespaceAndNameFromKey(podKey) + if err = oc.eIPC.addExternalGWPodSNAT(podNamespace, podName, statusToRemove); err != nil { + return err + } + delete(podStatus.egressStatuses, statusToRemove) + return nil + }) + if err != nil { return err } - delete(podStatus.egressStatuses, statusToRemove) if len(podStatus.egressStatuses) == 0 && len(podStatus.standbyEgressIPNames) == 0 { // pod could be managed by more than one egressIP // so remove the podKey from cache only if we are 
sure // there are no more egressStatuses managing this pod klog.V(5).Infof("Deleting pod key %s from assignment cache", podKey) // delete the podIP from the global egressIP address set since its no longer managed by egressIPs + // NOTE(tssurya): There is no way to infer if pod was local to this zone or not, + // so we try to nuke the IP from address-set anyways - it will be a no-op for remote pods if err := oc.deletePodIPsFromAddressSet(podIPs); err != nil { return fmt.Errorf("cannot delete egressPodIPs for the pod %s from the address set: err: %v", podKey, err) } @@ -1303,11 +714,31 @@ func (oc *DefaultNetworkController) deletePodEgressIPAssignments(name string, st return err } for _, statusToRemove := range statusesToRemove { + if _, ok := podStatus.egressStatuses[statusToRemove]; !ok { + // we can continue here since this pod was not managed by this statusToRemove + continue + } klog.V(2).Infof("Deleting pod egress IP status: %v for EgressIP: %s and pod: %s/%s", statusToRemove, name, pod.Name, pod.Namespace) - if err := oc.eIPC.deletePodEgressIPAssignment(name, statusToRemove, pod, podIPs); err != nil { + err = oc.eIPC.nodeZoneState.DoWithLock(statusToRemove.Node, func(key string) error { + if statusToRemove.Node == pod.Spec.NodeName { + // we are safe, no need to grab lock again + if err := oc.eIPC.deletePodEgressIPAssignment(name, statusToRemove, pod, podIPs); err != nil { + return err + } + delete(podStatus.egressStatuses, statusToRemove) + return nil + } + return oc.eIPC.nodeZoneState.DoWithLock(pod.Spec.NodeName, func(key string) error { + if err := oc.eIPC.deletePodEgressIPAssignment(name, statusToRemove, pod, podIPs); err != nil { + return err + } + delete(podStatus.egressStatuses, statusToRemove) + return nil + }) + }) + if err != nil { return err } - delete(podStatus.egressStatuses, statusToRemove) } // Delete the key if there are no more status assignments to keep // for the pod. 
@@ -1316,42 +747,37 @@ func (oc *DefaultNetworkController) deletePodEgressIPAssignments(name string, st // so remove the podKey from cache only if we are sure // there are no more egressStatuses managing this pod klog.V(5).Infof("Deleting pod key %s from assignment cache", podKey) - // delete the podIP from the global egressIP address set - addrSetIPs := make([]net.IP, len(podIPs)) - for i, podIP := range podIPs { - copyPodIP := *podIP - addrSetIPs[i] = copyPodIP.IP - } - if err := oc.deletePodIPsFromAddressSet(addrSetIPs); err != nil { - return fmt.Errorf("cannot delete egressPodIPs for the pod %s from the address set: err: %v", podKey, err) + if oc.isPodScheduledinLocalZone(pod) { + // delete the podIP from the global egressIP address set + addrSetIPs := make([]net.IP, len(podIPs)) + for i, podIP := range podIPs { + copyPodIP := *podIP + addrSetIPs[i] = copyPodIP.IP + } + if err := oc.deletePodIPsFromAddressSet(addrSetIPs); err != nil { + return fmt.Errorf("cannot delete egressPodIPs for the pod %s from the address set: err: %v", podKey, err) + } } delete(oc.eIPC.podAssignment, podKey) } return nil } -func (oc *DefaultNetworkController) isEgressNodeReady(egressNode *kapi.Node) bool { - for _, condition := range egressNode.Status.Conditions { - if condition.Type == v1.NodeReady { - return condition.Status == v1.ConditionTrue - } - } - return false -} - -func (oc *DefaultNetworkController) isEgressNodeReachable(egressNode *kapi.Node) bool { - oc.eIPC.allocator.Lock() - defer oc.eIPC.allocator.Unlock() - if eNode, exists := oc.eIPC.allocator.cache[egressNode.Name]; exists { - return eNode.isReachable || oc.isReachable(eNode.name, eNode.mgmtIPs, eNode.healthClient) - } - return false -} - type egressIPCacheEntry struct { - egressPods map[string]sets.Set[string] + // egressLocalPods will contain all the pods that + // are local to this zone being served by thie egressIP + // object. This will help sync LRP & LRSR. 
+ egressLocalPods map[string]sets.Set[string] + // egressRemotePods will contain all the remote pods + // that are being served by this egressIP object + // This will help sync SNATs. + egressRemotePods map[string]sets.Set[string] // will be used only when multizone IC is enabled gatewayRouterIPs sets.Set[string] egressIPs map[string]string + // egressLocalNodes will contain all nodes that are local + // to this zone which are serving this egressIP object.. + // This will help sync SNATs + egressLocalNodes sets.Set[string] } func (oc *DefaultNetworkController) syncEgressIPs(namespaces []interface{}) error { @@ -1367,6 +793,13 @@ func (oc *DefaultNetworkController) syncEgressIPs(namespaces []interface{}) erro // This function is called when handlers for EgressIPNamespaceType are started // since namespaces is the first object that egressIP feature starts watching + // update localZones cache of eIPCZoneController + // WatchNodes() is called before WatchEgressIPNamespaces() so the oc.localZones cache + // will be updated whereas WatchEgressNodes() is called after WatchEgressIPNamespaces() + // and so we must update the cache to ensure we are not stale. + if err := oc.syncLocalNodeZonesCache(); err != nil { + return fmt.Errorf("syncLocalNodeZonesCache unable to update the local zones node cache: %v", err) + } egressIPCache, err := oc.generateCacheForEgressIP() if err != nil { return fmt.Errorf("syncEgressIPs unable to generate cache for egressip: %v", err) @@ -1386,6 +819,23 @@ func (oc *DefaultNetworkController) syncEgressIPs(namespaces []interface{}) erro return nil } +func (oc *DefaultNetworkController) syncLocalNodeZonesCache() error { + nodes, err := oc.watchFactory.GetNodes() + if err != nil { + return fmt.Errorf("unable to fetch nodes from watch factory %w", err) + } + for _, node := range nodes { + // NOTE: Even at this stage, there can be race; the bnc.zone might be the nodeName + // while the node's annotations are not yet set, so it still shows global. 
+ // The EgressNodeType events (which are basically all node updates) should + // constantly update this cache as nodes get added, updated and removed + oc.eIPC.nodeZoneState.LockKey(node.Name) + oc.eIPC.nodeZoneState.Store(node.Name, oc.isLocalZoneNode(node)) + oc.eIPC.nodeZoneState.UnlockKey(node.Name) + } + return nil +} + func (oc *DefaultNetworkController) syncStaleAddressSetIPs(egressIPCache map[string]egressIPCacheEntry) error { dbIDs := getEgressIPAddrSetDbIDs(EgressIPServedPodsAddrSetName, oc.controllerName) as, err := oc.addressSetFactory.EnsureAddressSet(dbIDs) @@ -1393,8 +843,10 @@ func (oc *DefaultNetworkController) syncStaleAddressSetIPs(egressIPCache map[str return fmt.Errorf("cannot ensure that addressSet for egressIP pods %s exists %v", EgressIPServedPodsAddrSetName, err) } var allEIPServedPodIPs []net.IP + // we only care about local zone pods for the address-set since + // traffic from remote pods towards nodeIP won't even reach this zone for eipName := range egressIPCache { - for _, podIPs := range egressIPCache[eipName].egressPods { + for _, podIPs := range egressIPCache[eipName].egressLocalPods { for podIP := range podIPs { allEIPServedPodIPs = append(allEIPServedPodIPs, net.ParseIP(podIP)) } @@ -1417,14 +869,31 @@ func (oc *DefaultNetworkController) syncPodAssignmentCache(egressIPCache map[str oc.eIPC.podAssignmentMutex.Lock() defer oc.eIPC.podAssignmentMutex.Unlock() for egressIPName, state := range egressIPCache { - p := func(item *nbdb.LogicalRouterPolicy) bool { + p1 := func(item *nbdb.LogicalRouterPolicy) bool { return item.Priority == types.EgressIPReroutePriority && item.ExternalIDs["name"] == egressIPName } - reRoutePolicies, err := libovsdbops.FindLogicalRouterPoliciesWithPredicate(oc.nbClient, p) + reRoutePolicies, err := libovsdbops.FindLogicalRouterPoliciesWithPredicate(oc.nbClient, p1) if err != nil { return err } - for podKey, podIPs := range state.egressPods { + p2 := func(item *nbdb.LogicalRouterStaticRoute) bool { + return 
item.ExternalIDs["name"] == egressIPName + } + reRouteStaticRoutes, err := libovsdbops.FindLogicalRouterStaticRoutesWithPredicate(oc.nbClient, p2) + if err != nil { + return err + } + // Because of how we do generateCacheForEgressIP, we will only have pods that are + // either local to zone (in which case reRoutePolicies will work) OR pods that are + // managed by local egressIP nodes (in which case reRouteStaticRoutes will work) + egressPods := make(map[string]sets.Set[string]) + for podKey, podIPs := range state.egressLocalPods { + egressPods[podKey] = podIPs + } + for podKey, podIPs := range state.egressRemotePods { + egressPods[podKey] = podIPs + } + for podKey, podIPs := range egressPods { podState, ok := oc.eIPC.podAssignment[podKey] if !ok { podState = &podAssignmentState{ @@ -1451,6 +920,14 @@ func (oc *DefaultNetworkController) syncPodAssignmentCache(egressIPCache map[str klog.Infof("EgressIP %s is managing pod %s", egressIPName, podKey) } } + for _, lrsr := range reRouteStaticRoutes { + logicalIP := lrsr.IPPrefix + if podIPs.Has(logicalIP) { // should match for only one egressIP object + podState.egressIPName = egressIPName + podState.standbyEgressIPNames.Delete(egressIPName) + klog.Infof("EgressIP %s is managing pod %s", egressIPName, podKey) + } + } oc.eIPC.podAssignment[podKey] = podState } } @@ -1475,7 +952,10 @@ func (oc *DefaultNetworkController) syncStaleEgressReroutePolicy(egressIPCache m parsedLogicalIP := net.ParseIP(logicalIP) egressPodIPs := sets.NewString() if exists { - for _, podIPs := range cacheEntry.egressPods { + // Since LRPs are created only for pods local to this zone + // we need to care about only those pods. Nexthop for them will + // either be transit switch IP or join switch IP. + for _, podIPs := range cacheEntry.egressLocalPods { egressPodIPs.Insert(podIPs.UnsortedList()...) 
} } @@ -1531,7 +1011,12 @@ func (oc *DefaultNetworkController) syncStaleSNATRules(egressIPCache map[string] cacheEntry, exists := egressIPCache[egressIPName] egressPodIPs := sets.NewString() if exists { - for _, podIPs := range cacheEntry.egressPods { + // since SNATs can be present either if status.Node was local to + // the zone or pods were local to the zone, we need to check both + for _, podIPs := range cacheEntry.egressLocalPods { + egressPodIPs.Insert(podIPs.UnsortedList()...) + } + for _, podIPs := range cacheEntry.egressRemotePods { egressPodIPs.Insert(podIPs.UnsortedList()...) } } @@ -1539,7 +1024,7 @@ func (oc *DefaultNetworkController) syncStaleSNATRules(egressIPCache map[string] klog.Infof("syncStaleSNATRules will delete %s due to logical ip: %v", egressIPName, item) return true } - if node, ok := cacheEntry.egressIPs[item.ExternalIP]; !ok || + if node, ok := cacheEntry.egressIPs[item.ExternalIP]; !ok || !cacheEntry.egressLocalNodes.Has(node) || item.LogicalPort == nil || *item.LogicalPort != types.K8sPrefix+node { klog.Infof("syncStaleSNATRules will delete %s due to external ip or stale logical port: %v", egressIPName, item) return true @@ -1614,18 +1099,32 @@ func (oc *DefaultNetworkController) generateCacheForEgressIP() (map[string]egres } for _, egressIP := range egressIPs { egressIPCache[egressIP.Name] = egressIPCacheEntry{ - egressPods: make(map[string]sets.Set[string]), - gatewayRouterIPs: sets.New[string](), + egressLocalPods: make(map[string]sets.Set[string]), + egressRemotePods: make(map[string]sets.Set[string]), + gatewayRouterIPs: sets.New[string](), // can be transit switchIPs for interconnect multizone setup egressIPs: map[string]string{}, + egressLocalNodes: sets.New[string](), } for _, status := range egressIP.Status.Items { + var nextHopIP string isEgressIPv6 := utilnet.IsIPv6String(status.EgressIP) - gatewayRouterIP, err := oc.eIPC.getGatewayRouterJoinIP(status.Node, isEgressIPv6) - if err != nil { - klog.Errorf("Unable to retrieve 
gateway IP for node: %s, protocol is IPv6: %v, err: %v", status.Node, isEgressIPv6, err) - continue + _, isLocalZoneEgressNode := oc.localZoneNodes.Load(status.Node) + if isLocalZoneEgressNode { + gatewayRouterIP, err := oc.eIPC.getGatewayRouterJoinIP(status.Node, isEgressIPv6) + if err != nil { + klog.Errorf("Unable to retrieve gateway IP for node: %s, protocol is IPv6: %v, err: %v", status.Node, isEgressIPv6, err) + continue + } + nextHopIP = gatewayRouterIP.String() + egressIPCache[egressIP.Name].egressLocalNodes.Insert(status.Node) + } else { + nextHopIP, err = oc.eIPC.getTransitIP(status.Node, isEgressIPv6) + if err != nil { + klog.Errorf("Unable to fetch transit switch IP for node %s: %v", status.Node, err) + continue + } } - egressIPCache[egressIP.Name].gatewayRouterIPs.Insert(gatewayRouterIP.String()) + egressIPCache[egressIP.Name].gatewayRouterIPs.Insert(nextHopIP) egressIPCache[egressIP.Name].egressIPs[status.EgressIP] = status.Node } namespaces, err := oc.watchFactory.GetNamespacesBySelector(egressIP.Spec.NamespaceSelector) @@ -1643,6 +1142,9 @@ func (oc *DefaultNetworkController) generateCacheForEgressIP() (map[string]egres if util.PodCompleted(pod) { continue } + if len(egressIPCache[egressIP.Name].egressLocalNodes) == 0 && !oc.isPodScheduledinLocalZone(pod) { + continue // don't process anything on master's that have nothing to do with the pod + } // FIXME(trozet): potential race where pod is not yet added in the cache by the pod handler logicalPort, err := oc.logicalPortCache.get(pod, types.DefaultNetworkName) if err != nil { @@ -1650,496 +1152,117 @@ func (oc *DefaultNetworkController) generateCacheForEgressIP() (map[string]egres continue } podKey := getPodKey(pod) - _, ok := egressIPCache[egressIP.Name].egressPods[podKey] - if !ok { - egressIPCache[egressIP.Name].egressPods[podKey] = sets.New[string]() - } - for _, ipNet := range logicalPort.ips { - egressIPCache[egressIP.Name].egressPods[podKey].Insert(ipNet.IP.String()) - } - } - } - } - - return 
egressIPCache, nil -} - -// isAnyClusterNodeIP verifies that the IP is not any node IP. -func (oc *DefaultNetworkController) isAnyClusterNodeIP(ip net.IP) *egressNode { - for _, eNode := range oc.eIPC.allocator.cache { - if ip.Equal(eNode.egressIPConfig.V6.IP) || ip.Equal(eNode.egressIPConfig.V4.IP) { - return eNode - } - } - return nil -} - -type EgressIPPatchStatus struct { - Op string `json:"op"` - Path string `json:"path"` - Value egressipv1.EgressIPStatus `json:"value"` -} - -// patchReplaceEgressIPStatus performs a replace patch operation of the egress -// IP status by replacing the status with the provided value. This allows us to -// update only the status field, without overwriting any other. This is -// important because processing egress IPs can take a while (when running on a -// public cloud and in the worst case), hence we don't want to perform a full -// object update which risks resetting the EgressIP object's fields to the state -// they had when we started processing the change. -func (oc *DefaultNetworkController) patchReplaceEgressIPStatus(name string, statusItems []egressipv1.EgressIPStatusItem) error { - klog.Infof("Patching status on EgressIP %s: %v", name, statusItems) - return retry.RetryOnConflict(retry.DefaultRetry, func() error { - t := []EgressIPPatchStatus{ - { - Op: "replace", - Path: "/status", - Value: egressipv1.EgressIPStatus{ - Items: statusItems, - }, - }, - } - op, err := json.Marshal(&t) - if err != nil { - return fmt.Errorf("error serializing status patch operation: %+v, err: %v", statusItems, err) - } - return oc.kube.PatchEgressIP(name, op) - }) -} - -// assignEgressIPs is the main assignment algorithm for egress IPs to nodes. 
-// Specifically we have a couple of hard constraints: a) the subnet of the node -// must be able to host the egress IP b) the egress IP cannot be a node IP c) -// the IP cannot already be assigned and reference by another EgressIP object d) -// no two egress IPs for the same EgressIP object can be assigned to the same -// node e) (for public clouds) the amount of egress IPs assigned to one node -// must respect its assignment capacity. Moreover there is a soft constraint: -// the assignments need to be balanced across all cluster nodes, so that no node -// becomes a bottleneck. The balancing is achieved by sorting the nodes in -// ascending order following their existing amount of allocations, and trying to -// assign the egress IP to the node with the lowest amount of allocations every -// time, this does not guarantee complete balance, but mostly complete. -func (oc *DefaultNetworkController) assignEgressIPs(name string, egressIPs []string) []egressipv1.EgressIPStatusItem { - oc.eIPC.allocator.Lock() - defer oc.eIPC.allocator.Unlock() - assignments := []egressipv1.EgressIPStatusItem{} - assignableNodes, existingAllocations := oc.getSortedEgressData() - if len(assignableNodes) == 0 { - eIPRef := kapi.ObjectReference{ - Kind: "EgressIP", - Name: name, - } - oc.recorder.Eventf(&eIPRef, kapi.EventTypeWarning, "NoMatchingNodeFound", "no assignable nodes for EgressIP: %s, please tag at least one node with label: %s", name, util.GetNodeEgressLabel()) - klog.Errorf("No assignable nodes found for EgressIP: %s and requested IPs: %v", name, egressIPs) - return assignments - } - klog.V(5).Infof("Current assignments are: %+v", existingAllocations) - for _, egressIP := range egressIPs { - klog.V(5).Infof("Will attempt assignment for egress IP: %s", egressIP) - eIPC := net.ParseIP(egressIP) - if status, exists := existingAllocations[eIPC.String()]; exists { - // On public clouds we will re-process assignments for the same IP - // multiple times due to the nature of syncing 
each individual - // CloudPrivateIPConfig one at a time. This means that we are - // expected to end up in this situation multiple times per sync. Ex: - // Say we an EgressIP is created with IP1, IP2, IP3. We begin by - // assigning them all the first round. Next we get the - // CloudPrivateIPConfig confirming the addition of IP1, leading us - // to re-assign IP2, IP3, but since we've already assigned them - // we'll end up here. This is not an error. What would be an error - // is if the user created EIP1 with IP1 and a second EIP2 with IP1 - if name == status.Name { - // IP is already assigned for this EgressIP object - assignments = append(assignments, egressipv1.EgressIPStatusItem{ - Node: status.Node, - EgressIP: eIPC.String(), - }) - continue - } else { - klog.Errorf("IP: %q for EgressIP: %s is already allocated for EgressIP: %s on %s", egressIP, name, status.Name, status.Node) - return assignments - } - } - if node := oc.isAnyClusterNodeIP(eIPC); node != nil { - eIPRef := kapi.ObjectReference{ - Kind: "EgressIP", - Name: name, - } - oc.recorder.Eventf( - &eIPRef, - kapi.EventTypeWarning, - "UnsupportedRequest", - "Egress IP: %v for object EgressIP: %s is the IP address of node: %s, this is unsupported", eIPC, name, node.name, - ) - klog.Errorf("Egress IP: %v is the IP address of node: %s", eIPC, node.name) - return assignments - } - for _, eNode := range assignableNodes { - klog.V(5).Infof("Attempting assignment on egress node: %+v", eNode) - if eNode.getAllocationCountForEgressIP(name) > 0 { - klog.V(5).Infof("Node: %s is already in use by another egress IP for this EgressIP: %s, trying another node", eNode.name, name) - continue - } - if eNode.egressIPConfig.Capacity.IP < util.UnlimitedNodeCapacity { - if eNode.egressIPConfig.Capacity.IP-len(eNode.allocations) <= 0 { - klog.V(5).Infof("Additional allocation on Node: %s exhausts it's IP capacity, trying another node", eNode.name) - continue - } - } - if eNode.egressIPConfig.Capacity.IPv4 < 
util.UnlimitedNodeCapacity && utilnet.IsIPv4(eIPC) { - if eNode.egressIPConfig.Capacity.IPv4-getIPFamilyAllocationCount(eNode.allocations, false) <= 0 { - klog.V(5).Infof("Additional allocation on Node: %s exhausts it's IPv4 capacity, trying another node", eNode.name) - continue - } - } - if eNode.egressIPConfig.Capacity.IPv6 < util.UnlimitedNodeCapacity && utilnet.IsIPv6(eIPC) { - if eNode.egressIPConfig.Capacity.IPv6-getIPFamilyAllocationCount(eNode.allocations, true) <= 0 { - klog.V(5).Infof("Additional allocation on Node: %s exhausts it's IPv6 capacity, trying another node", eNode.name) - continue + if oc.isPodScheduledinLocalZone(pod) { + _, ok := egressIPCache[egressIP.Name].egressLocalPods[podKey] + if !ok { + egressIPCache[egressIP.Name].egressLocalPods[podKey] = sets.New[string]() + } + for _, ipNet := range logicalPort.ips { + egressIPCache[egressIP.Name].egressLocalPods[podKey].Insert(ipNet.IP.String()) + } + } else if len(egressIPCache[egressIP.Name].egressLocalNodes) > 0 { + // it means this controller has at least one egressNode that is in localZone but matched pod is remote + _, ok := egressIPCache[egressIP.Name].egressRemotePods[podKey] + if !ok { + egressIPCache[egressIP.Name].egressRemotePods[podKey] = sets.New[string]() + } + for _, ipNet := range logicalPort.ips { + egressIPCache[egressIP.Name].egressRemotePods[podKey].Insert(ipNet.IP.String()) + } } } - if (eNode.egressIPConfig.V6.Net != nil && eNode.egressIPConfig.V6.Net.Contains(eIPC)) || - (eNode.egressIPConfig.V4.Net != nil && eNode.egressIPConfig.V4.Net.Contains(eIPC)) { - assignments = append(assignments, egressipv1.EgressIPStatusItem{ - Node: eNode.name, - EgressIP: eIPC.String(), - }) - klog.Infof("Successful assignment of egress IP: %s on node: %+v", egressIP, eNode) - eNode.allocations[eIPC.String()] = name - break - } - } - } - if len(assignments) == 0 { - eIPRef := kapi.ObjectReference{ - Kind: "EgressIP", - Name: name, - } - oc.recorder.Eventf(&eIPRef, kapi.EventTypeWarning, 
"NoMatchingNodeFound", "No matching nodes found, which can host any of the egress IPs: %v for object EgressIP: %s", egressIPs, name) - klog.Errorf("No matching host found for EgressIP: %s", name) - return assignments - } - if len(assignments) < len(egressIPs) { - eIPRef := kapi.ObjectReference{ - Kind: "EgressIP", - Name: name, - } - oc.recorder.Eventf(&eIPRef, kapi.EventTypeWarning, "UnassignedRequest", "Not all egress IPs for EgressIP: %s could be assigned, please tag more nodes", name) - } - return assignments -} - -func getIPFamilyAllocationCount(allocations map[string]string, isIPv6 bool) (count int) { - for allocation := range allocations { - if utilnet.IsIPv4String(allocation) && !isIPv6 { - count++ - } - if utilnet.IsIPv6String(allocation) && isIPv6 { - count++ - } - } - return -} - -type egressIPNodeStatus struct { - Node string - Name string -} - -// getSortedEgressData returns a sorted slice of all egressNodes based on the -// amount of allocations found in the cache -func (oc *DefaultNetworkController) getSortedEgressData() ([]*egressNode, map[string]egressIPNodeStatus) { - assignableNodes := []*egressNode{} - allAllocations := make(map[string]egressIPNodeStatus) - for _, eNode := range oc.eIPC.allocator.cache { - if eNode.isEgressAssignable && eNode.isReady && eNode.isReachable { - assignableNodes = append(assignableNodes, eNode) - } - for ip, eipName := range eNode.allocations { - allAllocations[ip] = egressIPNodeStatus{Node: eNode.name, Name: eipName} - } - } - sort.Slice(assignableNodes, func(i, j int) bool { - return len(assignableNodes[i].allocations) < len(assignableNodes[j].allocations) - }) - return assignableNodes, allAllocations -} - -func (oc *DefaultNetworkController) setNodeEgressAssignable(nodeName string, isAssignable bool) { - oc.eIPC.allocator.Lock() - defer oc.eIPC.allocator.Unlock() - if eNode, exists := oc.eIPC.allocator.cache[nodeName]; exists { - eNode.isEgressAssignable = isAssignable - // if the node is not 
assignable/ready/reachable anymore we need to - // empty all of it's allocations from our cache since we'll clear all - // assignments from this node later on, because of this. - if !isAssignable { - eNode.allocations = make(map[string]string) - } - } -} - -func (oc *DefaultNetworkController) setNodeEgressReady(nodeName string, isReady bool) { - oc.eIPC.allocator.Lock() - defer oc.eIPC.allocator.Unlock() - if eNode, exists := oc.eIPC.allocator.cache[nodeName]; exists { - eNode.isReady = isReady - // see setNodeEgressAssignable - if !isReady { - eNode.allocations = make(map[string]string) - } - } -} - -func (oc *DefaultNetworkController) setNodeEgressReachable(nodeName string, isReachable bool) { - oc.eIPC.allocator.Lock() - defer oc.eIPC.allocator.Unlock() - if eNode, exists := oc.eIPC.allocator.cache[nodeName]; exists { - eNode.isReachable = isReachable - // see setNodeEgressAssignable - if !isReachable { - eNode.allocations = make(map[string]string) - } - } -} - -func (oc *DefaultNetworkController) addEgressNode(nodeName string) error { - var errors []error - klog.V(5).Infof("Egress node: %s about to be initialized", nodeName) - // This option will program OVN to start sending GARPs for all external IPS - // that the logical switch port has been configured to use. This is - // necessary for egress IP because if an egress IP is moved between two - // nodes, the nodes need to actively update the ARP cache of all neighbors - // as to notify them the change. If this is not the case: packets will - // continue to be routed to the old node which hosted the egress IP before - // it was moved, and the connections will fail. - portName := types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + nodeName - lsp := nbdb.LogicalSwitchPort{ - Name: portName, - // Setting nat-addresses to router will send out GARPs for all externalIPs and LB VIPs - // hosted on the GR. 
Setting exclude-lb-vips-from-garp to true will make sure GARPs for - // LB VIPs are not sent, thereby preventing GARP overload. - Options: map[string]string{"nat-addresses": "router", "exclude-lb-vips-from-garp": "true"}, - } - err := libovsdbops.UpdateLogicalSwitchPortSetOptions(oc.nbClient, &lsp) - if err != nil { - errors = append(errors, fmt.Errorf("unable to configure GARP on external logical switch port for egress node: %s, "+ - "this will result in packet drops during egress IP re-assignment, err: %v", nodeName, err)) - } - - // If a node has been labelled for egress IP we need to check if there are any - // egress IPs which are missing an assignment. If there are, we need to send a - // synthetic update since reconcileEgressIP will then try to assign those IPs to - // this node (if possible) - egressIPs, err := oc.kube.GetEgressIPs() - if err != nil { - return fmt.Errorf("unable to list EgressIPs, err: %v", err) - } - for _, egressIP := range egressIPs.Items { - if len(egressIP.Spec.EgressIPs) != len(egressIP.Status.Items) { - // Send a "synthetic update" on all egress IPs which are not fully - // assigned, the reconciliation loop for WatchEgressIP will try to - // assign stuff to this new node. The workqueue's delta FIFO - // implementation will not trigger a watch event for updates on - // objects which have no semantic difference, hence: call the - // reconciliation function directly. 
- if err := oc.reconcileEgressIP(nil, &egressIP); err != nil { - errors = append(errors, fmt.Errorf("synthetic update for EgressIP: %s failed, err: %v", egressIP.Name, err)) - } } } - if len(errors) > 0 { - return utilerrors.NewAggregate(errors) - } - return nil + return egressIPCache, nil } -func (oc *DefaultNetworkController) deleteEgressNode(nodeName string) error { - var errorAggregate []error - klog.V(5).Infof("Egress node: %s about to be removed", nodeName) - // This will remove the option described in addEgressNode from the logical - // switch port, since this node will not be used for egress IP assignments - // from now on. - portName := types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + nodeName - lsp := nbdb.LogicalSwitchPort{ - Name: portName, - Options: map[string]string{"nat-addresses": "", "exclude-lb-vips-from-garp": ""}, - } - err := libovsdbops.UpdateLogicalSwitchPortSetOptions(oc.nbClient, &lsp) - if errors.Is(err, libovsdbclient.ErrNotFound) { - // if the LSP setup is already gone, then don't count it as error. - klog.Warningf("Unable to remove GARP configuration on external logical switch port for egress node: %s, err: %v", nodeName, err) - } else if err != nil { - errorAggregate = append(errorAggregate, fmt.Errorf("unable to remove GARP configuration on external logical switch port for egress node: %s, err: %v", nodeName, err)) - } - - // Since the node has been labelled as "not usable" for egress IP - // assignments we need to find all egress IPs which have an assignment to - // it, and move them elsewhere. - egressIPs, err := oc.kube.GetEgressIPs() - if err != nil { - return fmt.Errorf("unable to list EgressIPs, err: %v", err) - } - for _, egressIP := range egressIPs.Items { - for _, status := range egressIP.Status.Items { - if status.Node == nodeName { - // Send a "synthetic update" on all egress IPs which have an - // assignment to this node. 
The reconciliation loop for - // WatchEgressIP will see that the current assignment status to - // this node is invalid and try to re-assign elsewhere. The - // workqueue's delta FIFO implementation will not trigger a - // watch event for updates on objects which have no semantic - // difference, hence: call the reconciliation function directly. - if err := oc.reconcileEgressIP(nil, &egressIP); err != nil { - errorAggregate = append(errorAggregate, fmt.Errorf("Re-assignment for EgressIP: %s failed, unable to update object, err: %v", egressIP.Name, err)) - } - break - } - } - } - if len(errorAggregate) > 0 { - return utilerrors.NewAggregate(errorAggregate) - } - return nil +type EgressIPPatchStatus struct { + Op string `json:"op"` + Path string `json:"path"` + Value egressipv1.EgressIPStatus `json:"value"` } -func (oc *DefaultNetworkController) initEgressIPAllocator(node *kapi.Node) (err error) { - oc.eIPC.allocator.Lock() - defer oc.eIPC.allocator.Unlock() - if _, exists := oc.eIPC.allocator.cache[node.Name]; !exists { - var parsedEgressIPConfig *util.ParsedNodeEgressIPConfiguration - if util.PlatformTypeIsEgressIPCloudProvider() { - parsedEgressIPConfig, err = util.ParseCloudEgressIPConfig(node) - if err != nil { - return fmt.Errorf("unable to use cloud node for egress assignment, err: %v", err) - } - } else { - parsedEgressIPConfig, err = util.ParseNodePrimaryIfAddr(node) - if err != nil { - return fmt.Errorf("unable to use node for egress assignment, err: %v", err) - } +// patchReplaceEgressIPStatus performs a replace patch operation of the egress +// IP status by replacing the status with the provided value. This allows us to +// update only the status field, without overwriting any other. 
This is +// important because processing egress IPs can take a while (when running on a +// public cloud and in the worst case), hence we don't want to perform a full +// object update which risks resetting the EgressIP object's fields to the state +// they had when we started processing the change. +// used for UNIT TESTING only +func (oc *DefaultNetworkController) patchReplaceEgressIPStatus(name string, statusItems []egressipv1.EgressIPStatusItem) error { + klog.Infof("Patching status on EgressIP %s: %v", name, statusItems) + return retry.RetryOnConflict(retry.DefaultRetry, func() error { + t := []EgressIPPatchStatus{ + { + Op: "replace", + Path: "/status", + Value: egressipv1.EgressIPStatus{ + Items: statusItems, + }, + }, } - nodeSubnets, err := util.ParseNodeHostSubnetAnnotation(node, types.DefaultNetworkName) + op, err := json.Marshal(&t) if err != nil { - return fmt.Errorf("failed to parse node %s subnets annotation %v", node.Name, err) - } - mgmtIPs := make([]net.IP, len(nodeSubnets)) - for i, subnet := range nodeSubnets { - mgmtIPs[i] = util.GetNodeManagementIfAddr(subnet).IP - } - oc.eIPC.allocator.cache[node.Name] = &egressNode{ - name: node.Name, - egressIPConfig: parsedEgressIPConfig, - mgmtIPs: mgmtIPs, - allocations: make(map[string]string), - healthClient: hccAllocator.allocate(node.Name), + return fmt.Errorf("error serializing status patch operation: %+v, err: %v", statusItems, err) } - } - return nil + return oc.kube.PatchEgressIP(name, op) + }) } -// reconcileNodeForEgressIP with respect and old and new status of a node -func (oc *DefaultNetworkController) reconcileNodeForEgressIP(oldNode, newNode *v1.Node) error { - // Check if the node's addresses changed. If so, update LR policies. - if oldNode == nil || newNode == nil || util.NodeHostAddressesAnnotationChanged(oldNode, newNode) { - klog.Infof("Egress IP detected IP address change. 
Updating no re-route policies") - err := oc.ensureDefaultNoRerouteNodePolicies() - if err != nil { - return err - } - } - - nodeEgressLabel := util.GetNodeEgressLabel() - var oldLabels map[string]string - var newLabels map[string]string - var isOldReady, isNewReady, isNewReachable bool - var nodeName string - if oldNode != nil { - oldLabels = oldNode.GetLabels() - isOldReady = oc.isEgressNodeReady(oldNode) - nodeName = oldNode.Name - } - if newNode != nil { - // Initialize the allocator on every update, - // ovnkube-node/cloud-network-config-controller will make sure to - // annotate the node with the egressIPConfig, but that might have - // happened after we processed the ADD for that object, hence keep - // retrying for all UPDATEs. - if err := oc.initEgressIPAllocator(newNode); err != nil { - klog.Warningf("Egress node initialization error: %v", err) - } - - newLabels = newNode.GetLabels() - isNewReady = oc.isEgressNodeReady(newNode) - isNewReachable = oc.isEgressNodeReachable(newNode) - nodeName = newNode.Name - } else if oldNode != nil { - err := oc.deleteEgressIPAllocator(oldNode) - if err != nil { - return nil - } - } - - _, oldHadEgressLabel := oldLabels[nodeEgressLabel] - _, newHasEgressLabel := newLabels[nodeEgressLabel] - oc.setNodeEgressAssignable(nodeName, newHasEgressLabel) - oc.setNodeEgressReady(nodeName, isNewReady) - - // If the node is not labeled for egress assignment, just return - // directly, we don't really need to set the ready / reachable - // status on this node if the user doesn't care about using it. 
- if !oldHadEgressLabel && !newHasEgressLabel { +func (oc *DefaultNetworkController) addEgressNode(node *v1.Node) error { + if node == nil { return nil } - - if oldHadEgressLabel && !newHasEgressLabel { - klog.Infof("Node: %s has been un-labeled, deleting it from egress assignment", nodeName) - return oc.deleteEgressNode(nodeName) - } - - if !oldHadEgressLabel && newHasEgressLabel { - klog.Infof("Node: %s has been labeled, adding it for egress assignment", nodeName) - if isNewReady && isNewReachable { - oc.setNodeEgressReachable(nodeName, isNewReachable) - if err := oc.addEgressNode(nodeName); err != nil { - return err - } - } else { - klog.Warningf("Node: %s has been labeled, but node is not ready"+ - " and reachable, cannot use it for egress assignment", nodeName) + if oc.isLocalZoneNode(node) { + klog.V(5).Infof("Egress node: %s about to be initialized", node.Name) + // This option will program OVN to start sending GARPs for all external IPS + // that the logical switch port has been configured to use. This is + // necessary for egress IP because if an egress IP is moved between two + // nodes, the nodes need to actively update the ARP cache of all neighbors + // as to notify them the change. If this is not the case: packets will + // continue to be routed to the old node which hosted the egress IP before + // it was moved, and the connections will fail. + portName := types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node.Name + lsp := nbdb.LogicalSwitchPort{ + Name: portName, + // Setting nat-addresses to router will send out GARPs for all externalIPs and LB VIPs + // hosted on the GR. Setting exclude-lb-vips-from-garp to true will make sure GARPs for + // LB VIPs are not sent, thereby preventing GARP overload. 
+ Options: map[string]string{"nat-addresses": "router", "exclude-lb-vips-from-garp": "true"}, + } + err := libovsdbops.UpdateLogicalSwitchPortSetOptions(oc.nbClient, &lsp) + if err != nil { + return fmt.Errorf("unable to configure GARP on external logical switch port for egress node: %s, "+ + "this will result in packet drops during egress IP re-assignment, err: %v", node.Name, err) } - return nil } + return nil +} - if isOldReady == isNewReady { +func (oc *DefaultNetworkController) deleteEgressNode(node *v1.Node) error { + if node == nil { return nil } - - if !isNewReady { - klog.Warningf("Node: %s is not ready, deleting it from egress assignment", nodeName) - if err := oc.deleteEgressNode(nodeName); err != nil { - return err + if oc.isLocalZoneNode(node) { + klog.V(5).Infof("Egress node: %s about to be removed", node.Name) + // This will remove the option described in addEgressNode from the logical + // switch port, since this node will not be used for egress IP assignments + // from now on. + portName := types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node.Name + lsp := nbdb.LogicalSwitchPort{ + Name: portName, + Options: map[string]string{"nat-addresses": "", "exclude-lb-vips-from-garp": ""}, } - } else if isNewReady && isNewReachable { - klog.Infof("Node: %s is ready and reachable, adding it for egress assignment", nodeName) - oc.setNodeEgressReachable(nodeName, isNewReachable) - if err := oc.addEgressNode(nodeName); err != nil { - return err + err := libovsdbops.UpdateLogicalSwitchPortSetOptions(oc.nbClient, &lsp) + if errors.Is(err, libovsdbclient.ErrNotFound) { + // if the LSP setup is already gone, then don't count it as error. 
+ klog.Warningf("Unable to remove GARP configuration on external logical switch port for egress node: %s, err: %v", node.Name, err) + } else if err != nil { + return fmt.Errorf("unable to remove GARP configuration on external logical switch port for egress node: %s, err: %v", node.Name, err) } } - - return nil -} - -// deleteEgressIPAllocator removes the node from the allocator cache. -func (oc *DefaultNetworkController) deleteEgressIPAllocator(node *v1.Node) error { - oc.eIPC.allocator.Lock() - if eNode, exists := oc.eIPC.allocator.cache[node.Name]; exists { - eNode.healthClient.Disconnect() - } - delete(oc.eIPC.allocator.cache, node.Name) - oc.eIPC.allocator.Unlock() return nil } @@ -2163,8 +1286,6 @@ func (oc *DefaultNetworkController) initClusterEgressPolicies(nodes []interface{ return err } } - - go oc.checkEgressNodesReachability() return nil } @@ -2203,27 +1324,6 @@ func InitClusterEgressPolicies(nbClient libovsdbclient.Client, addressSetFactory return nil } -// egressNode is a cache helper used for egress IP assignment, representing an egress node -type egressNode struct { - egressIPConfig *util.ParsedNodeEgressIPConfiguration - mgmtIPs []net.IP - allocations map[string]string - healthClient healthcheck.EgressIPHealthClient - isReady bool - isReachable bool - isEgressAssignable bool - name string -} - -func (e *egressNode) getAllocationCountForEgressIP(name string) (count int) { - for _, egressIPName := range e.allocations { - if egressIPName == name { - count++ - } - } - return -} - // podAssignmentState keeps track of which egressIP object is serving // the related pod. // NOTE: At a given time only one object will be configured. 
This is @@ -2250,19 +1350,7 @@ func (pas *podAssignmentState) Clone() *podAssignmentState { return clone } -type allocator struct { - *sync.Mutex - // A cache used for egress IP assignments containing data for all cluster nodes - // used for egress IP assignments - cache map[string]*egressNode -} - -type egressIPController struct { - // egressIPAssignmentMutex is used to ensure a safe updates between - // concurrent go-routines which could be modifying the egress IP status - // assignment simultaneously. Currently WatchEgressNodes and WatchEgressIP - // run two separate go-routines which do this. - egressIPAssignmentMutex *sync.Mutex +type egressIPZoneController struct { // podAssignmentMutex is used to ensure safe access to podAssignment. // Currently WatchEgressIP, WatchEgressNamespace and WatchEgressPod could // all access that map simultaneously, hence why this guard is needed. @@ -2273,33 +1361,13 @@ type egressIPController struct { // podAssignment is a cache used for keeping track of which egressIP status // has been setup for each pod. The key is defined by getPodKey podAssignment map[string]*podAssignmentState - // pendingCloudPrivateIPConfigsMutex is used to ensure synchronized access - // to pendingCloudPrivateIPConfigsOps which is accessed by the egress IP and - // cloudPrivateIPConfig go-routines - pendingCloudPrivateIPConfigsMutex *sync.Mutex - // pendingCloudPrivateIPConfigsOps is a cache of pending - // CloudPrivateIPConfig changes that we are waiting on an answer for. Items - // in this map are only ever removed once the op is fully finished and we've - // been notified of this. That means: - // - On add operations we only delete once we've seen that the - // CloudPrivateIPConfig is fully added. - // - On delete: when it's fully deleted. - // - On update: once we finish processing the add - which comes after the - // delete. 
- pendingCloudPrivateIPConfigsOps map[string]map[string]*cloudPrivateIPConfigOp - // allocator is a cache of egress IP centric data needed to when both route - // health-checking and tracking allocations made - allocator allocator // libovsdb northbound client interface nbClient libovsdbclient.Client // watchFactory watching k8s objects watchFactory *factory.WatchFactory - // EgressIP Node reachability total timeout configuration - egressIPTotalTimeout int - // reachability check interval - reachabilityCheckInterval time.Duration - // EgressIP Node reachability gRPC port (0 means it should use dial instead) - egressIPNodeHealthCheckPort int + // A cache that maintains all nodes in the cluster, + // value will be true if local to this zone and false otherwise + nodeZoneState *syncmap.SyncMap[bool] } // addStandByEgressIPAssignment does the same setup that is done by addPodEgressIPAssignments but for @@ -2347,7 +1415,8 @@ func (oc *DefaultNetworkController) addStandByEgressIPAssignment(podKey string, // addPodEgressIPAssignment will program OVN with logical router policies // (routing pod traffic to the egress node) and NAT objects on the egress node // (SNAT-ing to the egress IP). 
-func (e *egressIPController) addPodEgressIPAssignment(egressIPName string, status egressipv1.EgressIPStatusItem, pod *kapi.Pod, podIPs []*net.IPNet) (err error) { +// This function should be called with lock on nodeZoneState cache key status.Node and pod.Spec.NodeName +func (e *egressIPZoneController) addPodEgressIPAssignment(egressIPName string, status egressipv1.EgressIPStatusItem, pod *kapi.Pod, podIPs []*net.IPNet) (err error) { if config.Metrics.EnableScaleMetrics { start := time.Now() defer func() { @@ -2358,29 +1427,42 @@ func (e *egressIPController) addPodEgressIPAssignment(egressIPName string, statu metrics.RecordEgressIPAssign(duration) }() } - - ops, err := createNATRuleOps(e.nbClient, nil, podIPs, status, egressIPName) - if err != nil { - return fmt.Errorf("unable to create NAT rule ops for status: %v, err: %v", status, err) - } - - ops, err = e.createReroutePolicyOps(ops, podIPs, status, egressIPName) - if err != nil { - return fmt.Errorf("unable to create logical router policy ops, err: %v", err) + isLocalZoneEgressNode, loadedEgressNode := e.nodeZoneState.Load(status.Node) + isLocalZonePod, loadedPodNode := e.nodeZoneState.Load(pod.Spec.NodeName) + var ops []ovsdb.Operation + if loadedEgressNode && isLocalZoneEgressNode { + ops, err = createNATRuleOps(e.nbClient, nil, podIPs, status, egressIPName) + if err != nil { + return fmt.Errorf("unable to create NAT rule ops for status: %v, err: %v", status, err) + } + if config.OVNKubernetesFeature.EnableInterconnect && (loadedPodNode && !isLocalZonePod) { + // configure reroute for non-local-zone pods on egress nodes + ops, err = e.createStaticRouteOps(ops, podIPs, status, egressIPName) + if err != nil { + return fmt.Errorf("unable to create logical router static route ops %v, err: %v", status, err) + } + } } - ops, err = e.deleteExternalGWPodSNATOps(ops, pod, podIPs, status) - if err != nil { - return err + // don't add a reroute policy if the egress node towards which we are adding this doesn't exist + if 
loadedEgressNode && loadedPodNode && isLocalZonePod { + ops, err = e.createReroutePolicyOps(ops, podIPs, status, egressIPName) + if err != nil { + return fmt.Errorf("unable to create logical router policy ops, err: %v", err) + } + ops, err = e.deleteExternalGWPodSNATOps(ops, pod, podIPs, status) + if err != nil { + return err + } } - _, err = libovsdbops.TransactAndCheck(e.nbClient, ops) return err } // deletePodEgressIPAssignment deletes the OVN programmed egress IP // configuration mentioned for addPodEgressIPAssignment. -func (e *egressIPController) deletePodEgressIPAssignment(egressIPName string, status egressipv1.EgressIPStatusItem, pod *kapi.Pod, podIPs []*net.IPNet) (err error) { +// This function should be called with lock on nodeZoneState cache key status.Node and pod.Spec.NodeName +func (e *egressIPZoneController) deletePodEgressIPAssignment(egressIPName string, status egressipv1.EgressIPStatusItem, pod *kapi.Pod, podIPs []*net.IPNet) (err error) { if config.Metrics.EnableScaleMetrics { start := time.Now() defer func() { @@ -2392,22 +1474,34 @@ func (e *egressIPController) deletePodEgressIPAssignment(egressIPName string, st }() } - ops, err := e.addExternalGWPodSNATOps(nil, pod.Namespace, pod.Name, status) - if err != nil { - return err - } - - ops, err = e.deleteReroutePolicyOps(ops, podIPs, status, egressIPName) - if errors.Is(err, libovsdbclient.ErrNotFound) { - // if the gateway router join IP setup is already gone, then don't count it as error. 
- klog.Warningf("Unable to delete logical router policy, err: %v", err) - } else if err != nil { - return fmt.Errorf("unable to delete logical router policy, err: %v", err) + isLocalZonePod, loadedPodNode := e.nodeZoneState.Load(pod.Spec.NodeName) + var ops []ovsdb.Operation + if !loadedPodNode || isLocalZonePod { // node is deleted (we can't determine zone so we always try and nuke OR pod is local to zone) + ops, err = e.addExternalGWPodSNATOps(nil, pod.Namespace, pod.Name, status) + if err != nil { + return err + } + ops, err = e.deleteReroutePolicyOps(ops, podIPs, status, egressIPName) + if errors.Is(err, libovsdbclient.ErrNotFound) { + // if the gateway router join IP setup is already gone, then don't count it as error. + klog.Warningf("Unable to delete logical router policy, err: %v", err) + } else if err != nil { + return fmt.Errorf("unable to delete logical router policy, err: %v", err) + } } - - ops, err = deleteNATRuleOps(e.nbClient, ops, podIPs, status, egressIPName) - if err != nil { - return fmt.Errorf("unable to delete NAT rule for status: %v, err: %v", status, err) + isLocalZoneEgressNode, loadedEgressNode := e.nodeZoneState.Load(status.Node) + if loadedEgressNode && isLocalZoneEgressNode { + if config.OVNKubernetesFeature.EnableInterconnect && (!loadedPodNode || !isLocalZonePod) { // node is deleted (we can't determine zone so we always try and nuke OR pod is remote to zone) + // delete reroute for non-local-zone pods on egress nodes + ops, err = e.deleteStaticRouteOps(ops, podIPs, status, egressIPName) + if err != nil { + return fmt.Errorf("unable to delete logical router static route ops %v, err: %v", status, err) + } + } + ops, err = deleteNATRuleOps(e.nbClient, ops, podIPs, status, egressIPName) + if err != nil { + return fmt.Errorf("unable to delete NAT rule for status: %v, err: %v", status, err) + } } _, err = libovsdbops.TransactAndCheck(e.nbClient, ops) return err @@ -2424,7 +1518,7 @@ func (e *egressIPController) 
deletePodEgressIPAssignment(egressIPName string, st // check the informer cache since on pod deletion the event handlers are // triggered after the update to the informer cache. We should not re-add the // external GW setup in those cases. -func (e *egressIPController) addExternalGWPodSNAT(podNamespace, podName string, status egressipv1.EgressIPStatusItem) error { +func (e *egressIPZoneController) addExternalGWPodSNAT(podNamespace, podName string, status egressipv1.EgressIPStatusItem) error { ops, err := e.addExternalGWPodSNATOps(nil, podNamespace, podName, status) if err != nil { return fmt.Errorf("error creating ops for adding external gw pod snat: %+v", err) @@ -2447,9 +1541,15 @@ func (e *egressIPController) addExternalGWPodSNAT(podNamespace, podName string, // check the informer cache since on pod deletion the event handlers are // triggered after the update to the informer cache. We should not re-add the // external GW setup in those cases. -func (e *egressIPController) addExternalGWPodSNATOps(ops []ovsdb.Operation, podNamespace, podName string, status egressipv1.EgressIPStatusItem) ([]ovsdb.Operation, error) { +// This function should be called with lock on nodeZoneState cache key pod.Spec.Name +func (e *egressIPZoneController) addExternalGWPodSNATOps(ops []ovsdb.Operation, podNamespace, podName string, status egressipv1.EgressIPStatusItem) ([]ovsdb.Operation, error) { if config.Gateway.DisableSNATMultipleGWs { - if pod, err := e.watchFactory.GetPod(podNamespace, podName); err == nil && pod.Spec.NodeName == status.Node && util.PodNeedsSNAT(pod) { + pod, err := e.watchFactory.GetPod(podNamespace, podName) + if err != nil { + return nil, nil // nothing to do. 
+ } + isLocalZonePod, loadedPodNode := e.nodeZoneState.Load(pod.Spec.NodeName) + if pod.Spec.NodeName == status.Node && loadedPodNode && isLocalZonePod && util.PodNeedsSNAT(pod) { // if the pod still exists, add snats to->nodeIP (on the node where the pod exists) for these podIPs after deleting the snat to->egressIP // NOTE: This needs to be done only if the pod was on the same node as egressNode extIPs, err := getExternalIPsGR(e.watchFactory, pod.Spec.NodeName) @@ -2471,7 +1571,7 @@ func (e *egressIPController) addExternalGWPodSNATOps(ops []ovsdb.Operation, podN } // deleteExternalGWPodSNATOps creates ops for the required external GW teardown for the given pod -func (e *egressIPController) deleteExternalGWPodSNATOps(ops []ovsdb.Operation, pod *kapi.Pod, podIPs []*net.IPNet, status egressipv1.EgressIPStatusItem) ([]ovsdb.Operation, error) { +func (e *egressIPZoneController) deleteExternalGWPodSNATOps(ops []ovsdb.Operation, pod *kapi.Pod, podIPs []*net.IPNet, status egressipv1.EgressIPStatusItem) ([]ovsdb.Operation, error) { if config.Gateway.DisableSNATMultipleGWs && status.Node == pod.Spec.NodeName { // remove snats to->nodeIP (from the node where pod exists if that node is also serving // as an egress node for this pod) for these podIPs before adding the snat to->egressIP @@ -2490,13 +1590,13 @@ func (e *egressIPController) deleteExternalGWPodSNATOps(ops []ovsdb.Operation, p return ops, nil } -func (e *egressIPController) getGatewayRouterJoinIP(node string, wantsIPv6 bool) (net.IP, error) { +func (e *egressIPZoneController) getGatewayRouterJoinIP(node string, wantsIPv6 bool) (net.IP, error) { gatewayIPs, err := util.GetLRPAddrs(e.nbClient, types.GWRouterToJoinSwitchPrefix+types.GWRouterPrefix+node) if err != nil { return nil, fmt.Errorf("attempt at finding node gateway router network information failed, err: %w", err) } if gatewayIP, err := util.MatchFirstIPNetFamily(wantsIPv6, gatewayIPs); err != nil { - return nil, fmt.Errorf("could not find node %s gateway 
router: %v", node, err) + return nil, fmt.Errorf("could not find gateway IP for node %s with family %v: %v", node, wantsIPv6, err) } else { return gatewayIP.IP, nil } @@ -2510,6 +1610,23 @@ func ipFamilyName(isIPv6 bool) string { return "ip4" } +func (e *egressIPZoneController) getTransitIP(nodeName string, wantsIPv6 bool) (string, error) { + // fetch node annotation of the egress node + node, err := e.watchFactory.GetNode(nodeName) + if err != nil { + return "", fmt.Errorf("failed to get node %s: %w", nodeName, err) + } + nodeTransitIPs, err := util.ParseNodeTransitSwitchPortAddrs(node) + if err != nil { + return "", fmt.Errorf("unable to fetch transit switch IP for node %s: %w", nodeName, err) + } + nodeTransitIP, err := util.MatchFirstIPNetFamily(wantsIPv6, nodeTransitIPs) + if err != nil { + return "", fmt.Errorf("could not find transit switch IP of node %v for this family %v: %v", node, wantsIPv6, err) + } + return nodeTransitIP.IP.String(), nil +} + // createReroutePolicyOps creates an operation that does idempotent updates of the // LogicalRouterPolicy corresponding to the egressIP status item, according to the // following update procedure: @@ -2518,19 +1635,37 @@ func ipFamilyName(isIPv6 bool) string { // to equal [gatewayRouterIP] // - if the LogicalRouterPolicy does exist: it adds the gatewayRouterIP to the // array of nexthops -func (e *egressIPController) createReroutePolicyOps(ops []ovsdb.Operation, podIPNets []*net.IPNet, status egressipv1.EgressIPStatusItem, egressIPName string) ([]ovsdb.Operation, error) { +// This function should be called with lock on nodeZoneState cache key status.Node +func (e *egressIPZoneController) createReroutePolicyOps(ops []ovsdb.Operation, podIPNets []*net.IPNet, status egressipv1.EgressIPStatusItem, egressIPName string) ([]ovsdb.Operation, error) { isEgressIPv6 := utilnet.IsIPv6String(status.EgressIP) - gatewayRouterIP, err := e.getGatewayRouterJoinIP(status.Node, isEgressIPv6) - if err != nil { - return nil, 
fmt.Errorf("unable to retrieve gateway IP for node: %s, protocol is IPv6: %v, err: %w", status.Node, isEgressIPv6, err) + var nextHopIP string + var err error + // NOTE: No need to check if status.node exists or not in the cache, we are calling this function only if it + // is present in the nodeZoneState cache. Since we call it with lock on cache, we are safe here. + isLocalZoneEgressNode, _ := e.nodeZoneState.Load(status.Node) + if isLocalZoneEgressNode { + gatewayRouterIP, err := e.getGatewayRouterJoinIP(status.Node, isEgressIPv6) + if err != nil { + return nil, fmt.Errorf("unable to retrieve gateway IP for node: %s, protocol is IPv6: %v, err: %w", status.Node, isEgressIPv6, err) + } + nextHopIP = gatewayRouterIP.String() + } else if config.OVNKubernetesFeature.EnableInterconnect { + // fetch node annotation of the egress node + nextHopIP, err = e.getTransitIP(status.Node, isEgressIPv6) + if err != nil { + return nil, fmt.Errorf("unable to fetch transit switch IP for node %s: %v", status.Node, err) + } + } + // if neither of these above conditions are met, return error since we don't want to add an empty nextHop LRP + if nextHopIP == "" { + return nil, fmt.Errorf("unable to determine nextHop for egressIP %s with status %v", egressIPName, status) } - // Handle all pod IPs that match the egress IP address family for _, podIPNet := range util.MatchAllIPNetFamily(isEgressIPv6, podIPNets) { lrp := nbdb.LogicalRouterPolicy{ Match: fmt.Sprintf("%s.src == %s", ipFamilyName(isEgressIPv6), podIPNet.IP.String()), Priority: types.EgressIPReroutePriority, - Nexthops: []string{gatewayRouterIP.String()}, + Nexthops: []string{nextHopIP}, Action: nbdb.LogicalRouterPolicyActionReroute, ExternalIDs: map[string]string{ "name": egressIPName, @@ -2555,23 +1690,89 @@ func (e *egressIPController) createReroutePolicyOps(ops []ovsdb.Operation, podIP // the specified gatewayRouterIP from nexthops // - if the LogicalRouterPolicy exist and has the len(nexthops) == 1: it removes // the 
LogicalRouterPolicy completely -func (e *egressIPController) deleteReroutePolicyOps(ops []ovsdb.Operation, podIPNets []*net.IPNet, status egressipv1.EgressIPStatusItem, egressIPName string) ([]ovsdb.Operation, error) { +// This function should be called with lock on nodeZoneState cache key status.Node +func (e *egressIPZoneController) deleteReroutePolicyOps(ops []ovsdb.Operation, podIPNets []*net.IPNet, status egressipv1.EgressIPStatusItem, egressIPName string) ([]ovsdb.Operation, error) { isEgressIPv6 := utilnet.IsIPv6String(status.EgressIP) - gatewayRouterIP, err := e.getGatewayRouterJoinIP(status.Node, isEgressIPv6) - if err != nil { - return nil, fmt.Errorf("unable to retrieve gateway IP for node: %s, protocol is IPv6: %v, err: %w", status.Node, isEgressIPv6, err) + var nextHopIP string + var err error + isLocalZoneEgressNode, loadedEgressNode := e.nodeZoneState.Load(status.Node) + if loadedEgressNode && isLocalZoneEgressNode { + gatewayRouterIP, err := e.getGatewayRouterJoinIP(status.Node, isEgressIPv6) + if err != nil { + return nil, fmt.Errorf("unable to retrieve gateway IP for node: %s, protocol is IPv6: %v, err: %w", status.Node, isEgressIPv6, err) + } + nextHopIP = gatewayRouterIP.String() + } else if config.OVNKubernetesFeature.EnableInterconnect { + // fetch node annotation of the egress node + nextHopIP, err = e.getTransitIP(status.Node, isEgressIPv6) + if apierrors.IsNotFound(err) { + // if the node is already gone, then don't count it as error as during deletion. 
+ klog.Warningf("Unable to fetch transit switch IP for node: %s: err: %v", status.Node, err) + } else if err != nil { + return nil, fmt.Errorf("unable to fetch transit switch IP for node %s: %v", status.Node, err) + } + } + // if neither of these above conditions are met, return error since we can't remove an empty nextHop LRP + if nextHopIP == "" { + return nil, fmt.Errorf("unable to determine nextHop for egressIP %s with status %v", egressIPName, status) } - // Handle all pod IPs that match the egress IP address family for _, podIPNet := range util.MatchAllIPNetFamily(isEgressIPv6, podIPNets) { filterOption := fmt.Sprintf("%s.src == %s", ipFamilyName(isEgressIPv6), podIPNet.IP.String()) p := func(item *nbdb.LogicalRouterPolicy) bool { return item.Match == filterOption && item.Priority == types.EgressIPReroutePriority && item.ExternalIDs["name"] == egressIPName } - ops, err = libovsdbops.DeleteNextHopFromLogicalRouterPoliciesWithPredicateOps(e.nbClient, ops, types.OVNClusterRouter, p, gatewayRouterIP.String()) + ops, err = libovsdbops.DeleteNextHopFromLogicalRouterPoliciesWithPredicateOps(e.nbClient, ops, types.OVNClusterRouter, p, nextHopIP) if err != nil { return nil, fmt.Errorf("error removing nexthop IP %s from egress ip %s policies on router %s: %v", - gatewayRouterIP, egressIPName, types.OVNClusterRouter, err) + nextHopIP, egressIPName, types.OVNClusterRouter, err) + } + } + return ops, nil +} + +func (e *egressIPZoneController) createStaticRouteOps(ops []ovsdb.Operation, podIPNets []*net.IPNet, status egressipv1.EgressIPStatusItem, egressIPName string) ([]ovsdb.Operation, error) { + isEgressIPv6 := utilnet.IsIPv6String(status.EgressIP) + gatewayRouterIP, err := e.getGatewayRouterJoinIP(status.Node, isEgressIPv6) + if err != nil { + return nil, fmt.Errorf("unable to retrieve gateway IP for node: %s, protocol is IPv6: %v, err: %w", status.Node, isEgressIPv6, err) + } + // Handle all pod IPs that match the egress IP address family + for _, podIPNet := range 
util.MatchAllIPNetFamily(isEgressIPv6, podIPNets) { + lrsr := nbdb.LogicalRouterStaticRoute{ + IPPrefix: podIPNet.IP.String(), + Nexthop: gatewayRouterIP.String(), + ExternalIDs: map[string]string{ + "name": egressIPName, + }, + Policy: &nbdb.LogicalRouterStaticRoutePolicySrcIP, + } + p := func(item *nbdb.LogicalRouterStaticRoute) bool { + return item.IPPrefix == lrsr.IPPrefix && item.Nexthop == lrsr.Nexthop && item.ExternalIDs["name"] == lrsr.ExternalIDs["name"] && item.Policy == lrsr.Policy + } + + ops, err = libovsdbops.CreateOrUpdateLogicalRouterStaticRoutesWithPredicateOps(e.nbClient, ops, types.OVNClusterRouter, &lrsr, p) + if err != nil { + return nil, fmt.Errorf("error creating logical router static route %+v on router %s: %v", lrsr, types.OVNClusterRouter, err) + } + } + return ops, nil +} + +func (e *egressIPZoneController) deleteStaticRouteOps(ops []ovsdb.Operation, podIPNets []*net.IPNet, status egressipv1.EgressIPStatusItem, egressIPName string) ([]ovsdb.Operation, error) { + isEgressIPv6 := utilnet.IsIPv6String(status.EgressIP) + gatewayRouterIP, err := e.getGatewayRouterJoinIP(status.Node, isEgressIPv6) + if err != nil { + return nil, fmt.Errorf("unable to retrieve gateway IP for node: %s, protocol is IPv6: %v, err: %w", status.Node, isEgressIPv6, err) + } + // Handle all pod IPs that match the egress IP address family + for _, podIPNet := range util.MatchAllIPNetFamily(isEgressIPv6, podIPNets) { + p := func(item *nbdb.LogicalRouterStaticRoute) bool { + return item.IPPrefix == podIPNet.IP.String() && item.Nexthop == gatewayRouterIP.String() && item.ExternalIDs["name"] == egressIPName && item.Policy != nil && *item.Policy == nbdb.LogicalRouterStaticRoutePolicySrcIP + } + ops, err = libovsdbops.DeleteLogicalRouterStaticRoutesWithPredicateOps(e.nbClient, ops, types.OVNClusterRouter, p) + if err != nil { + return nil, fmt.Errorf("error deleting logical router static route on router %s: %v", types.OVNClusterRouter, err) } } return ops, nil @@ -2585,52 
+1786,82 @@ func (e *egressIPController) deleteReroutePolicyOps(ops []ovsdb.Operation, podIP // gatewayRouterIP corresponding to the node in the EgressIPStatusItem, else // just remove the gatewayRouterIP from the list of nexthops // It also returns the list of podIPs whose routes and SNAT's were deleted -func (e *egressIPController) deleteEgressIPStatusSetup(name string, status egressipv1.EgressIPStatusItem) ([]net.IP, error) { +// This function should be called with a lock on e.nodeZoneState.status.Node +func (e *egressIPZoneController) deleteEgressIPStatusSetup(name string, status egressipv1.EgressIPStatusItem) ([]net.IP, error) { isEgressIPv6 := utilnet.IsIPv6String(status.EgressIP) - gatewayRouterIP, err := e.getGatewayRouterJoinIP(status.Node, isEgressIPv6) - if errors.Is(err, libovsdbclient.ErrNotFound) { - // if the gateway router join IP setup is already gone, then don't count it as error. - klog.Warningf("Unable to retrieve gateway IP for node: %s, protocol is IPv6: %v, err: %v", status.Node, isEgressIPv6, err) - } else if err != nil { - return nil, fmt.Errorf("unable to retrieve gateway IP for node: %s, protocol is IPv6: %v, err: %v", status.Node, isEgressIPv6, err) + var nextHopIP string + var err error + isLocalZoneEgressNode, loadedEgressNode := e.nodeZoneState.Load(status.Node) + if loadedEgressNode && isLocalZoneEgressNode { + gatewayRouterIP, err := e.getGatewayRouterJoinIP(status.Node, isEgressIPv6) + if errors.Is(err, libovsdbclient.ErrNotFound) { + // if the gateway router join IP setup is already gone, then don't count it as error. 
+ klog.Warningf("Unable to retrieve gateway IP for node: %s, protocol is IPv6: %v, err: %v", status.Node, isEgressIPv6, err) + } else if err != nil { + return nil, fmt.Errorf("unable to retrieve gateway IP for node: %s, protocol is IPv6: %v, err: %v", status.Node, isEgressIPv6, err) + } + if gatewayRouterIP != nil { + nextHopIP = gatewayRouterIP.String() + } + } else if config.OVNKubernetesFeature.EnableInterconnect { + // fetch node annotation of the egress node + nextHopIP, err = e.getTransitIP(status.Node, isEgressIPv6) + if apierrors.IsNotFound(err) { + // if the node is already gone, then don't count it as error as during deletion. + // technically speaking, the egressNode handler should always get the delete event first before node handler + klog.Warningf("Unable to fetch transit switch IP for node: %s: err: %v", status.Node, err) + } else if err != nil { + return nil, fmt.Errorf("unable to fetch transit switch IP for node %s: %w", status.Node, err) + } } var ops []ovsdb.Operation - if gatewayRouterIP != nil { - gwIP := gatewayRouterIP.String() + if nextHopIP != "" { policyPred := func(item *nbdb.LogicalRouterPolicy) bool { hasGatewayRouterIPNexthop := false for _, nexthop := range item.Nexthops { - if nexthop == gwIP { + if nexthop == nextHopIP { hasGatewayRouterIPNexthop = true break } } return item.Priority == types.EgressIPReroutePriority && item.ExternalIDs["name"] == name && hasGatewayRouterIPNexthop } - ops, err = libovsdbops.DeleteNextHopFromLogicalRouterPoliciesWithPredicateOps(e.nbClient, nil, types.OVNClusterRouter, policyPred, gwIP) + ops, err = libovsdbops.DeleteNextHopFromLogicalRouterPoliciesWithPredicateOps(e.nbClient, ops, types.OVNClusterRouter, policyPred, nextHopIP) if err != nil { return nil, fmt.Errorf("error removing nexthop IP %s from egress ip %s policies on router %s: %v", - gatewayRouterIP, name, types.OVNClusterRouter, err) + nextHopIP, name, types.OVNClusterRouter, err) } } - routerName := 
util.GetGatewayRouterFromNode(status.Node) - natPred := func(nat *nbdb.NAT) bool { - return nat.ExternalIDs["name"] == name && nat.ExternalIP == status.EgressIP - } - nats, err := libovsdbops.FindNATsWithPredicate(e.nbClient, natPred) // save the nats to get the podIPs before that nats get deleted - if err != nil { - return nil, fmt.Errorf("error removing egress ip pods from adress set %s: %v", EgressIPServedPodsAddrSetName, err) - } - ops, err = libovsdbops.DeleteNATsWithPredicateOps(e.nbClient, ops, natPred) - if err != nil { - return nil, fmt.Errorf("error removing egress ip %s nats on router %s: %v", name, routerName, err) + var nats []*nbdb.NAT + if loadedEgressNode && isLocalZoneEgressNode { + if config.OVNKubernetesFeature.EnableInterconnect && nextHopIP != "" { + p := func(item *nbdb.LogicalRouterStaticRoute) bool { + return item.Nexthop == nextHopIP && item.ExternalIDs["name"] == name && item.Policy != nil && *item.Policy == nbdb.LogicalRouterStaticRoutePolicySrcIP + } + ops, err = libovsdbops.DeleteLogicalRouterStaticRoutesWithPredicateOps(e.nbClient, ops, types.OVNClusterRouter, p) + if err != nil { + return nil, fmt.Errorf("error deleting logical router static routes on router %s for %s: %w", types.OVNClusterRouter, name, err) + } + } + routerName := util.GetGatewayRouterFromNode(status.Node) + natPred := func(nat *nbdb.NAT) bool { + // We should delete NATs only from the status.Node that was passed into this function + return nat.ExternalIDs["name"] == name && nat.ExternalIP == status.EgressIP && nat.LogicalPort != nil && *nat.LogicalPort == types.K8sPrefix+status.Node + } + nats, err = libovsdbops.FindNATsWithPredicate(e.nbClient, natPred) // save the nats to get the podIPs before that nats get deleted + if err != nil { + return nil, fmt.Errorf("error removing egress ip pods from adress set %s: %v", EgressIPServedPodsAddrSetName, err) + } + ops, err = libovsdbops.DeleteNATsWithPredicateOps(e.nbClient, ops, natPred) + if err != nil { + return nil, 
fmt.Errorf("error removing egress ip %s nats on router %s: %v", name, routerName, err) + } } - _, err = libovsdbops.TransactAndCheck(e.nbClient, ops) if err != nil { - return nil, fmt.Errorf("error trasnsacting ops %+v: %v", ops, err) + return nil, fmt.Errorf("error transacting ops %+v: %v", ops, err) } var podIPs []net.IP for i := range nats { @@ -2666,157 +1897,6 @@ func (oc *DefaultNetworkController) deletePodIPsFromAddressSet(addrSetIPs []net. return nil } -// checkEgressNodesReachability continuously checks if all nodes used for egress -// IP assignment are reachable, and updates the nodes following the result. This -// is important because egress IP is based upon routing traffic to these nodes, -// and if they aren't reachable we shouldn't be using them for egress IP. -func (oc *DefaultNetworkController) checkEgressNodesReachability() { - timer := time.NewTicker(oc.eIPC.reachabilityCheckInterval) - defer timer.Stop() - for { - select { - case <-timer.C: - checkEgressNodesReachabilityIterate(oc) - case <-oc.stopChan: - klog.V(5).Infof("Stop channel got triggered: will stop checkEgressNodesReachability") - return - } - } -} - -func checkEgressNodesReachabilityIterate(oc *DefaultNetworkController) { - reAddOrDelete := map[string]bool{} - oc.eIPC.allocator.Lock() - for _, eNode := range oc.eIPC.allocator.cache { - if eNode.isEgressAssignable && eNode.isReady { - wasReachable := eNode.isReachable - isReachable := oc.isReachable(eNode.name, eNode.mgmtIPs, eNode.healthClient) - if wasReachable && !isReachable { - reAddOrDelete[eNode.name] = true - } else if !wasReachable && isReachable { - reAddOrDelete[eNode.name] = false - } - eNode.isReachable = isReachable - } else { - // End connection (if there is one). This is important because - // it accounts for cases where node is not labelled with - // egress-assignable, so connection is no longer needed. Calling - // this on a already disconnected node is expected to be cheap. 
- eNode.healthClient.Disconnect() - } - } - oc.eIPC.allocator.Unlock() - for nodeName, shouldDelete := range reAddOrDelete { - if shouldDelete { - metrics.RecordEgressIPUnreachableNode() - klog.Warningf("Node: %s is detected as unreachable, deleting it from egress assignment", nodeName) - if err := oc.deleteEgressNode(nodeName); err != nil { - klog.Errorf("Node: %s is detected as unreachable, but could not re-assign egress IPs, err: %v", nodeName, err) - } - } else { - klog.Infof("Node: %s is detected as reachable and ready again, adding it to egress assignment", nodeName) - if err := oc.addEgressNode(nodeName); err != nil { - klog.Errorf("Node: %s is detected as reachable and ready again, but could not re-assign egress IPs, err: %v", nodeName, err) - } - } - } -} - -func (oc *DefaultNetworkController) isReachable(nodeName string, mgmtIPs []net.IP, healthClient healthcheck.EgressIPHealthClient) bool { - // Check if we need to do node reachability check - if oc.eIPC.egressIPTotalTimeout == 0 { - return true - } - - if oc.eIPC.egressIPNodeHealthCheckPort == 0 { - return isReachableLegacy(nodeName, mgmtIPs, oc.eIPC.egressIPTotalTimeout) - } - return isReachableViaGRPC(mgmtIPs, healthClient, oc.eIPC.egressIPNodeHealthCheckPort, oc.eIPC.egressIPTotalTimeout) -} - -func isReachableLegacy(node string, mgmtIPs []net.IP, totalTimeout int) bool { - var retryTimeOut, initialRetryTimeOut time.Duration - - numMgmtIPs := len(mgmtIPs) - if numMgmtIPs == 0 { - return false - } - - switch totalTimeout { - // Check if we need to do node reachability check - case 0: - return true - case 1: - // Using time duration for initial retry with 700/numIPs msec and retry of 100/numIPs msec - // to ensure total wait time will be in range with the configured value including a sleep of 100msec between attempts. 
- initialRetryTimeOut = time.Duration(700/numMgmtIPs) * time.Millisecond - retryTimeOut = time.Duration(100/numMgmtIPs) * time.Millisecond - default: - // Using time duration for initial retry with 900/numIPs msec - // to ensure total wait time will be in range with the configured value including a sleep of 100msec between attempts. - initialRetryTimeOut = time.Duration(900/numMgmtIPs) * time.Millisecond - retryTimeOut = initialRetryTimeOut - } - - timeout := initialRetryTimeOut - endTime := time.Now().Add(time.Second * time.Duration(totalTimeout)) - for time.Now().Before(endTime) { - for _, ip := range mgmtIPs { - if dialer.dial(ip, timeout) { - return true - } - } - time.Sleep(100 * time.Millisecond) - timeout = retryTimeOut - } - klog.Errorf("Failed reachability check for %s", node) - return false -} - -type egressIPDial struct{} - -// Blantant copy from: https://github.com/openshift/sdn/blob/master/pkg/network/common/egressip.go#L499-L505 -// Ping a node and return whether or not we think it is online. We do this by trying to -// open a TCP connection to the "discard" service (port 9); if the node is offline, the -// attempt will either time out with no response, or else return "no route to host" (and -// we will return false). If the node is online then we presumably will get a "connection -// refused" error; but the code below assumes that anything other than timeout or "no -// route" indicates that the node is online. 
-func (e *egressIPDial) dial(ip net.IP, timeout time.Duration) bool { - conn, err := net.DialTimeout("tcp", net.JoinHostPort(ip.String(), "9"), timeout) - if conn != nil { - conn.Close() - } - if opErr, ok := err.(*net.OpError); ok { - if opErr.Timeout() { - return false - } - if sysErr, ok := opErr.Err.(*os.SyscallError); ok && sysErr.Err == syscall.EHOSTUNREACH { - return false - } - } - return true -} - -type egressIPHealthcheckClientAllocator struct{} - -func (hccAlloc *egressIPHealthcheckClientAllocator) allocate(nodeName string) healthcheck.EgressIPHealthClient { - return healthcheck.NewEgressIPHealthClient(nodeName) -} - -func isReachableViaGRPC(mgmtIPs []net.IP, healthClient healthcheck.EgressIPHealthClient, healthCheckPort, totalTimeout int) bool { - dialCtx, dialCancel := context.WithTimeout(context.Background(), time.Duration(totalTimeout)*time.Second) - defer dialCancel() - - if !healthClient.IsConnected() { - // gRPC session is not up. Attempt to connect and if that suceeds, we will declare node as reacheable. - return healthClient.Connect(dialCtx, mgmtIPs, healthCheckPort) - } - - // gRPC session is already established. Send a probe, which will succeed, or close the session. - return healthClient.Probe(dialCtx) -} - func getClusterSubnets() ([]*net.IPNet, []*net.IPNet) { var v4ClusterSubnets = []*net.IPNet{} var v6ClusterSubnets = []*net.IPNet{} @@ -3049,61 +2129,3 @@ func getPodNamespaceAndNameFromKey(podKey string) (string, string) { parts := strings.Split(podKey, "_") return parts[0], parts[1] } - -func getEgressIPAllocationTotalCount(allocator allocator) float64 { - count := 0 - allocator.Lock() - defer allocator.Unlock() - for _, eNode := range allocator.cache { - count += len(eNode.allocations) - } - return float64(count) -} - -// cloudPrivateIPConfigNameToIPString converts the resource name to the string -// representation of net.IP. 
Given a limitation in the Kubernetes API server -// (see: https://github.com/kubernetes/kubernetes/pull/100950) -// CloudPrivateIPConfig.metadata.name cannot represent an IPv6 address. To -// work-around this limitation it was decided that the network plugin creating -// the CR will fully expand the IPv6 address and replace all colons with dots, -// ex: - -// The CloudPrivateIPConfig name fc00.f853.0ccd.e793.0000.0000.0000.0054 will be -// represented as address: fc00:f853:ccd:e793::54 - -// We thus need to replace every fifth character's dot with a colon. -func cloudPrivateIPConfigNameToIPString(name string) string { - // Handle IPv4, which will work fine. - if ip := net.ParseIP(name); ip != nil { - return name - } - // Handle IPv6, for which we want to convert the fully expanded "special - // name" to go's default IP representation - name = strings.ReplaceAll(name, ".", ":") - return net.ParseIP(name).String() -} - -// ipStringToCloudPrivateIPConfigName converts the net.IP string representation -// to a CloudPrivateIPConfig compatible name. - -// The string representation of the IPv6 address fc00:f853:ccd:e793::54 will be -// represented as: fc00.f853.0ccd.e793.0000.0000.0000.0054 - -// We thus need to fully expand the IP string and replace every fifth -// character's colon with a dot. -func ipStringToCloudPrivateIPConfigName(ipString string) (name string) { - ip := net.ParseIP(ipString) - if ip.To4() != nil { - return ipString - } - dst := make([]byte, hex.EncodedLen(len(ip))) - hex.Encode(dst, ip) - for i := 0; i < len(dst); i += 4 { - if len(dst)-i == 4 { - name += string(dst[i : i+4]) - } else { - name += string(dst[i:i+4]) + "." 
- } - } - return -} diff --git a/go-controller/pkg/ovn/egressip_test.go b/go-controller/pkg/ovn/egressip_test.go index 57a6cb03c2..0406dc2f6d 100644 --- a/go-controller/pkg/ovn/egressip_test.go +++ b/go-controller/pkg/ovn/egressip_test.go @@ -7,13 +7,13 @@ import ( "time" "github.com/onsi/ginkgo" + ginkgotable "github.com/onsi/ginkgo/extensions/table" "github.com/onsi/gomega" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config" egressipv1 "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressip/v1" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/nbdb" addressset "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/address_set" egresssvc "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/controller/egress_services" - "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/healthcheck" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/retry" ovntest "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/testing" libovsdbtest "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/testing/libovsdb" @@ -22,61 +22,13 @@ import ( "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" "github.com/urfave/cli/v2" - kapi "k8s.io/api/core/v1" v1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" k8stypes "k8s.io/apimachinery/pkg/types" - "k8s.io/apimachinery/pkg/util/sets" - utilnet "k8s.io/utils/net" utilpointer "k8s.io/utils/pointer" ) -type fakeEgressIPDialer struct{} - -func (f fakeEgressIPDialer) dial(ip net.IP, timeout time.Duration) bool { - return true -} - -type fakeEgressIPHealthClient struct { - Connected bool - ProbeCount int - FakeProbeFailure bool -} - -func (fehc *fakeEgressIPHealthClient) IsConnected() bool { - return fehc.Connected -} - -func (fehc *fakeEgressIPHealthClient) Connect(dialCtx context.Context, mgmtIPs []net.IP, healthCheckPort int) bool { - if fehc.FakeProbeFailure { - return false - } - fehc.Connected = true - return true -} - -func (fehc *fakeEgressIPHealthClient) Disconnect() { - 
fehc.Connected = false - fehc.ProbeCount = 0 -} - -func (fehc *fakeEgressIPHealthClient) Probe(dialCtx context.Context) bool { - if fehc.Connected && !fehc.FakeProbeFailure { - fehc.ProbeCount++ - return true - } - return false -} - -type fakeEgressIPHealthClientAllocator struct{} - -func (f *fakeEgressIPHealthClientAllocator) allocate(nodeName string) healthcheck.EgressIPHealthClient { - return &fakeEgressIPHealthClient{} -} - var ( - reroutePolicyID = "reroute_policy_id" - natID = "nat_id" nodeLogicalRouterIPv6 = []string{"fef0::56"} nodeLogicalRouterIPv4 = []string{"100.64.0.2"} node2LogicalRouterIPv4 = []string{"100.64.0.3"} @@ -114,51 +66,6 @@ func newEgressIPMeta(name string) metav1.ObjectMeta { var egressPodLabel = map[string]string{"egress": "needed"} -func setupNode(nodeName string, ipNets []string, mockAllocationIPs map[string]string) egressNode { - var v4IP, v6IP net.IP - var v4Subnet, v6Subnet *net.IPNet - for _, ipNet := range ipNets { - ip, net, _ := net.ParseCIDR(ipNet) - if utilnet.IsIPv6CIDR(net) { - v6Subnet = net - v6IP = ip - } else { - v4Subnet = net - v4IP = ip - } - } - - mockAllcations := map[string]string{} - for mockAllocationIP, egressIPName := range mockAllocationIPs { - mockAllcations[net.ParseIP(mockAllocationIP).String()] = egressIPName - } - - node := egressNode{ - egressIPConfig: &util.ParsedNodeEgressIPConfiguration{ - V4: util.ParsedIFAddr{ - IP: v4IP, - Net: v4Subnet, - }, - V6: util.ParsedIFAddr{ - IP: v6IP, - Net: v6Subnet, - }, - Capacity: util.Capacity{ - IP: util.UnlimitedNodeCapacity, - IPv4: util.UnlimitedNodeCapacity, - IPv6: util.UnlimitedNodeCapacity, - }, - }, - allocations: mockAllcations, - healthClient: hccAllocator.allocate(nodeName), // using fakeEgressIPHealthClientAllocator - name: nodeName, - isReady: true, - isReachable: true, - isEgressAssignable: true, - } - return node -} - var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { var ( app *cli.App @@ -178,15 +85,6 @@ var _ = ginkgo.Describe("OVN 
master EgressIP Operations", func() { }, } - dialer = fakeEgressIPDialer{} - hccAllocator = &fakeEgressIPHealthClientAllocator{} - - getEgressIPAllocatorSizeSafely := func() int { - fakeOvn.controller.eIPC.allocator.Lock() - defer fakeOvn.controller.eIPC.allocator.Unlock() - return len(fakeOvn.controller.eIPC.allocator.cache) - } - getEgressIPStatusLen := func(egressIPName string) func() int { return func() int { tmp, err := fakeOvn.fakeClient.EgressIPClient.K8sV1().EgressIPs().Get(context.TODO(), egressIPName, metav1.GetOptions{}) @@ -218,17 +116,6 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { return reAssignmentCount } - isEgressAssignableNode := func(nodeName string) func() bool { - return func() bool { - fakeOvn.controller.eIPC.allocator.Lock() - defer fakeOvn.controller.eIPC.allocator.Unlock() - if item, exists := fakeOvn.controller.eIPC.allocator.cache[nodeName]; exists { - return item.isEgressAssignable - } - return false - } - } - nodeSwitch := func() string { _, nodes := getEgressIPStatus(egressIPName) if len(nodes) != 1 { @@ -254,7 +141,7 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { fakeOvn.shutdown() }) - getPodAssignmentState := func(pod *kapi.Pod) *podAssignmentState { + getPodAssignmentState := func(pod *v1.Pod) *podAssignmentState { fakeOvn.controller.eIPC.podAssignmentMutex.Lock() defer fakeOvn.controller.eIPC.podAssignmentMutex.Unlock() if pas := fakeOvn.controller.eIPC.podAssignment[getPodKey(pod)]; pas != nil { @@ -265,79 +152,295 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { ginkgo.Context("On node UPDATE", func() { - ginkgo.It("should re-assign EgressIPs and perform proper OVN transactions when pod is created after node egress label switch", func() { - app.Action = func(ctx *cli.Context) error { + ginkgotable.DescribeTable("should perform proper OVN transactions when pod is created after node egress label switch", + func(interconnect bool) { + app.Action = func(ctx 
*cli.Context) error { + config.OVNKubernetesFeature.EnableInterconnect = interconnect + egressIP := "192.168.126.101" + node1IPv4 := "192.168.126.202/24" + node2IPv4 := "192.168.126.51/24" + + egressPod := *newPodWithLabels(namespace, podName, node1Name, podV4IP, egressPodLabel) + egressNamespace := newNamespace(namespace) + annotations := map[string]string{ + "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", node1IPv4, ""), + "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), + } + labels := map[string]string{ + "k8s.ovn.org/egress-assignable": "", + } + node1 := getNodeObj(node1Name, annotations, labels) + annotations = map[string]string{ + "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", node2IPv4, ""), + "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), + } + labels = map[string]string{} + node2 := getNodeObj(node2Name, annotations, labels) + + eIP := egressipv1.EgressIP{ + ObjectMeta: newEgressIPMeta(egressIPName), + Spec: egressipv1.EgressIPSpec{ + EgressIPs: []string{egressIP}, + PodSelector: metav1.LabelSelector{ + MatchLabels: egressPodLabel, + }, + NamespaceSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{ + "name": egressNamespace.Name, + }, + }, + }, + Status: egressipv1.EgressIPStatus{ + Items: []egressipv1.EgressIPStatusItem{}, + }, + } - egressIP := "192.168.126.101" - node1IPv4 := "192.168.126.202/24" - node2IPv4 := "192.168.126.51/24" + fakeOvn.startWithDBSetup( + libovsdbtest.TestSetup{ + NBData: []libovsdbtest.TestData{ + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name, + Networks: []string{node2LogicalRouterIfAddrV4}, + }, + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID", + Name: 
ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name, + Networks: []string{nodeLogicalRouterIfAddrV4}, + }, + &nbdb.LogicalRouter{ + Name: ovntypes.OVNClusterRouter, + UUID: ovntypes.OVNClusterRouter + "-UUID", + }, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node1.Name, + UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", + }, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node2.Name, + UUID: ovntypes.GWRouterPrefix + node2.Name + "-UUID", + }, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, + }, + }, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, + }, + }, + }, + }, + &egressipv1.EgressIPList{ + Items: []egressipv1.EgressIP{eIP}, + }, + &v1.NodeList{ + Items: []v1.Node{node1, node2}, + }, + &v1.NamespaceList{ + Items: []v1.Namespace{*egressNamespace}, + }) - egressPod := *newPodWithLabels(namespace, podName, node1Name, podV4IP, egressPodLabel) - egressNamespace := newNamespace(namespace) + err := fakeOvn.controller.WatchEgressIPNamespaces() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchEgressIPPods() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchEgressNodes() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchEgressIP() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) - node1 := v1.Node{ - ObjectMeta: metav1.ObjectMeta{ - Name: node1Name, - Annotations: map[string]string{ - 
"k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", node1IPv4, ""), - "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), + lsp := &nbdb.LogicalSwitchPort{Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name} + fakeOvn.controller.nbClient.Get(context.Background(), lsp) + gomega.Eventually(lsp.Options["nat-addresses"]).Should(gomega.Equal("router")) + gomega.Eventually(lsp.Options["exclude-lb-vips-from-garp"]).Should(gomega.Equal("true")) + + fakeOvn.patchEgressIPObj(node1Name, egressIP) + gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) + egressIPs, nodes := getEgressIPStatus(egressIPName) + gomega.Expect(nodes[0]).To(gomega.Equal(node1.Name)) + gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP)) + + node1.Labels = map[string]string{} + node2.Labels = map[string]string{ + "k8s.ovn.org/egress-assignable": "", + } + + _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Nodes().Update(context.TODO(), &node1, metav1.UpdateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Nodes().Update(context.TODO(), &node2, metav1.UpdateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + fakeOvn.patchEgressIPObj(node2Name, egressIP) + gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) + gomega.Eventually(nodeSwitch).Should(gomega.Equal(node2.Name)) + egressIPs, _ = getEgressIPStatus(egressIPName) + gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP)) + + i, n, _ := net.ParseCIDR(podV4IP + "/23") + n.IP = i + fakeOvn.controller.logicalPortCache.add(&egressPod, "", types.DefaultNetworkName, "", nil, []*net.IPNet{n}) + _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Pods(egressPod.Namespace).Create(context.TODO(), &egressPod, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + expectedNatLogicalPort := "k8s-node2" + expectedDatabaseState := 
[]libovsdbtest.TestData{ + &nbdb.LogicalRouterPolicy{ + Priority: types.DefaultNoRereoutePriority, + Match: "ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14", + Action: nbdb.LogicalRouterPolicyActionAllow, + UUID: "default-no-reroute-UUID", + }, + &nbdb.LogicalRouterPolicy{ + Priority: types.DefaultNoRereoutePriority, + Match: fmt.Sprintf("ip4.src == 10.128.0.0/14 && ip4.dst == %s", config.Gateway.V4JoinSubnet), + Action: nbdb.LogicalRouterPolicyActionAllow, + UUID: "no-reroute-service-UUID", + }, + &nbdb.LogicalRouterPolicy{ + Priority: types.EgressIPReroutePriority, + Match: fmt.Sprintf("ip4.src == %s", egressPod.Status.PodIP), + Action: nbdb.LogicalRouterPolicyActionReroute, + Nexthops: node2LogicalRouterIPv4, + ExternalIDs: map[string]string{ + "name": eIP.Name, + }, + UUID: "reroute-UUID", + }, + &nbdb.NAT{ + UUID: "egressip-nat-UUID", + LogicalIP: podV4IP, + ExternalIP: egressIP, + ExternalIDs: map[string]string{ + "name": egressIPName, + }, + Type: nbdb.NATTypeSNAT, + LogicalPort: &expectedNatLogicalPort, + Options: map[string]string{ + "stateless": "false", + }, }, - Labels: map[string]string{ - "k8s.ovn.org/egress-assignable": "", + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node1.Name, + UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", }, - }, - Status: v1.NodeStatus{ - Conditions: []v1.NodeCondition{ - { - Type: v1.NodeReady, - Status: v1.ConditionTrue, - }, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node2.Name, + UUID: ovntypes.GWRouterPrefix + node2.Name + "-UUID", + Nat: []string{"egressip-nat-UUID"}, }, - }, - } - node2 := v1.Node{ - ObjectMeta: metav1.ObjectMeta{ - Name: node2Name, - Annotations: map[string]string{ - "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", node2IPv4, ""), - "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), + &nbdb.LogicalRouter{ + Name: ovntypes.OVNClusterRouter, + UUID: ovntypes.OVNClusterRouter + "-UUID", + Policies: 
[]string{"reroute-UUID", "default-no-reroute-UUID", "no-reroute-service-UUID"}, }, - }, - Status: v1.NodeStatus{ - Conditions: []v1.NodeCondition{ - { - Type: v1.NodeReady, - Status: v1.ConditionTrue, - }, + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name, + Networks: []string{node2LogicalRouterIfAddrV4}, }, - }, - } - - eIP := egressipv1.EgressIP{ - ObjectMeta: newEgressIPMeta(egressIPName), - Spec: egressipv1.EgressIPSpec{ - EgressIPs: []string{egressIP}, - PodSelector: metav1.LabelSelector{ - MatchLabels: egressPodLabel, + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name, + Networks: []string{nodeLogicalRouterIfAddrV4}, }, - NamespaceSelector: metav1.LabelSelector{ - MatchLabels: map[string]string{ - "name": egressNamespace.Name, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, + "nat-addresses": "router", + "exclude-lb-vips-from-garp": "true", }, }, - }, - Status: egressipv1.EgressIPStatus{ - Items: []egressipv1.EgressIPStatusItem{}, - }, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, + "nat-addresses": "router", + "exclude-lb-vips-from-garp": "true", + }, + }, + } + + 
gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) + return nil } - fakeOvn.startWithDBSetup( - libovsdbtest.TestSetup{ + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }, + ginkgotable.Entry("interconnect disabled", false), + ginkgotable.Entry("interconnect enabled", true), + ) + + ginkgotable.DescribeTable("using EgressNode retry should perform proper OVN transactions when pod is created after node egress label switch", + func(interconnect bool) { + config.OVNKubernetesFeature.EnableInterconnect = interconnect + app.Action = func(ctx *cli.Context) error { + + egressIP := "192.168.126.101" + node1IPv4 := "192.168.126.202/24" + node2IPv4 := "192.168.126.51/24" + node3IPv4 := "192.168.126.0/24" + + egressPod := *newPodWithLabels(namespace, podName, "node3", podV4IP, egressPodLabel) + egressNamespace := newNamespace(namespace) + + annotations := map[string]string{ + "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", node1IPv4, ""), + "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), + } + labels := map[string]string{ + "k8s.ovn.org/egress-assignable": "", + } + node1 := getNodeObj(node1Name, annotations, labels) + annotations = map[string]string{ + "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", node2IPv4, ""), + "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), + } + labels = map[string]string{} + node2 := getNodeObj(node2Name, annotations, labels) + annotations = map[string]string{ + "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", node3IPv4, ""), + "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), + } + labels = map[string]string{} + node3 := getNodeObj("node3", annotations, labels) + + eIP := egressipv1.EgressIP{ + ObjectMeta: newEgressIPMeta(egressIPName), + Spec: egressipv1.EgressIPSpec{ 
+ EgressIPs: []string{egressIP}, + PodSelector: metav1.LabelSelector{ + MatchLabels: egressPodLabel, + }, + NamespaceSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{ + "name": egressNamespace.Name, + }, + }, + }, + Status: egressipv1.EgressIPStatus{ + Items: []egressipv1.EgressIPStatusItem{}, + }, + } + initialDB := libovsdbtest.TestSetup{ NBData: []libovsdbtest.TestData{ &nbdb.LogicalRouterPort{ UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name + "-UUID", Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name, - Networks: []string{nodeLogicalRouterIfAddrV4}, + Networks: []string{node2LogicalRouterIfAddrV4}, }, &nbdb.LogicalRouterPort{ UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID", @@ -373,561 +476,1191 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { }, }, }, - }, - &egressipv1.EgressIPList{ - Items: []egressipv1.EgressIP{eIP}, - }, - &v1.NodeList{ - Items: []v1.Node{node1, node2}, - }, - &v1.NamespaceList{ - Items: []v1.Namespace{*egressNamespace}, - }) - - err := fakeOvn.controller.WatchEgressIPNamespaces() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = fakeOvn.controller.WatchEgressIPPods() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = fakeOvn.controller.WatchEgressNodes() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = fakeOvn.controller.WatchEgressIP() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - - gomega.Eventually(getEgressIPAllocatorSizeSafely).Should(gomega.Equal(2)) - gomega.Expect(fakeOvn.controller.eIPC.allocator.cache).To(gomega.HaveKey(node1.Name)) - gomega.Expect(fakeOvn.controller.eIPC.allocator.cache).To(gomega.HaveKey(node2.Name)) - gomega.Eventually(isEgressAssignableNode(node1.Name)).Should(gomega.BeTrue()) - gomega.Eventually(isEgressAssignableNode(node2.Name)).Should(gomega.BeFalse()) + } + fakeOvn.startWithDBSetup( + initialDB, + &egressipv1.EgressIPList{ + Items: 
[]egressipv1.EgressIP{eIP}, + }, + &v1.NodeList{ + Items: []v1.Node{node1, node2, node3}, + }, + &v1.NamespaceList{ + Items: []v1.Namespace{*egressNamespace}, + }) - lsp := &nbdb.LogicalSwitchPort{Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name} - fakeOvn.controller.nbClient.Get(context.Background(), lsp) - gomega.Eventually(lsp.Options["nat-addresses"]).Should(gomega.Equal("router")) - gomega.Eventually(lsp.Options["exclude-lb-vips-from-garp"]).Should(gomega.Equal("true")) + err := fakeOvn.controller.WatchEgressIPNamespaces() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchEgressIPPods() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchEgressNodes() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchEgressIP() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) - gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) - egressIPs, nodes := getEgressIPStatus(egressIPName) - gomega.Expect(nodes[0]).To(gomega.Equal(node1.Name)) - gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP)) + lsp := &nbdb.LogicalSwitchPort{Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name} + err = fakeOvn.controller.nbClient.Get(context.Background(), lsp) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Eventually(lsp.Options["nat-addresses"]).Should(gomega.Equal("router")) + gomega.Eventually(lsp.Options["exclude-lb-vips-from-garp"]).Should(gomega.Equal("true")) - node1.Labels = map[string]string{} - node2.Labels = map[string]string{ - "k8s.ovn.org/egress-assignable": "", - } + lsp = &nbdb.LogicalSwitchPort{Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name} + err = fakeOvn.controller.nbClient.Get(context.Background(), lsp) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Eventually(lsp.Options["nat-addresses"]).Should(gomega.Equal("router")) + 
gomega.Eventually(lsp.Options["exclude-lb-vips-from-garp"]).Should(gomega.Equal("true")) - _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Nodes().Update(context.TODO(), &node1, metav1.UpdateOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Nodes().Update(context.TODO(), &node2, metav1.UpdateOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) + fakeOvn.patchEgressIPObj(node1Name, egressIP) - gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) - gomega.Eventually(nodeSwitch).Should(gomega.Equal(node2.Name)) - egressIPs, _ = getEgressIPStatus(egressIPName) - gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP)) + gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) + egressIPs, nodes := getEgressIPStatus(egressIPName) + gomega.Expect(nodes[0]).To(gomega.Equal(node1.Name)) + gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP)) - i, n, _ := net.ParseCIDR(podV4IP + "/23") - n.IP = i - fakeOvn.controller.logicalPortCache.add(&egressPod, "", types.DefaultNetworkName, "", nil, []*net.IPNet{n}) - _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Pods(egressPod.Namespace).Create(context.TODO(), &egressPod, metav1.CreateOptions{}) + node1.Labels = map[string]string{} + node2.Labels = map[string]string{ + "k8s.ovn.org/egress-assignable": "", + } + node2.Annotations["k8s.ovn.org/host-addresses"] = fmt.Sprintf("[\"%s\",\"%s\"]", "192.168.126.51", "") + ginkgo.By("Bringing down NBDB") + // inject transient problem, nbdb is down + fakeOvn.controller.nbClient.Close() + gomega.Eventually(func() bool { + return fakeOvn.controller.nbClient.Connected() + }).Should(gomega.BeFalse()) + err = fakeOvn.fakeClient.KubeClient.CoreV1().Nodes().Delete(context.TODO(), node1.Name, metav1.DeleteOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Nodes().Update(context.TODO(), &node2, metav1.UpdateOptions{}) + 
gomega.Expect(err).NotTo(gomega.HaveOccurred()) + // sleep long enough for TransactWithRetry to fail, causing egressnode operations to fail + // there is a chance that both egressnode events(node1 removal and node2 update) will end up in the same event queue + // sleep for double the time to allow for two consecutive TransactWithRetry timeouts + time.Sleep(2 * (types.OVSDBTimeout + time.Second)) + // check to see if the retry cache has an entry + key1 := node1.Name + ginkgo.By("retry entry: old obj should not be nil, new obj should be nil") + retry.CheckRetryObjectMultipleFieldsEventually( + key1, + fakeOvn.controller.retryEgressNodes, + gomega.Not(gomega.BeNil()), // oldObj should not be nil + gomega.BeNil(), // newObj should be nil + ) + + key2 := node2.Name + ginkgo.By("retry entry: old obj should be nil, new obj should not be nil, config should not be nil") + retry.CheckRetryObjectMultipleFieldsEventually( + key2, + fakeOvn.controller.retryEgressNodes, + gomega.BeNil(), // oldObj should be nil + gomega.Not(gomega.BeNil()), // newObj should not be nil + gomega.Not(gomega.BeNil()), // config should not be nil + ) + fakeOvn.patchEgressIPObj(node2Name, egressIP) + connCtx, cancel := context.WithTimeout(context.Background(), types.OVSDBTimeout) + defer cancel() + resetNBClient(connCtx, fakeOvn.controller.nbClient) + retry.SetRetryObjWithNoBackoff(key1, fakeOvn.controller.retryEgressNodes) + retry.SetRetryObjWithNoBackoff(key2, fakeOvn.controller.retryEgressNodes) + fakeOvn.controller.retryEgressNodes.RequestRetryObjs() + // check the cache no longer has the entry + retry.CheckRetryObjectEventually(key1, false, fakeOvn.controller.retryEgressNodes) + retry.CheckRetryObjectEventually(key2, false, fakeOvn.controller.retryEgressNodes) + + i, n, _ := net.ParseCIDR(podV4IP + "/23") + n.IP = i + fakeOvn.controller.logicalPortCache.add(&egressPod, "", types.DefaultNetworkName, "", nil, []*net.IPNet{n}) + _, err = 
fakeOvn.fakeClient.KubeClient.CoreV1().Pods(egressPod.Namespace).Create(context.TODO(), &egressPod, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) - expectedNatLogicalPort := "k8s-node2" - expectedDatabaseState := []libovsdbtest.TestData{ - &nbdb.LogicalRouterPolicy{ - Priority: types.DefaultNoRereoutePriority, - Match: "ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14", - Action: nbdb.LogicalRouterPolicyActionAllow, - UUID: "default-no-reroute-UUID", - }, - &nbdb.LogicalRouterPolicy{ - Priority: types.DefaultNoRereoutePriority, - Match: fmt.Sprintf("ip4.src == 10.128.0.0/14 && ip4.dst == %s", config.Gateway.V4JoinSubnet), - Action: nbdb.LogicalRouterPolicyActionAllow, - UUID: "no-reroute-service-UUID", - }, - &nbdb.LogicalRouterPolicy{ - Priority: types.EgressIPReroutePriority, - Match: fmt.Sprintf("ip4.src == %s", egressPod.Status.PodIP), - Action: nbdb.LogicalRouterPolicyActionReroute, - Nexthops: nodeLogicalRouterIPv4, - ExternalIDs: map[string]string{ - "name": eIP.Name, + expectedNatLogicalPort := "k8s-node2" + expectedDatabaseState := []libovsdbtest.TestData{ + &nbdb.LogicalRouterPolicy{ + Priority: types.EgressIPReroutePriority, + Match: fmt.Sprintf("ip4.src == %s", egressPod.Status.PodIP), + Action: nbdb.LogicalRouterPolicyActionReroute, + Nexthops: node2LogicalRouterIPv4, + ExternalIDs: map[string]string{ + "name": eIP.Name, + }, + UUID: "reroute-UUID", + }, + &nbdb.LogicalRouterPolicy{ + Priority: types.DefaultNoRereoutePriority, + Match: "ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14", + Action: nbdb.LogicalRouterPolicyActionAllow, + UUID: "default-no-reroute-UUID", + }, + &nbdb.LogicalRouterPolicy{ + Priority: types.DefaultNoRereoutePriority, + Match: fmt.Sprintf("ip4.src == 10.128.0.0/14 && ip4.dst == %s", config.Gateway.V4JoinSubnet), + Action: nbdb.LogicalRouterPolicyActionAllow, + UUID: "no-reroute-service-UUID", + }, + &nbdb.LogicalRouterPolicy{ + Priority: types.DefaultNoRereoutePriority, + Match: "(ip4.src 
== $a4548040316634674295 || ip4.src == $a13607449821398607916) && ip4.dst == $a14918748166599097711", + Action: nbdb.LogicalRouterPolicyActionAllow, + Options: map[string]string{"pkt_mark": "1008"}, + UUID: "no-reroute-node-UUID", + }, + &nbdb.NAT{ + UUID: "egressip-nat-UUID", + LogicalIP: podV4IP, + ExternalIP: egressIP, + ExternalIDs: map[string]string{ + "name": egressIPName, + }, + Type: nbdb.NATTypeSNAT, + LogicalPort: &expectedNatLogicalPort, + Options: map[string]string{ + "stateless": "false", + }, }, - UUID: "reroute-UUID", - }, - &nbdb.NAT{ - UUID: "egressip-nat-UUID", - LogicalIP: podV4IP, - ExternalIP: egressIP, - ExternalIDs: map[string]string{ - "name": egressIPName, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node1.Name, + UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", }, - Type: nbdb.NATTypeSNAT, - LogicalPort: &expectedNatLogicalPort, - Options: map[string]string{ - "stateless": "false", + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node2.Name, + UUID: ovntypes.GWRouterPrefix + node2.Name + "-UUID", + Nat: []string{"egressip-nat-UUID"}, }, - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node1.Name, - UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node2.Name, - UUID: ovntypes.GWRouterPrefix + node2.Name + "-UUID", - Nat: []string{"egressip-nat-UUID"}, - }, - &nbdb.LogicalRouter{ - Name: ovntypes.OVNClusterRouter, - UUID: ovntypes.OVNClusterRouter + "-UUID", - Policies: []string{"reroute-UUID", "default-no-reroute-UUID", "no-reroute-service-UUID"}, - }, - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name, - Networks: []string{nodeLogicalRouterIfAddrV4}, - }, - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID", - Name: 
ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name, - Networks: []string{nodeLogicalRouterIfAddrV4}, - }, - &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, + &nbdb.LogicalRouter{ + Name: ovntypes.OVNClusterRouter, + UUID: ovntypes.OVNClusterRouter + "-UUID", + Policies: []string{"reroute-UUID", "default-no-reroute-UUID", "no-reroute-service-UUID", "no-reroute-node-UUID"}, }, - }, - &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, - "nat-addresses": "router", - "exclude-lb-vips-from-garp": "true", + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name, + Networks: []string{node2LogicalRouterIfAddrV4}, }, - }, + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name, + Networks: []string{nodeLogicalRouterIfAddrV4}, + }, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, + /* GARP is deleted since node has been deleted */ + // "nat-addresses": "router", + // "exclude-lb-vips-from-garp": "true", + }, + 
}, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, + "nat-addresses": "router", + "exclude-lb-vips-from-garp": "true", + }, + }, + } + + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) + return nil } - gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) - return nil - } + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }, + ginkgotable.Entry("interconnect disabled", false), + ginkgotable.Entry("interconnect enabled", true), // all 3 nodes in same zone, so behaves like non-ic + ) + + ginkgotable.DescribeTable("should perform proper OVN transactions when namespace and pod is created after node egress label switch", + func(interconnect bool, node1Zone, node2Zone string) { + config.OVNKubernetesFeature.EnableInterconnect = interconnect + app.Action = func(ctx *cli.Context) error { + + egressIP := "192.168.126.101" + node1IPv4 := "192.168.126.202/24" + node2IPv4 := "192.168.126.51/24" + + egressPod := *newPodWithLabels(namespace, podName, node1Name, podV4IP, egressPodLabel) + egressNamespace := newNamespace(namespace) + + annotations := map[string]string{ + "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", node1IPv4, ""), + "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), + "k8s.ovn.org/node-transit-switch-port-ifaddr": "{\"ipv4\":\"168.254.0.2/16\"}", // used only for ic=true test + "k8s.ovn.org/zone-name": node1Zone, // used only for ic=true test + } + labels := map[string]string{ + "k8s.ovn.org/egress-assignable": "", + } + node1 := getNodeObj(node1Name, annotations, labels) + annotations = map[string]string{ + 
"k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", node2IPv4, ""), + "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), + "k8s.ovn.org/node-transit-switch-port-ifaddr": "{\"ipv4\":\"168.254.0.3/16\"}", // used only for ic=true test + "k8s.ovn.org/zone-name": node2Zone, // used only for ic=true test + } + labels = map[string]string{} + node2 := getNodeObj(node2Name, annotations, labels) + + eIP := egressipv1.EgressIP{ + ObjectMeta: newEgressIPMeta(egressIPName), + Spec: egressipv1.EgressIPSpec{ + EgressIPs: []string{egressIP}, + PodSelector: metav1.LabelSelector{ + MatchLabels: egressPodLabel, + }, + NamespaceSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{ + "name": egressNamespace.Name, + }, + }, + }, + Status: egressipv1.EgressIPStatus{ + Items: []egressipv1.EgressIPStatusItem{}, + }, + } - err := app.Run([]string{app.Name}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - }) + fakeOvn.startWithDBSetup( + libovsdbtest.TestSetup{ + NBData: []libovsdbtest.TestData{ + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name, + Networks: []string{nodeLogicalRouterIfAddrV4}, + }, + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name, + Networks: []string{node2LogicalRouterIfAddrV4}, + }, + &nbdb.LogicalRouter{ + Name: ovntypes.OVNClusterRouter, + UUID: ovntypes.OVNClusterRouter + "-UUID", + }, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node1.Name, + UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", + }, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node2.Name, + UUID: ovntypes.GWRouterPrefix + node2.Name + "-UUID", + Nat: nil, + }, + &nbdb.LogicalSwitchPort{ + UUID: 
types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, + }, + }, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, + }, + }, + }, + }, + &egressipv1.EgressIPList{ + Items: []egressipv1.EgressIP{eIP}, + }, + &v1.NodeList{ + Items: []v1.Node{node1, node2}, + }) - ginkgo.It("using EgressNode retry should re-assign EgressIPs and perform proper OVN transactions when pod is created after node egress label switch", func() { - app.Action = func(ctx *cli.Context) error { + i, n, _ := net.ParseCIDR(podV4IP + "/23") + n.IP = i + fakeOvn.controller.logicalPortCache.add(&egressPod, "", types.DefaultNetworkName, "", nil, []*net.IPNet{n}) - egressIP := "192.168.126.101" - node1IPv4 := "192.168.126.202/24" - node2IPv4 := "192.168.126.51/24" + err := fakeOvn.controller.WatchEgressIPNamespaces() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchEgressIPPods() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchEgressNodes() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) - egressPod := *newPodWithLabels(namespace, podName, node1Name, podV4IP, egressPodLabel) - egressNamespace := newNamespace(namespace) + fakeOvn.patchEgressIPObj(node1Name, egressIP) - node1 := v1.Node{ - ObjectMeta: metav1.ObjectMeta{ - Name: node1Name, - Annotations: map[string]string{ - "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", node1IPv4, ""), - "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), + lsp := 
&nbdb.LogicalSwitchPort{Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name} + fakeOvn.controller.nbClient.Get(context.Background(), lsp) + if node1Zone == "global" { + // GARP is configured only for nodes in local zones, the master of the remote zone will do it for the remote nodes + gomega.Eventually(lsp.Options["nat-addresses"]).Should(gomega.Equal("router")) + gomega.Eventually(lsp.Options["exclude-lb-vips-from-garp"]).Should(gomega.Equal("true")) + } + + err = fakeOvn.controller.WatchEgressIP() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) + egressIPs, nodes := getEgressIPStatus(egressIPName) + gomega.Expect(nodes[0]).To(gomega.Equal(node1.Name)) + gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP)) + + node1.Labels = map[string]string{} + node2.Labels = map[string]string{ + "k8s.ovn.org/egress-assignable": "", + } + + _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Nodes().Update(context.TODO(), &node1, metav1.UpdateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Nodes().Update(context.TODO(), &node2, metav1.UpdateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + fakeOvn.patchEgressIPObj(node2Name, egressIP) + gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) + gomega.Eventually(nodeSwitch).Should(gomega.Equal(node2.Name)) + egressIPs, _ = getEgressIPStatus(egressIPName) + gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP)) + + _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Namespaces().Create(context.TODO(), egressNamespace, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Pods(egressPod.Namespace).Create(context.TODO(), &egressPod, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + expectedNatLogicalPort := "k8s-node2" + reroutePolicyNextHop 
:= node2LogicalRouterIPv4 + if interconnect && node1Zone != node2Zone { + reroutePolicyNextHop = []string{"168.254.0.3"} // node2's transit switch portIP + } + expectedDatabaseState := []libovsdbtest.TestData{ + getReRoutePolicy(egressPod.Status.PodIP, "4", reroutePolicyNextHop), + &nbdb.LogicalRouterPolicy{ + Priority: types.DefaultNoRereoutePriority, + Match: "ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14", + Action: nbdb.LogicalRouterPolicyActionAllow, + UUID: "default-no-reroute-UUID", + }, + &nbdb.LogicalRouterPolicy{ + Priority: types.DefaultNoRereoutePriority, + Match: fmt.Sprintf("ip4.src == 10.128.0.0/14 && ip4.dst == %s", config.Gateway.V4JoinSubnet), + Action: nbdb.LogicalRouterPolicyActionAllow, + UUID: "no-reroute-service-UUID", }, - Labels: map[string]string{ - "k8s.ovn.org/egress-assignable": "", + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node1.Name, + UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", }, - }, - Status: v1.NodeStatus{ - Conditions: []v1.NodeCondition{ - { - Type: v1.NodeReady, - Status: v1.ConditionTrue, - }, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node2.Name, + UUID: ovntypes.GWRouterPrefix + node2.Name + "-UUID", }, - }, - } - node2 := v1.Node{ - ObjectMeta: metav1.ObjectMeta{ - Name: node2Name, - Annotations: map[string]string{ - "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", node2IPv4, ""), - "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), + &nbdb.LogicalRouter{ + Name: ovntypes.OVNClusterRouter, + UUID: ovntypes.OVNClusterRouter + "-UUID", + Policies: []string{"reroute-UUID", "default-no-reroute-UUID", "no-reroute-service-UUID"}, }, - }, - Status: v1.NodeStatus{ - Conditions: []v1.NodeCondition{ - { - Type: v1.NodeReady, - Status: v1.ConditionTrue, - }, + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + 
ovntypes.GWRouterPrefix + node1.Name, + Networks: []string{nodeLogicalRouterIfAddrV4}, }, - }, - } - - eIP := egressipv1.EgressIP{ - ObjectMeta: newEgressIPMeta(egressIPName), - Spec: egressipv1.EgressIPSpec{ - EgressIPs: []string{egressIP}, - PodSelector: metav1.LabelSelector{ - MatchLabels: egressPodLabel, + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name, + Networks: []string{node2LogicalRouterIfAddrV4}, }, - NamespaceSelector: metav1.LabelSelector{ - MatchLabels: map[string]string{ - "name": egressNamespace.Name, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, }, }, - }, - Status: egressipv1.EgressIPStatus{ - Items: []egressipv1.EgressIPStatusItem{}, - }, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, + }, + }, + } + if node1Zone == "global" { + // GARP is configured only for nodes in local zones, the master of the remote zone will do it for the remote nodes + expectedDatabaseState[8].(*nbdb.LogicalSwitchPort).Options["nat-addresses"] = "router" + expectedDatabaseState[8].(*nbdb.LogicalSwitchPort).Options["exclude-lb-vips-from-garp"] = "true" + } + if node2Zone != "remote" { + // either not interconnect or egressNode is in localZone + eipSNAT := getEIPSNAT(podV4IP, egressIP, expectedNatLogicalPort) + expectedDatabaseState = append(expectedDatabaseState, eipSNAT) + 
expectedDatabaseState[4].(*nbdb.LogicalRouter).Nat = []string{"egressip-nat-UUID"} // 4th item is node2's GR + // add GARP config only if node is in local zone + expectedDatabaseState[9].(*nbdb.LogicalSwitchPort).Options["nat-addresses"] = "router" + expectedDatabaseState[9].(*nbdb.LogicalSwitchPort).Options["exclude-lb-vips-from-garp"] = "true" + } + if node2Zone != node1Zone && node2Zone == "global" { + // node2 will have a static route added for the pod + expectedDatabaseState = append(expectedDatabaseState, getReRouteStaticRoute(podV4IP, node2LogicalRouterIPv4[0])) + expectedDatabaseState[5].(*nbdb.LogicalRouter).StaticRoutes = []string{"reroute-static-route-UUID"} + expectedDatabaseState[5].(*nbdb.LogicalRouter).Policies = []string{"default-no-reroute-UUID", "no-reroute-service-UUID"} + expectedDatabaseState = expectedDatabaseState[1:] // reroute logical router policy is in remote zone, hence not visible + } + + gomega.Eventually(fakeOvn.nbClient, inspectTimeout).Should(libovsdbtest.HaveData(expectedDatabaseState)) + + return nil } - fakeOvn.startWithDBSetup( - libovsdbtest.TestSetup{ - NBData: []libovsdbtest.TestData{ - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name, - Networks: []string{nodeLogicalRouterIfAddrV4}, - }, - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name, - Networks: []string{nodeLogicalRouterIfAddrV4}, - }, - &nbdb.LogicalRouter{ - Name: ovntypes.OVNClusterRouter, - UUID: ovntypes.OVNClusterRouter + "-UUID", - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node1.Name, - UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node2.Name, - UUID: ovntypes.GWRouterPrefix 
+ node2.Name + "-UUID", - }, - &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }, + ginkgotable.Entry("interconnect disabled; non-ic - single zone setup", false, "global", "global"), + ginkgotable.Entry("interconnect enabled; node1 and node2 in global zones", true, "global", "global"), + // will showcase localzone setup - master is in pod's zone where pod's reroute policy towards egressNode will be done. + // NOTE: SNAT won't be visible because its in remote zone + ginkgotable.Entry("interconnect enabled; node1 in global and node2 in remote zones", true, "global", "remote"), + // will showcase localzone setup - master is in egress node's zone where pod's SNAT policy and static route will be done. 
+ // NOTE: reroute policy won't be visible because its in remote zone (pod is in remote zone) + ginkgotable.Entry("interconnect enabled; node1 in remote and node2 in global zones", true, "remote", "global"), + ) + }) + + ginkgo.Context("On node DELETE", func() { + + ginkgotable.DescribeTable("should perform proper OVN transactions when node's gateway objects are already deleted", + func(interconnect bool, node1Zone, node2Zone string) { + config.OVNKubernetesFeature.EnableInterconnect = interconnect + app.Action = func(ctx *cli.Context) error { + + egressIP := "192.168.126.101" + node1IPv4 := "192.168.126.202/24" + node2IPv4 := "192.168.126.51/24" + + egressPod := *newPodWithLabels(namespace, podName, node1Name, podV4IP, egressPodLabel) + egressNamespace := newNamespace(namespace) + annotations := map[string]string{ + "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", node1IPv4, ""), + "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), + "k8s.ovn.org/node-transit-switch-port-ifaddr": "{\"ipv4\":\"168.254.0.2/16\"}", // used only for ic=true test + "k8s.ovn.org/zone-name": node1Zone, + } + labels := map[string]string{ + "k8s.ovn.org/egress-assignable": "", + } + node1 := getNodeObj(node1Name, annotations, labels) + annotations = map[string]string{ + "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", node2IPv4, ""), + "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), + "k8s.ovn.org/node-transit-switch-port-ifaddr": "{\"ipv4\":\"168.254.0.3/16\"}", // used only for ic=true test + "k8s.ovn.org/zone-name": node2Zone, // used only for ic=true test + } + labels = map[string]string{} + node2 := getNodeObj(node2Name, annotations, labels) + + eIP := egressipv1.EgressIP{ + ObjectMeta: newEgressIPMeta(egressIPName), + Spec: egressipv1.EgressIPSpec{ + EgressIPs: []string{egressIP}, + PodSelector: metav1.LabelSelector{ + MatchLabels: egressPodLabel, + 
}, + NamespaceSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{ + "name": egressNamespace.Name, }, }, - &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, + }, + Status: egressipv1.EgressIPStatus{ + Items: []egressipv1.EgressIPStatusItem{}, + }, + } + + fakeOvn.startWithDBSetup( + libovsdbtest.TestSetup{ + NBData: []libovsdbtest.TestData{ + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name, + Networks: []string{nodeLogicalRouterIfAddrV4}, + }, + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name, + Networks: []string{node2LogicalRouterIfAddrV4}, + }, + &nbdb.LogicalRouter{ + Name: ovntypes.OVNClusterRouter, + UUID: ovntypes.OVNClusterRouter + "-UUID", + }, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node1.Name, + UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", + }, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node2.Name, + UUID: ovntypes.GWRouterPrefix + node2.Name + "-UUID", + }, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, + }, + }, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + 
node2Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, + }, + }, + &nbdb.LogicalSwitch{ + UUID: types.OVNJoinSwitch + "-UUID", + Name: types.OVNJoinSwitch, }, }, }, - }, - &egressipv1.EgressIPList{ - Items: []egressipv1.EgressIP{eIP}, - }, - &v1.NodeList{ - Items: []v1.Node{node1, node2}, - }, - &v1.NamespaceList{ - Items: []v1.Namespace{*egressNamespace}, - }) + &egressipv1.EgressIPList{ + Items: []egressipv1.EgressIP{eIP}, + }, + &v1.NodeList{ + Items: []v1.Node{node1, node2}, + }, + &v1.NamespaceList{ + Items: []v1.Namespace{*egressNamespace}, + }) - err := fakeOvn.controller.WatchEgressIPNamespaces() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = fakeOvn.controller.WatchEgressIPPods() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = fakeOvn.controller.WatchEgressNodes() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = fakeOvn.controller.WatchEgressIP() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err := fakeOvn.controller.WatchEgressIPNamespaces() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchEgressIPPods() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchEgressNodes() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchEgressIP() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) - gomega.Eventually(getEgressIPAllocatorSizeSafely).Should(gomega.Equal(2)) - gomega.Expect(fakeOvn.controller.eIPC.allocator.cache).To(gomega.HaveKey(node1.Name)) - gomega.Expect(fakeOvn.controller.eIPC.allocator.cache).To(gomega.HaveKey(node2.Name)) - gomega.Eventually(isEgressAssignableNode(node1.Name)).Should(gomega.BeTrue()) - gomega.Eventually(isEgressAssignableNode(node2.Name)).Should(gomega.BeFalse()) + fakeOvn.patchEgressIPObj(node1Name, egressIP) - lsp := &nbdb.LogicalSwitchPort{Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name} - err = 
fakeOvn.controller.nbClient.Get(context.Background(), lsp) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - gomega.Eventually(lsp.Options["nat-addresses"]).Should(gomega.Equal("router")) - gomega.Eventually(lsp.Options["exclude-lb-vips-from-garp"]).Should(gomega.Equal("true")) + lsp := &nbdb.LogicalSwitchPort{Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name} + fakeOvn.controller.nbClient.Get(context.Background(), lsp) + if node1Zone == "global" { + // only if node is global we add this + gomega.Eventually(lsp.Options["nat-addresses"]).Should(gomega.Equal("router")) + gomega.Eventually(lsp.Options["exclude-lb-vips-from-garp"]).Should(gomega.Equal("true")) + } - gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) - egressIPs, nodes := getEgressIPStatus(egressIPName) - gomega.Expect(nodes[0]).To(gomega.Equal(node1.Name)) - gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP)) + gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) + egressIPs, nodes := getEgressIPStatus(egressIPName) + gomega.Expect(nodes[0]).To(gomega.Equal(node1.Name)) + gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP)) - node1.Labels = map[string]string{} - node2.Labels = map[string]string{ - "k8s.ovn.org/egress-assignable": "", - } + node2.Labels = map[string]string{ + "k8s.ovn.org/egress-assignable": "", + } + _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Nodes().Update(context.TODO(), &node2, metav1.UpdateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) - ginkgo.By("Bringing down NBDB") - // inject transient problem, nbdb is down - fakeOvn.controller.nbClient.Close() - gomega.Eventually(func() bool { - return fakeOvn.controller.nbClient.Connected() - }).Should(gomega.BeFalse()) - err = fakeOvn.fakeClient.KubeClient.CoreV1().Nodes().Delete(context.TODO(), node1.Name, metav1.DeleteOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - _, err = 
fakeOvn.fakeClient.KubeClient.CoreV1().Nodes().Update(context.TODO(), &node2, metav1.UpdateOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - // sleep long enough for TransactWithRetry to fail, causing egressnode operations to fail - // there is a chance that both egressnode events(node1 removal and node2 update) will end up in the same event queue - // sleep for double the time to allow for two consecutive TransactWithRetry timeouts - time.Sleep(2 * (types.OVSDBTimeout + time.Second)) - // check to see if the retry cache has an entry - key1 := node1.Name - ginkgo.By("retry entry: old obj should not be nil, new obj should be nil") - retry.CheckRetryObjectMultipleFieldsEventually( - key1, - fakeOvn.controller.retryEgressNodes, - gomega.Not(gomega.BeNil()), // oldObj should not be nil - gomega.BeNil(), // newObj should be nil - ) + i, n, _ := net.ParseCIDR(podV4IP + "/23") + n.IP = i + fakeOvn.controller.logicalPortCache.add(&egressPod, "", types.DefaultNetworkName, "", nil, []*net.IPNet{n}) + _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Pods(egressPod.Namespace).Create(context.TODO(), &egressPod, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) - key2 := node2.Name - ginkgo.By("retry entry: old obj should be nil, new obj should not be nil, config should not be nil") - retry.CheckRetryObjectMultipleFieldsEventually( - key2, - fakeOvn.controller.retryEgressNodes, - gomega.BeNil(), // oldObj should be nil - gomega.Not(gomega.BeNil()), // newObj should not be nil - gomega.Not(gomega.BeNil()), // config should not be nil - ) + expectedNatLogicalPort := "k8s-node1" + primarySNAT := getEIPSNAT(podV4IP, egressIP, expectedNatLogicalPort) + primarySNAT.UUID = "egressip-nat1-UUID" + expectedDatabaseState := []libovsdbtest.TestData{ + getReRoutePolicy(egressPod.Status.PodIP, "4", nodeLogicalRouterIPv4), + primarySNAT, + &nbdb.LogicalRouterPolicy{ + Priority: types.DefaultNoRereoutePriority, + Match: "ip4.src == 10.128.0.0/14 && ip4.dst == 
10.128.0.0/14", + Action: nbdb.LogicalRouterPolicyActionAllow, + UUID: "default-no-reroute-UUID", + }, + &nbdb.LogicalRouterPolicy{ + Priority: types.DefaultNoRereoutePriority, + Match: fmt.Sprintf("ip4.src == 10.128.0.0/14 && ip4.dst == %s", config.Gateway.V4JoinSubnet), + Action: nbdb.LogicalRouterPolicyActionAllow, + UUID: "no-reroute-service-UUID", + }, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node1.Name, + UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", + Nat: []string{"egressip-nat1-UUID"}, + }, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node2.Name, + UUID: ovntypes.GWRouterPrefix + node2.Name + "-UUID", + Nat: []string{}, + }, + &nbdb.LogicalRouter{ + Name: ovntypes.OVNClusterRouter, + UUID: ovntypes.OVNClusterRouter + "-UUID", + Policies: []string{"reroute-UUID", "default-no-reroute-UUID", "no-reroute-service-UUID"}, + }, + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name, + Networks: []string{nodeLogicalRouterIfAddrV4}, + }, + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name, + Networks: []string{node2LogicalRouterIfAddrV4}, + }, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, + }, + }, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + 
"GR_" + node2Name, + }, + }, + &nbdb.LogicalSwitch{ + UUID: types.OVNJoinSwitch + "-UUID", + Name: types.OVNJoinSwitch, + }, + } + if node2Zone != "remote" { + // add GARP config only if node is in local zone + expectedDatabaseState[10].(*nbdb.LogicalSwitchPort).Options["nat-addresses"] = "router" + expectedDatabaseState[10].(*nbdb.LogicalSwitchPort).Options["exclude-lb-vips-from-garp"] = "true" + } + if node1Zone != "remote" { + // add GARP config only if node is in local zone + expectedDatabaseState[9].(*nbdb.LogicalSwitchPort).Options["nat-addresses"] = "router" + expectedDatabaseState[9].(*nbdb.LogicalSwitchPort).Options["exclude-lb-vips-from-garp"] = "true" + } else { + // if node1 where the pod lives is remote we can't see the EIP setup done since master belongs to local zone + expectedDatabaseState[4].(*nbdb.LogicalRouter).Nat = []string{} + expectedDatabaseState[6].(*nbdb.LogicalRouter).Policies = []string{"default-no-reroute-UUID", "no-reroute-service-UUID"} + expectedDatabaseState = expectedDatabaseState[2:] + } + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) - connCtx, cancel := context.WithTimeout(context.Background(), types.OVSDBTimeout) - defer cancel() - resetNBClient(connCtx, fakeOvn.controller.nbClient) - retry.SetRetryObjWithNoBackoff(key1, fakeOvn.controller.retryEgressNodes) - retry.SetRetryObjWithNoBackoff(key2, fakeOvn.controller.retryEgressNodes) - fakeOvn.controller.retryEgressNodes.RequestRetryObjs() - // check the cache no longer has the entry - retry.CheckRetryObjectEventually(key1, false, fakeOvn.controller.retryEgressNodes) - retry.CheckRetryObjectEventually(key2, false, fakeOvn.controller.retryEgressNodes) - gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) - gomega.Eventually(nodeSwitch).Should(gomega.Equal(node2.Name)) - egressIPs, _ = getEgressIPStatus(egressIPName) - gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP)) + err = 
fakeOvn.controller.gatewayCleanup(node1Name) // simulate an already deleted node + gomega.Expect(err).NotTo(gomega.HaveOccurred()) - i, n, _ := net.ParseCIDR(podV4IP + "/23") - n.IP = i - fakeOvn.controller.logicalPortCache.add(&egressPod, "", types.DefaultNetworkName, "", nil, []*net.IPNet{n}) - _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Pods(egressPod.Namespace).Create(context.TODO(), &egressPod, metav1.CreateOptions{}) + err = fakeOvn.fakeClient.KubeClient.CoreV1().Nodes().Delete(context.TODO(), node1Name, metav1.DeleteOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) - expectedNatLogicalPort := "k8s-node2" - expectedDatabaseState := []libovsdbtest.TestData{ - &nbdb.LogicalRouterPolicy{ - Priority: types.DefaultNoRereoutePriority, - Match: "ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14", - Action: nbdb.LogicalRouterPolicyActionAllow, - UUID: "default-no-reroute-UUID", - }, - &nbdb.LogicalRouterPolicy{ - Priority: types.DefaultNoRereoutePriority, - Match: fmt.Sprintf("ip4.src == 10.128.0.0/14 && ip4.dst == %s", config.Gateway.V4JoinSubnet), - Action: nbdb.LogicalRouterPolicyActionAllow, - UUID: "no-reroute-service-UUID", - }, - &nbdb.LogicalRouterPolicy{ - Priority: types.EgressIPReroutePriority, - Match: fmt.Sprintf("ip4.src == %s", egressPod.Status.PodIP), - Action: nbdb.LogicalRouterPolicyActionReroute, - Nexthops: nodeLogicalRouterIPv4, - ExternalIDs: map[string]string{ - "name": eIP.Name, + // NOTE: This test checks if plumbing is removed when node is gone but pod on the node is still present (unusual scenario) + // Thus we need to check the cache state to verify things in unit tests to avoid races - we don't control the order of + // node's deletion removing the entry from localZonesCache versus the add happening for the pod. 
+ // (in real env this won't be a problem since eventually things will reconcile as pod will also be gone if node is gone) + gomega.Eventually(func() bool { + _, ok := fakeOvn.controller.eIPC.nodeZoneState.Load(egressPod.Spec.NodeName) + return ok + }).Should(gomega.BeFalse()) + + // W0608 12:53:33.728205 1161455 egressip.go:2030] Unable to retrieve gateway IP for node: node1, protocol is IPv6: false, err: attempt at finding node gateway router network information failed, err: unable to find router port rtoj-GR_node1: object not found + // 2023-04-25T11:01:13.2804834Z W0425 11:01:13.280407 21055 egressip.go:2036] Unable to fetch transit switch IP for node: node1: err: failed to get node node1: node "node1" not found + fakeOvn.patchEgressIPObj(node2Name, egressIP) + gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) + gomega.Eventually(nodeSwitch).Should(gomega.Equal(node2.Name)) // egressIP successfully reassigned to node2 + egressIPs, _ = getEgressIPStatus(egressIPName) + gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP)) + + expectedNatLogicalPort = "k8s-node2" + eipSNAT := getEIPSNAT(podV4IP, egressIP, expectedNatLogicalPort) + expectedDatabaseState = []libovsdbtest.TestData{ + getReRoutePolicy(egressPod.Status.PodIP, "4", node2LogicalRouterIPv4), + &nbdb.LogicalRouterPolicy{ + Priority: types.DefaultNoRereoutePriority, + Match: "ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14", + Action: nbdb.LogicalRouterPolicyActionAllow, + UUID: "default-no-reroute-UUID", + }, + &nbdb.LogicalRouterPolicy{ + Priority: types.DefaultNoRereoutePriority, + Match: fmt.Sprintf("ip4.src == 10.128.0.0/14 && ip4.dst == %s", config.Gateway.V4JoinSubnet), + Action: nbdb.LogicalRouterPolicyActionAllow, + UUID: "no-reroute-service-UUID", }, - UUID: "reroute-UUID", - }, - &nbdb.NAT{ - UUID: "egressip-nat-UUID", - LogicalIP: podV4IP, - ExternalIP: egressIP, - ExternalIDs: map[string]string{ - "name": egressIPName, + &nbdb.LogicalRouter{ + Name: 
ovntypes.GWRouterPrefix + node2.Name, + UUID: ovntypes.GWRouterPrefix + node2.Name + "-UUID", }, - Type: nbdb.NATTypeSNAT, - LogicalPort: &expectedNatLogicalPort, - Options: map[string]string{ - "stateless": "false", + &nbdb.LogicalRouter{ + Name: ovntypes.OVNClusterRouter, + UUID: ovntypes.OVNClusterRouter + "-UUID", + Policies: []string{"reroute-UUID", "default-no-reroute-UUID", "no-reroute-service-UUID"}, }, - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node1.Name, - UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node2.Name, - UUID: ovntypes.GWRouterPrefix + node2.Name + "-UUID", - Nat: []string{"egressip-nat-UUID"}, - }, - &nbdb.LogicalRouter{ - Name: ovntypes.OVNClusterRouter, - UUID: ovntypes.OVNClusterRouter + "-UUID", - Policies: []string{"reroute-UUID", "default-no-reroute-UUID", "no-reroute-service-UUID"}, - }, - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name, - Networks: []string{nodeLogicalRouterIfAddrV4}, - }, - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name, - Networks: []string{nodeLogicalRouterIfAddrV4}, - }, - &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name, + Networks: []string{node2LogicalRouterIfAddrV4}, }, 
- }, - &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, - "nat-addresses": "router", - "exclude-lb-vips-from-garp": "true", + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, + }, }, - }, - } + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, + }, + }, + &nbdb.LogicalSwitch{ + UUID: types.OVNJoinSwitch + "-UUID", + Name: types.OVNJoinSwitch, + }, + } + if node2Zone != "remote" { + // either not interconnect or egressNode is in localZone + expectedDatabaseState = append(expectedDatabaseState, eipSNAT) + expectedDatabaseState[3].(*nbdb.LogicalRouter).Nat = []string{"egressip-nat-UUID"} // 4th item is node2's GR + // add GARP config only if node is in local zone + expectedDatabaseState[7].(*nbdb.LogicalSwitchPort).Options["nat-addresses"] = "router" + expectedDatabaseState[7].(*nbdb.LogicalSwitchPort).Options["exclude-lb-vips-from-garp"] = "true" + } + if node1Zone == "global" { + // even if node1's GR is gone already, in the libovsdb test framework we need to explicitly remove the NATs from GR. + // This won't be a problem in real env, See https://github.com/ovn-org/libovsdb/issues/338 for details. 
+ // Hence we will be left with a stale SNAT + // in non-IC setup, this is not dependent on localZoneNodes cache; we nuke all SNATs for this pod on all nodes + // hence we need to do this only when its IC and pod is in local zone + expectedDatabaseState = append(expectedDatabaseState, primarySNAT) + } + // all cases: reroute logical router policy is gone and won't be recreated since node1 is deleted - that is where the pod lives + // NOTE: This test is not really a real scenario, it depicts a transient state. + expectedDatabaseState[4].(*nbdb.LogicalRouter).Policies = []string{"default-no-reroute-UUID", "no-reroute-service-UUID"} + expectedDatabaseState = expectedDatabaseState[1:] - gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) - return nil - } + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) + return nil + } - err := app.Run([]string{app.Name}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - }) + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }, + ginkgotable.Entry("interconnect disabled; non-ic - single zone setup", false, "global", "global"), + ginkgotable.Entry("interconnect enabled; node1 and node2 in global zones", true, "global", "global"), + // will showcase localzone setup - master is in pod's zone where pod's reroute policy towards egressNode will be done. + // NOTE: SNAT won't be visible because its in remote zone + ginkgotable.Entry("interconnect enabled; node1 in global and node2 in remote zones", true, "global", "remote"), + // will showcase localzone setup - master is in egress node's zone where pod's SNAT policy and static route will* be done. 
+ // * the static route won't be visible because the pod's node node1 is getting deleted in this test + // NOTE: reroute policy won't be visible because its in remote zone (pod is in remote zone) + ginkgotable.Entry("interconnect enabled; node1 in remote and node2 in global zones", true, "remote", "global"), + ) + }) - ginkgo.It("should re-assign EgressIPs and perform proper OVN transactions when namespace and pod is created after node egress label switch", func() { - app.Action = func(ctx *cli.Context) error { + ginkgo.Context("IPv6 on pod UPDATE", func() { - egressIP := "192.168.126.101" - node1IPv4 := "192.168.126.202/24" - node2IPv4 := "192.168.126.51/24" + ginkgotable.DescribeTable("should remove OVN pod egress setup when EgressIP stops matching pod label", + func(interconnect, isnode1Local, isnode2Local bool) { + config.OVNKubernetesFeature.EnableInterconnect = interconnect + app.Action = func(ctx *cli.Context) error { - egressPod := *newPodWithLabels(namespace, podName, node1Name, podV4IP, egressPodLabel) - egressNamespace := newNamespace(namespace) + egressIP := net.ParseIP("0:0:0:0:0:feff:c0a8:8e0d") - node1 := v1.Node{ - ObjectMeta: metav1.ObjectMeta{ - Name: node1Name, - Annotations: map[string]string{ - "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", node1IPv4, ""), - "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), - }, - Labels: map[string]string{ - "k8s.ovn.org/egress-assignable": "", - }, - }, - Status: v1.NodeStatus{ - Conditions: []v1.NodeCondition{ - { - Type: v1.NodeReady, - Status: v1.ConditionTrue, + egressPod := *newPodWithLabels(namespace, podName, node1Name, podV6IP, egressPodLabel) + egressNamespace := newNamespace(namespace) + node2IPv4 := "192.168.126.202/24" + annotations := map[string]string{ + "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", node2IPv4, ""), + "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", 
v6NodeSubnet), + "k8s.ovn.org/node-transit-switch-port-ifaddr": "{\"ipv4\":\"168.254.0.2/16\", \"ipv6\": \"fd97::2/64\"}", // used only for ic=true test + } + node2 := getNodeObj(node2Name, annotations, map[string]string{}) // add node to avoid errori-ing out on transit switch IP fetch + + fakeOvn.startWithDBSetup( + libovsdbtest.TestSetup{ + NBData: []libovsdbtest.TestData{ + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2Name, + Networks: []string{nodeLogicalRouterIfAddrV6}, + }, + &nbdb.LogicalRouter{ + Name: ovntypes.OVNClusterRouter, + UUID: ovntypes.OVNClusterRouter + "-UUID", + }, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node1Name, + UUID: ovntypes.GWRouterPrefix + node1Name + "-UUID", + }, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node2Name, + UUID: ovntypes.GWRouterPrefix + node2Name + "-UUID", + Nat: nil, + }, }, }, - }, - } - node2 := v1.Node{ - ObjectMeta: metav1.ObjectMeta{ - Name: node2Name, - Annotations: map[string]string{ - "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", node2IPv4, ""), - "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), + &v1.NamespaceList{ + Items: []v1.Namespace{*egressNamespace}, }, - }, - Status: v1.NodeStatus{ - Conditions: []v1.NodeCondition{ - { - Type: v1.NodeReady, - Status: v1.ConditionTrue, - }, + &v1.PodList{ + Items: []v1.Pod{egressPod}, }, - }, - } - - eIP := egressipv1.EgressIP{ - ObjectMeta: newEgressIPMeta(egressIPName), - Spec: egressipv1.EgressIPSpec{ - EgressIPs: []string{egressIP}, - PodSelector: metav1.LabelSelector{ - MatchLabels: egressPodLabel, + &v1.NodeList{ + Items: []v1.Node{node2}, }, - NamespaceSelector: metav1.LabelSelector{ - MatchLabels: map[string]string{ - "name": egressNamespace.Name, + ) + + eIP := egressipv1.EgressIP{ + ObjectMeta: 
newEgressIPMeta(egressIPName), + Spec: egressipv1.EgressIPSpec{ + EgressIPs: []string{ + egressIP.String(), + }, + PodSelector: metav1.LabelSelector{ + MatchLabels: egressPodLabel, + }, + NamespaceSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{ + "name": egressNamespace.Name, + }, }, }, - }, - Status: egressipv1.EgressIPStatus{ - Items: []egressipv1.EgressIPStatusItem{}, - }, - } + } - fakeOvn.startWithDBSetup( - libovsdbtest.TestSetup{ - NBData: []libovsdbtest.TestData{ - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name, - Networks: []string{nodeLogicalRouterIfAddrV4}, + i, n, _ := net.ParseCIDR(podV6IP + "/23") + n.IP = i + fakeOvn.controller.logicalPortCache.add(&egressPod, "", types.DefaultNetworkName, "", nil, []*net.IPNet{n}) + + err := fakeOvn.controller.WatchEgressIPNamespaces() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchEgressIPPods() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchEgressIP() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + // hack pod to be in the provided zone + fakeOvn.controller.eIPC.nodeZoneState.Store(node1Name, isnode1Local) + fakeOvn.controller.eIPC.nodeZoneState.Store(node2Name, isnode2Local) + if isnode1Local { + fakeOvn.controller.localZoneNodes.Store(node1Name, true) + } + if isnode2Local { + fakeOvn.controller.localZoneNodes.Store(node2Name, true) + } + + _, err = fakeOvn.fakeClient.EgressIPClient.K8sV1().EgressIPs().Create(context.TODO(), &eIP, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + fakeOvn.patchEgressIPObj(node2Name, egressIP.String()) + + gomega.Eventually(getEgressIPStatusLen(eIP.Name)).Should(gomega.Equal(1)) + + expectedNatLogicalPort := "k8s-node2" + expectedDatabaseState := []libovsdbtest.TestData{ + 
getReRoutePolicy(egressPod.Status.PodIP, "6", nodeLogicalRouterIPv6), + getEIPSNAT(podV6IP, egressIP.String(), expectedNatLogicalPort), + &nbdb.LogicalRouter{ + Name: ovntypes.OVNClusterRouter, + UUID: ovntypes.OVNClusterRouter + "-UUID", + Policies: []string{"reroute-UUID"}, + }, + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2Name, + Networks: []string{nodeLogicalRouterIfAddrV6}, + }, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node1Name, + UUID: ovntypes.GWRouterPrefix + node1Name + "-UUID", + }, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node2Name, + UUID: ovntypes.GWRouterPrefix + node2Name + "-UUID", + Nat: []string{"egressip-nat-UUID"}, + }, + } + if !isnode1Local { + // case4: egressNode is in different zone than pod and egressNode is in local zone, so static reroute will be visible + expectedDatabaseState = append(expectedDatabaseState, getReRouteStaticRoute(egressPod.Status.PodIP, nodeLogicalRouterIPv6[0])) + expectedDatabaseState[2].(*nbdb.LogicalRouter).Policies = []string{} + expectedDatabaseState[2].(*nbdb.LogicalRouter).StaticRoutes = []string{"reroute-static-route-UUID"} + expectedDatabaseState = expectedDatabaseState[1:] + } + if !isnode2Local { + // case3: pod's SNAT is not visible because egress node is remote + expectedDatabaseState[5].(*nbdb.LogicalRouter).Nat = []string{} + expectedDatabaseState = expectedDatabaseState[2:] + // add policy with nextHop towards egressNode's transit switchIP + expectedDatabaseState = append(expectedDatabaseState, getReRoutePolicy(egressPod.Status.PodIP, "6", []string{"fd97::2"})) + } + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) + + egressIPs, nodes := getEgressIPStatus(eIP.Name) + gomega.Expect(nodes[0]).To(gomega.Equal(node2Name)) + 
gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP.String())) + + podUpdate := newPod(namespace, podName, node1Name, podV6IP) + + _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Pods(egressPod.Namespace).Update(context.TODO(), podUpdate, metav1.UpdateOptions{}) + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + gomega.Eventually(getEgressIPStatusLen(eIP.Name)).Should(gomega.Equal(1)) + + expectedDatabaseState = []libovsdbtest.TestData{ + &nbdb.LogicalRouter{ + Name: ovntypes.OVNClusterRouter, + UUID: ovntypes.OVNClusterRouter + "-UUID", + Policies: []string{}, + }, + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2Name, + Networks: []string{nodeLogicalRouterIfAddrV6}, + }, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node1Name, + UUID: ovntypes.GWRouterPrefix + node1Name + "-UUID", + }, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node2Name, + UUID: ovntypes.GWRouterPrefix + node2Name + "-UUID", + Nat: nil, + }, + } + + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) + return nil + } + + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }, + ginkgotable.Entry("interconnect disabled; non-ic - single zone setup", false, true, true), + ginkgotable.Entry("interconnect enabled; pod and egressnode are in local zone", true, true, true), + ginkgotable.Entry("interconnect enabled; pod is in local zone and egressnode is in remote zone", true, true, false), // snat won't be visible + ginkgotable.Entry("interconnect enabled; pod is in remote zone and egressnode is in local zone", true, false, true), + ) + ginkgotable.DescribeTable("egressIP pod retry should remove OVN pod egress setup when EgressIP stops matching pod label", + func(interconnect bool, podZone string) { + config.OVNKubernetesFeature.EnableInterconnect = 
interconnect + app.Action = func(ctx *cli.Context) error { + + egressIP := net.ParseIP("0:0:0:0:0:feff:c0a8:8e0d") + + egressPod := *newPodWithLabels(namespace, podName, node1Name, podV6IP, egressPodLabel) + egressNamespace := newNamespace(namespace) + + fakeOvn.startWithDBSetup( + libovsdbtest.TestSetup{ + NBData: []libovsdbtest.TestData{ + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2Name, + Networks: []string{nodeLogicalRouterIfAddrV6}, + }, + &nbdb.LogicalRouter{ + Name: ovntypes.OVNClusterRouter, + UUID: ovntypes.OVNClusterRouter + "-UUID", + }, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node1Name, + UUID: ovntypes.GWRouterPrefix + node1Name + "-UUID", + }, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node2Name, + UUID: ovntypes.GWRouterPrefix + node2Name + "-UUID", + Nat: nil, + }, + }, + }, + &v1.NamespaceList{ + Items: []v1.Namespace{*egressNamespace}, + }, + &v1.PodList{ + Items: []v1.Pod{egressPod}, + }, + ) + eIP := egressipv1.EgressIP{ + ObjectMeta: newEgressIPMeta(egressIPName), + Spec: egressipv1.EgressIPSpec{ + EgressIPs: []string{ + egressIP.String(), }, + PodSelector: metav1.LabelSelector{ + MatchLabels: egressPodLabel, + }, + NamespaceSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{ + "name": egressNamespace.Name, + }, + }, + }, + } + + i, n, _ := net.ParseCIDR(podV6IP + "/23") + n.IP = i + fakeOvn.controller.logicalPortCache.add(&egressPod, "", types.DefaultNetworkName, "", nil, []*net.IPNet{n}) + // hack pod to be in the provided zone + fakeOvn.controller.eIPC.nodeZoneState.Store(node1Name, true) + fakeOvn.controller.eIPC.nodeZoneState.Store(node2Name, true) + if podZone == "remote" { + fakeOvn.controller.eIPC.nodeZoneState.Store(node1Name, false) + } + + err := fakeOvn.controller.WatchEgressIPNamespaces() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) 
+ err = fakeOvn.controller.WatchEgressIPPods() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchEgressIP() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + _, err = fakeOvn.fakeClient.EgressIPClient.K8sV1().EgressIPs().Create(context.TODO(), &eIP, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + fakeOvn.patchEgressIPObj(node2Name, egressIP.String()) + + gomega.Eventually(getEgressIPStatusLen(eIP.Name)).Should(gomega.Equal(1)) + + expectedNatLogicalPort := "k8s-node2" + expectedDatabaseState := []libovsdbtest.TestData{ + getReRoutePolicy(egressPod.Status.PodIP, "6", nodeLogicalRouterIPv6), + getEIPSNAT(podV6IP, egressIP.String(), expectedNatLogicalPort), + &nbdb.LogicalRouter{ + Name: ovntypes.OVNClusterRouter, + UUID: ovntypes.OVNClusterRouter + "-UUID", + Policies: []string{"reroute-UUID"}, + }, + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2Name, + Networks: []string{nodeLogicalRouterIfAddrV6}, + }, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node1Name, + UUID: ovntypes.GWRouterPrefix + node1Name + "-UUID", + }, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node2Name, + UUID: ovntypes.GWRouterPrefix + node2Name + "-UUID", + Nat: []string{"egressip-nat-UUID"}, + }, + } + if podZone == "remote" { + // egressNode is in different zone than pod and egressNode is in local zone, so static reroute will be visible + expectedDatabaseState = append(expectedDatabaseState, getReRouteStaticRoute(egressPod.Status.PodIP, nodeLogicalRouterIPv6[0])) + expectedDatabaseState[2].(*nbdb.LogicalRouter).Policies = []string{} + expectedDatabaseState[2].(*nbdb.LogicalRouter).StaticRoutes = []string{"reroute-static-route-UUID"} + expectedDatabaseState = expectedDatabaseState[1:] + } + + 
gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) + + egressIPs, nodes := getEgressIPStatus(eIP.Name) + gomega.Expect(nodes[0]).To(gomega.Equal(node2Name)) + gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP.String())) + + podUpdate := newPod(namespace, podName, node1Name, podV6IP) + ginkgo.By("Bringing down NBDB") + // inject transient problem, nbdb is down + fakeOvn.controller.nbClient.Close() + gomega.Eventually(func() bool { + return fakeOvn.controller.nbClient.Connected() + }).Should(gomega.BeFalse()) + _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Pods(egressPod.Namespace).Update(context.TODO(), podUpdate, metav1.UpdateOptions{}) + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + time.Sleep(types.OVSDBTimeout + time.Second) + // check to see if the retry cache has an entry + var key string + key, err = retry.GetResourceKey(podUpdate) + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + + ginkgo.By("retry entry: new obj should not be nil, config should not be nil") + retry.CheckRetryObjectMultipleFieldsEventually( + key, + fakeOvn.controller.retryEgressIPPods, + gomega.BeNil(), // oldObj should be nil + gomega.Not(gomega.BeNil()), // newObj should not be nil + gomega.Not(gomega.BeNil()), // config should not be nil + ) + + connCtx, cancel := context.WithTimeout(context.Background(), types.OVSDBTimeout) + defer cancel() + resetNBClient(connCtx, fakeOvn.controller.nbClient) + + retry.SetRetryObjWithNoBackoff(key, fakeOvn.controller.retryEgressIPPods) + fakeOvn.controller.retryEgressIPPods.RequestRetryObjs() + // check the cache no longer has the entry + retry.CheckRetryObjectEventually(key, false, fakeOvn.controller.retryEgressIPPods) + + return nil + } + + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }, + ginkgotable.Entry("interconnect disabled; non-ic - single zone setup", false, "global"), + ginkgotable.Entry("interconnect enabled; pod is in global zone", true, "global"), + 
ginkgotable.Entry("interconnect enabled; pod is in remote zone", true, "remote"), // static re-route is visible but reroute policy won't be + ) + + ginkgo.It("should not treat pod update if pod already had assigned IP when it got the ADD", func() { + app.Action = func(ctx *cli.Context) error { + + egressIP := net.ParseIP("0:0:0:0:0:feff:c0a8:8e0d") + + egressPod := *newPodWithLabels(namespace, podName, node1Name, podV6IP, egressPodLabel) + egressNamespace := newNamespace(namespace) + fakeOvn.startWithDBSetup( + libovsdbtest.TestSetup{ + NBData: []libovsdbtest.TestData{ &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name, - Networks: []string{nodeLogicalRouterIfAddrV4}, + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2Name, + Networks: []string{nodeLogicalRouterIfAddrV6}, }, &nbdb.LogicalRouter{ Name: ovntypes.OVNClusterRouter, UUID: ovntypes.OVNClusterRouter + "-UUID", }, &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node1.Name, - UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", + Name: ovntypes.GWRouterPrefix + node1Name, + UUID: ovntypes.GWRouterPrefix + node1Name + "-UUID", }, &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node2.Name, - UUID: ovntypes.GWRouterPrefix + node2.Name + "-UUID", - Nat: nil, - }, - &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, - }, - }, - &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + 
types.GWRouterPrefix + node2Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, - }, + Name: ovntypes.GWRouterPrefix + node2Name, + UUID: ovntypes.GWRouterPrefix + node2Name + "-UUID", }, }, }, - &egressipv1.EgressIPList{ - Items: []egressipv1.EgressIP{eIP}, + &v1.NamespaceList{ + Items: []v1.Namespace{*egressNamespace}, }, - &v1.NodeList{ - Items: []v1.Node{node1, node2}, - }) + &v1.PodList{ + Items: []v1.Pod{egressPod}, + }, + ) - i, n, _ := net.ParseCIDR(podV4IP + "/23") + eIP := egressipv1.EgressIP{ + ObjectMeta: newEgressIPMeta(egressIPName), + Spec: egressipv1.EgressIPSpec{ + EgressIPs: []string{ + egressIP.String(), + }, + PodSelector: metav1.LabelSelector{ + MatchLabels: egressPodLabel, + }, + NamespaceSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{ + "name": egressNamespace.Name, + }, + }, + }, + } + + i, n, _ := net.ParseCIDR(podV6IP + "/23") n.IP = i fakeOvn.controller.logicalPortCache.add(&egressPod, "", types.DefaultNetworkName, "", nil, []*net.IPNet{n}) @@ -935,75 +1668,44 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { gomega.Expect(err).NotTo(gomega.HaveOccurred()) err = fakeOvn.controller.WatchEgressIPPods() gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = fakeOvn.controller.WatchEgressNodes() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - gomega.Eventually(getEgressIPAllocatorSizeSafely).Should(gomega.Equal(2)) - gomega.Expect(fakeOvn.controller.eIPC.allocator.cache).To(gomega.HaveKey(node1.Name)) - gomega.Expect(fakeOvn.controller.eIPC.allocator.cache).To(gomega.HaveKey(node2.Name)) - gomega.Eventually(isEgressAssignableNode(node1.Name)).Should(gomega.BeTrue()) - gomega.Eventually(isEgressAssignableNode(node2.Name)).Should(gomega.BeFalse()) - - lsp := &nbdb.LogicalSwitchPort{Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name} - fakeOvn.controller.nbClient.Get(context.Background(), lsp) - 
gomega.Eventually(lsp.Options["nat-addresses"]).Should(gomega.Equal("router")) - gomega.Eventually(lsp.Options["exclude-lb-vips-from-garp"]).Should(gomega.Equal("true")) - err = fakeOvn.controller.WatchEgressIP() gomega.Expect(err).NotTo(gomega.HaveOccurred()) + fakeOvn.controller.eIPC.nodeZoneState.Store(node1Name, true) + fakeOvn.controller.eIPC.nodeZoneState.Store(node2Name, true) - gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) - egressIPs, nodes := getEgressIPStatus(egressIPName) - gomega.Expect(nodes[0]).To(gomega.Equal(node1.Name)) - gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP)) - - node1.Labels = map[string]string{} - node2.Labels = map[string]string{ - "k8s.ovn.org/egress-assignable": "", - } - - _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Nodes().Update(context.TODO(), &node1, metav1.UpdateOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Nodes().Update(context.TODO(), &node2, metav1.UpdateOptions{}) + _, err = fakeOvn.fakeClient.EgressIPClient.K8sV1().EgressIPs().Create(context.TODO(), &eIP, metav1.CreateOptions{}) gomega.Expect(err).NotTo(gomega.HaveOccurred()) - gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) - gomega.Eventually(nodeSwitch).Should(gomega.Equal(node2.Name)) - egressIPs, _ = getEgressIPStatus(egressIPName) - gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP)) + fakeOvn.patchEgressIPObj(node2Name, egressIP.String()) - _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Namespaces().Create(context.TODO(), egressNamespace, metav1.CreateOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Pods(egressPod.Namespace).Create(context.TODO(), &egressPod, metav1.CreateOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Eventually(getEgressIPStatusLen(eIP.Name)).Should(gomega.Equal(1)) expectedNatLogicalPort := "k8s-node2" 
expectedDatabaseState := []libovsdbtest.TestData{ - &nbdb.LogicalRouterPolicy{ - Priority: types.DefaultNoRereoutePriority, - Match: "ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14", - Action: nbdb.LogicalRouterPolicyActionAllow, - UUID: "default-no-reroute-UUID", - }, - &nbdb.LogicalRouterPolicy{ - Priority: types.DefaultNoRereoutePriority, - Match: fmt.Sprintf("ip4.src == 10.128.0.0/14 && ip4.dst == %s", config.Gateway.V4JoinSubnet), - Action: nbdb.LogicalRouterPolicyActionAllow, - UUID: "no-reroute-service-UUID", - }, &nbdb.LogicalRouterPolicy{ Priority: types.EgressIPReroutePriority, - Match: fmt.Sprintf("ip4.src == %s", egressPod.Status.PodIP), + Match: fmt.Sprintf("ip6.src == %s", egressPod.Status.PodIP), Action: nbdb.LogicalRouterPolicyActionReroute, - Nexthops: nodeLogicalRouterIPv4, + Nexthops: nodeLogicalRouterIPv6, ExternalIDs: map[string]string{ "name": eIP.Name, }, UUID: "reroute-UUID", }, + &nbdb.LogicalRouter{ + Name: ovntypes.OVNClusterRouter, + UUID: ovntypes.OVNClusterRouter + "-UUID", + Policies: []string{"reroute-UUID"}, + }, + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2Name, + Networks: []string{nodeLogicalRouterIfAddrV6}, + }, &nbdb.NAT{ UUID: "egressip-nat-UUID", - LogicalIP: podV4IP, - ExternalIP: egressIP, + LogicalIP: podV6IP, + ExternalIP: egressIP.String(), ExternalIDs: map[string]string{ "name": egressIPName, }, @@ -1014,50 +1716,31 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { }, }, &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node1.Name, - UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", + Name: ovntypes.GWRouterPrefix + node1Name, + UUID: ovntypes.GWRouterPrefix + node1Name + "-UUID", }, &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node2.Name, - UUID: ovntypes.GWRouterPrefix + node2.Name + "-UUID", + Name: ovntypes.GWRouterPrefix 
+ node2Name, + UUID: ovntypes.GWRouterPrefix + node2Name + "-UUID", Nat: []string{"egressip-nat-UUID"}, }, - &nbdb.LogicalRouter{ - Name: ovntypes.OVNClusterRouter, - UUID: ovntypes.OVNClusterRouter + "-UUID", - Policies: []string{"reroute-UUID", "default-no-reroute-UUID", "no-reroute-service-UUID"}, - }, - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name, - Networks: []string{nodeLogicalRouterIfAddrV4}, - }, - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name, - Networks: []string{nodeLogicalRouterIfAddrV4}, - }, - &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, - }, - }, - &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, - "nat-addresses": "router", - "exclude-lb-vips-from-garp": "true", - }, - }, } + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) + + egressIPs, nodes := getEgressIPStatus(eIP.Name) + gomega.Expect(nodes[0]).To(gomega.Equal(node2Name)) + gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP.String())) - gomega.Eventually(fakeOvn.nbClient, inspectTimeout).Should(libovsdbtest.HaveData(expectedDatabaseState)) + podUpdate := newPodWithLabels(namespace, podName, node1Name, podV6IP, map[string]string{ + "egress": 
"needed", + "some": "update", + }) + + _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Pods(egressPod.Namespace).Update(context.TODO(), podUpdate, metav1.UpdateOptions{}) + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + gomega.Eventually(getEgressIPStatusLen(eIP.Name)).Should(gomega.Equal(1)) + + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) return nil } @@ -1206,58 +1889,164 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { ginkgo.Context("On node DELETE", func() { - ginkgo.It("should re-assign EgressIPs and perform proper OVN transactions when node's gateway objects are already deleted", func() { - app.Action = func(ctx *cli.Context) error { + ginkgotable.DescribeTable("should treat pod update if pod did not have an assigned IP when it got the ADD", + func(interconnect bool, podZone string) { + config.OVNKubernetesFeature.EnableInterconnect = interconnect + app.Action = func(ctx *cli.Context) error { - egressIP := "192.168.126.101" - node1IPv4 := "192.168.126.202/24" - node2IPv4 := "192.168.126.51/24" + egressIP := net.ParseIP("0:0:0:0:0:feff:c0a8:8e0d") - egressPod := *newPodWithLabels(namespace, podName, node1Name, podV4IP, egressPodLabel) - egressNamespace := newNamespace(namespace) + egressPod := *newPodWithLabels(namespace, podName, node1Name, "", egressPodLabel) + egressNamespace := newNamespace(namespace) - node1 := v1.Node{ - ObjectMeta: metav1.ObjectMeta{ - Name: node1Name, - Annotations: map[string]string{ - "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", node1IPv4, ""), - "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), + fakeOvn.startWithDBSetup( + libovsdbtest.TestSetup{ + NBData: []libovsdbtest.TestData{ + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2Name, + Networks: 
[]string{nodeLogicalRouterIfAddrV6}, + }, + &nbdb.LogicalRouter{ + Name: ovntypes.OVNClusterRouter, + UUID: ovntypes.OVNClusterRouter + "-UUID", + }, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node1Name, + UUID: ovntypes.GWRouterPrefix + node1Name + "-UUID", + }, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node2Name, + UUID: ovntypes.GWRouterPrefix + node2Name + "-UUID", + Nat: nil, + }, + }, }, - Labels: map[string]string{ - "k8s.ovn.org/egress-assignable": "", + &v1.NamespaceList{ + Items: []v1.Namespace{*egressNamespace}, }, - }, - Status: v1.NodeStatus{ - Conditions: []v1.NodeCondition{ - { - Type: v1.NodeReady, - Status: v1.ConditionTrue, + &v1.PodList{ + Items: []v1.Pod{egressPod}, + }, + ) + + eIP := egressipv1.EgressIP{ + ObjectMeta: newEgressIPMeta(egressIPName), + Spec: egressipv1.EgressIPSpec{ + EgressIPs: []string{ + egressIP.String(), + }, + PodSelector: metav1.LabelSelector{ + MatchLabels: egressPodLabel, + }, + NamespaceSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{ + "name": egressNamespace.Name, + }, }, }, - }, - } - node2 := v1.Node{ - ObjectMeta: metav1.ObjectMeta{ - Name: node2Name, - Annotations: map[string]string{ - "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", node2IPv4, ""), - "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), + } + + err := fakeOvn.controller.WatchEgressIPNamespaces() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchEgressIPPods() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchEgressIP() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + _, err = fakeOvn.fakeClient.EgressIPClient.K8sV1().EgressIPs().Create(context.TODO(), &eIP, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + fakeOvn.patchEgressIPObj(node2Name, egressIP.String()) + + gomega.Eventually(getEgressIPStatusLen(eIP.Name)).Should(gomega.Equal(1)) + + 
egressIPs, nodes := getEgressIPStatus(eIP.Name) + gomega.Expect(nodes[0]).To(gomega.Equal(node2Name)) + gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP.String())) + + podUpdate := newPodWithLabels(namespace, podName, node1Name, podV6IP, egressPodLabel) + podUpdate.Annotations = map[string]string{ + "k8s.ovn.org/pod-networks": fmt.Sprintf("{\"default\":{\"ip_addresses\":[\"%s/23\"],\"mac_address\":\"0a:58:0a:83:00:0f\",\"gateway_ips\":[\"%s\"],\"ip_address\":\"%s/23\",\"gateway_ip\":\"%s\"}}", podV6IP, v6GatewayIP, podV6IP, v6GatewayIP), + } + i, n, _ := net.ParseCIDR(podV6IP + "/23") + n.IP = i + fakeOvn.controller.logicalPortCache.add(&egressPod, "", types.DefaultNetworkName, "", nil, []*net.IPNet{n}) + // hack pod to be in the provided zone + fakeOvn.controller.eIPC.nodeZoneState.Store(node1Name, true) + fakeOvn.controller.eIPC.nodeZoneState.Store(node2Name, true) + if podZone == "remote" { + fakeOvn.controller.eIPC.nodeZoneState.Store(node1Name, false) + } + + _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Pods(egressPod.Namespace).Update(context.TODO(), podUpdate, metav1.UpdateOptions{}) + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + gomega.Eventually(getEgressIPStatusLen(eIP.Name)).Should(gomega.Equal(1)) + + expectedNatLogicalPort := "k8s-node2" + expectedDatabaseState := []libovsdbtest.TestData{ + getReRoutePolicy(podV6IP, "6", nodeLogicalRouterIPv6), + getEIPSNAT(podV6IP, egressIP.String(), expectedNatLogicalPort), + &nbdb.LogicalRouter{ + Name: ovntypes.OVNClusterRouter, + UUID: ovntypes.OVNClusterRouter + "-UUID", + Policies: []string{"reroute-UUID"}, }, - }, - Status: v1.NodeStatus{ - Conditions: []v1.NodeCondition{ - { - Type: v1.NodeReady, - Status: v1.ConditionTrue, - }, + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2Name, + Networks: []string{nodeLogicalRouterIfAddrV6}, }, - }, + 
&nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node1Name, + UUID: ovntypes.GWRouterPrefix + node1Name + "-UUID", + }, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node2Name, + UUID: ovntypes.GWRouterPrefix + node2Name + "-UUID", + Nat: []string{"egressip-nat-UUID"}, + }, + } + if podZone == "remote" { + // egressNode is in different zone than pod and egressNode is in local zone, so static reroute will be visible + expectedDatabaseState = append(expectedDatabaseState, getReRouteStaticRoute(podV6IP, nodeLogicalRouterIPv6[0])) + expectedDatabaseState[2].(*nbdb.LogicalRouter).Policies = []string{} + expectedDatabaseState[2].(*nbdb.LogicalRouter).StaticRoutes = []string{"reroute-static-route-UUID"} + expectedDatabaseState = expectedDatabaseState[1:] + } + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) + return nil } + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }, + ginkgotable.Entry("interconnect disabled; non-ic - single zone setup", false, "global"), + ginkgotable.Entry("interconnect enabled; pod is in global zone", true, "global"), + ginkgotable.Entry("interconnect enabled; pod is in remote zone", true, "remote"), // static re-route is visible but reroute policy won't be + ) + + ginkgo.It("should not treat pod DELETE if pod did not have an assigned IP when it got the ADD and we receive a DELETE before the IP UPDATE", func() { + app.Action = func(ctx *cli.Context) error { + + egressIP := net.ParseIP("0:0:0:0:0:feff:c0a8:8e0d") + + egressPod := *newPodWithLabels(namespace, podName, node1Name, "", egressPodLabel) + egressNamespace := newNamespace(namespace) + fakeOvn.startWithDBSetup(clusterRouterDbSetup, + &v1.NamespaceList{ + Items: []v1.Namespace{*egressNamespace}, + }, + &v1.PodList{ + Items: []v1.Pod{egressPod}, + }, + ) eIP := egressipv1.EgressIP{ ObjectMeta: newEgressIPMeta(egressIPName), Spec: egressipv1.EgressIPSpec{ - EgressIPs: []string{egressIP}, + 
EgressIPs: []string{ + egressIP.String(), + }, PodSelector: metav1.LabelSelector{ MatchLabels: egressPodLabel, }, @@ -1267,327 +2056,541 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { }, }, }, - Status: egressipv1.EgressIPStatus{ - Items: []egressipv1.EgressIPStatusItem{}, - }, } - fakeOvn.startWithDBSetup( - libovsdbtest.TestSetup{ - NBData: []libovsdbtest.TestData{ - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name, - Networks: []string{nodeLogicalRouterIfAddrV4}, - }, - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name, - Networks: []string{node2LogicalRouterIfAddrV4}, + err := fakeOvn.controller.WatchEgressIPNamespaces() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchEgressIPPods() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchEgressIP() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + _, err = fakeOvn.fakeClient.EgressIPClient.K8sV1().EgressIPs().Create(context.TODO(), &eIP, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + fakeOvn.patchEgressIPObj(node2Name, egressIP.String()) + + gomega.Eventually(getEgressIPStatusLen(eIP.Name)).Should(gomega.Equal(1)) + + egressIPs, nodes := getEgressIPStatus(eIP.Name) + gomega.Expect(nodes[0]).To(gomega.Equal(node2Name)) + gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP.String())) + + err = fakeOvn.fakeClient.KubeClient.CoreV1().Pods(egressPod.Namespace).Delete(context.TODO(), egressPod.Name, *metav1.NewDeleteOptions(0)) + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + gomega.Eventually(getEgressIPStatusLen(eIP.Name)).Should(gomega.Equal(1)) + return nil + } + + err := 
app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + }) + + ginkgo.Context("IPv6 on namespace UPDATE", func() { + + ginkgotable.DescribeTable("should remove OVN pod egress setup when EgressIP is deleted", + func(interconnect bool, podZone string) { + config.OVNKubernetesFeature.EnableInterconnect = interconnect + app.Action = func(ctx *cli.Context) error { + + egressIP := net.ParseIP("0:0:0:0:0:feff:c0a8:8e0d") + + egressPod := *newPodWithLabels(namespace, podName, node1Name, podV6IP, egressPodLabel) + egressNamespace := newNamespaceWithLabels(namespace, egressPodLabel) + + fakeOvn.startWithDBSetup( + libovsdbtest.TestSetup{ + NBData: []libovsdbtest.TestData{ + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2Name, + Networks: []string{nodeLogicalRouterIfAddrV6}, + }, + &nbdb.LogicalRouter{ + Name: ovntypes.OVNClusterRouter, + UUID: ovntypes.OVNClusterRouter + "-UUID", + }, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node1Name, + UUID: ovntypes.GWRouterPrefix + node1Name + "-UUID", + }, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node2Name, + UUID: ovntypes.GWRouterPrefix + node2Name + "-UUID", + Nat: nil, + }, }, - &nbdb.LogicalRouter{ - Name: ovntypes.OVNClusterRouter, - UUID: ovntypes.OVNClusterRouter + "-UUID", + }, + &v1.NamespaceList{ + Items: []v1.Namespace{*egressNamespace}, + }, + &v1.PodList{ + Items: []v1.Pod{egressPod}, + }, + ) + + eIP := egressipv1.EgressIP{ + ObjectMeta: newEgressIPMeta(egressIPName), + Spec: egressipv1.EgressIPSpec{ + EgressIPs: []string{ + egressIP.String(), }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node1.Name, - UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", + PodSelector: metav1.LabelSelector{ + MatchLabels: egressPodLabel, }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node2.Name, - UUID: 
ovntypes.GWRouterPrefix + node2.Name + "-UUID", + NamespaceSelector: metav1.LabelSelector{ + MatchLabels: egressPodLabel, }, - &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, + }, + } + + i, n, _ := net.ParseCIDR(podV6IP + "/23") + n.IP = i + fakeOvn.controller.logicalPortCache.add(&egressPod, "", types.DefaultNetworkName, "", nil, []*net.IPNet{n}) + // hack pod to be in the provided zone + fakeOvn.controller.eIPC.nodeZoneState.Store(node1Name, true) + fakeOvn.controller.eIPC.nodeZoneState.Store(node2Name, true) + if podZone == "remote" { + fakeOvn.controller.eIPC.nodeZoneState.Store(node1Name, false) + } + + err := fakeOvn.controller.WatchEgressIPNamespaces() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchEgressIPPods() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchEgressIP() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + _, err = fakeOvn.fakeClient.EgressIPClient.K8sV1().EgressIPs().Create(context.TODO(), &eIP, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + fakeOvn.patchEgressIPObj(node2Name, egressIP.String()) + + gomega.Eventually(getEgressIPStatusLen(eIP.Name)).Should(gomega.Equal(1)) + + expectedNatLogicalPort := "k8s-node2" + expectedDatabaseState := []libovsdbtest.TestData{ + getReRoutePolicy(egressPod.Status.PodIP, "6", nodeLogicalRouterIPv6), + getEIPSNAT(podV6IP, egressIP.String(), expectedNatLogicalPort), + &nbdb.LogicalRouter{ + Name: ovntypes.OVNClusterRouter, + UUID: ovntypes.OVNClusterRouter + "-UUID", + Policies: []string{"reroute-UUID"}, + }, + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2Name + "-UUID", + Name: 
ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2Name, + Networks: []string{nodeLogicalRouterIfAddrV6}, + }, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node1Name, + UUID: ovntypes.GWRouterPrefix + node1Name + "-UUID", + }, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node2Name, + UUID: ovntypes.GWRouterPrefix + node2Name + "-UUID", + Nat: []string{"egressip-nat-UUID"}, + }, + } + if podZone == "remote" { + // egressNode is in different zone than pod and egressNode is in local zone, so static reroute will be visible + expectedDatabaseState = append(expectedDatabaseState, getReRouteStaticRoute(egressPod.Status.PodIP, nodeLogicalRouterIPv6[0])) + expectedDatabaseState[2].(*nbdb.LogicalRouter).Policies = []string{} + expectedDatabaseState[2].(*nbdb.LogicalRouter).StaticRoutes = []string{"reroute-static-route-UUID"} + expectedDatabaseState = expectedDatabaseState[1:] + } + + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) + + egressIPs, nodes := getEgressIPStatus(eIP.Name) + gomega.Expect(nodes[0]).To(gomega.Equal(node2Name)) + gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP.String())) + + err = fakeOvn.fakeClient.EgressIPClient.K8sV1().EgressIPs().Delete(context.TODO(), eIP.Name, metav1.DeleteOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + expectedDatabaseState = []libovsdbtest.TestData{ + &nbdb.LogicalRouter{ + Name: ovntypes.OVNClusterRouter, + UUID: ovntypes.OVNClusterRouter + "-UUID", + }, + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2Name, + Networks: []string{nodeLogicalRouterIfAddrV6}, + }, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node1Name, + UUID: ovntypes.GWRouterPrefix + node1Name + "-UUID", + }, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node2Name, + UUID: 
ovntypes.GWRouterPrefix + node2Name + "-UUID", + Nat: nil, + }, + } + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) + return nil + } + + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }, + ginkgotable.Entry("interconnect disabled; non-ic - single zone setup", false, "global"), + ginkgotable.Entry("interconnect enabled; pod is in global zone", true, "global"), + ginkgotable.Entry("interconnect enabled; pod is in remote zone", true, "remote"), + ) + + ginkgotable.DescribeTable("egressIP retry should remove OVN pod egress setup when EgressIP is deleted", + func(interconnect bool, podZone string) { + config.OVNKubernetesFeature.EnableInterconnect = interconnect + app.Action = func(ctx *cli.Context) error { + + egressIP := net.ParseIP("0:0:0:0:0:feff:c0a8:8e0d") + + egressPod := *newPodWithLabels(namespace, podName, node1Name, podV6IP, egressPodLabel) + egressNamespace := newNamespaceWithLabels(namespace, egressPodLabel) + + fakeOvn.startWithDBSetup( + libovsdbtest.TestSetup{ + NBData: []libovsdbtest.TestData{ + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2Name, + Networks: []string{nodeLogicalRouterIfAddrV6}, }, - }, - &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, + &nbdb.LogicalRouter{ + Name: ovntypes.OVNClusterRouter, + UUID: ovntypes.OVNClusterRouter + "-UUID", + }, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node1Name, + UUID: ovntypes.GWRouterPrefix + node1Name + "-UUID", }, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node2Name, + UUID: 
ovntypes.GWRouterPrefix + node2Name + "-UUID", + Nat: nil, + }, + }, + }, + &v1.NamespaceList{ + Items: []v1.Namespace{*egressNamespace}, + }, + &v1.PodList{ + Items: []v1.Pod{egressPod}, + }, + ) + eIP := egressipv1.EgressIP{ + ObjectMeta: newEgressIPMeta(egressIPName), + Spec: egressipv1.EgressIPSpec{ + EgressIPs: []string{ + egressIP.String(), + }, + PodSelector: metav1.LabelSelector{ + MatchLabels: egressPodLabel, }, - &nbdb.LogicalSwitch{ - UUID: types.OVNJoinSwitch + "-UUID", - Name: types.OVNJoinSwitch, + NamespaceSelector: metav1.LabelSelector{ + MatchLabels: egressPodLabel, }, }, - }, - &egressipv1.EgressIPList{ - Items: []egressipv1.EgressIP{eIP}, - }, - &v1.NodeList{ - Items: []v1.Node{node1, node2}, - }, - &v1.NamespaceList{ - Items: []v1.Namespace{*egressNamespace}, - }) + } - err := fakeOvn.controller.WatchEgressIPNamespaces() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = fakeOvn.controller.WatchEgressIPPods() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = fakeOvn.controller.WatchEgressNodes() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = fakeOvn.controller.WatchEgressIP() + i, n, _ := net.ParseCIDR(podV6IP + "/23") + n.IP = i + fakeOvn.controller.logicalPortCache.add(&egressPod, "", types.DefaultNetworkName, "", nil, []*net.IPNet{n}) - gomega.Eventually(getEgressIPAllocatorSizeSafely).Should(gomega.Equal(2)) - gomega.Expect(fakeOvn.controller.eIPC.allocator.cache).To(gomega.HaveKey(node1.Name)) - gomega.Expect(fakeOvn.controller.eIPC.allocator.cache).To(gomega.HaveKey(node2.Name)) - gomega.Eventually(isEgressAssignableNode(node1.Name)).Should(gomega.BeTrue()) - gomega.Eventually(isEgressAssignableNode(node2.Name)).Should(gomega.BeFalse()) + // hack pod to be in the provided zone + fakeOvn.controller.eIPC.nodeZoneState.Store(node1Name, true) + fakeOvn.controller.eIPC.nodeZoneState.Store(node2Name, true) + if podZone == "remote" { + fakeOvn.controller.eIPC.nodeZoneState.Store(node1Name, false) + } - lsp := 
&nbdb.LogicalSwitchPort{Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name} - fakeOvn.controller.nbClient.Get(context.Background(), lsp) - gomega.Eventually(lsp.Options["nat-addresses"]).Should(gomega.Equal("router")) - gomega.Eventually(lsp.Options["exclude-lb-vips-from-garp"]).Should(gomega.Equal("true")) + err := fakeOvn.controller.WatchEgressIPNamespaces() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchEgressIPPods() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchEgressIP() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) - gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) - egressIPs, nodes := getEgressIPStatus(egressIPName) - gomega.Expect(nodes[0]).To(gomega.Equal(node1.Name)) - gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP)) + ginkgo.By("Bringing down NBDB") + // inject transient problem, nbdb is down + fakeOvn.controller.nbClient.Close() + gomega.Eventually(func() bool { + return fakeOvn.controller.nbClient.Connected() + }).Should(gomega.BeFalse()) - node2.Labels = map[string]string{ - "k8s.ovn.org/egress-assignable": "", - } - _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Nodes().Update(context.TODO(), &node2, metav1.UpdateOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) + _, err = fakeOvn.fakeClient.EgressIPClient.K8sV1().EgressIPs().Create(context.TODO(), &eIP, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) - i, n, _ := net.ParseCIDR(podV4IP + "/23") - n.IP = i - fakeOvn.controller.logicalPortCache.add(&egressPod, "", types.DefaultNetworkName, "", nil, []*net.IPNet{n}) - _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Pods(egressPod.Namespace).Create(context.TODO(), &egressPod, metav1.CreateOptions{}) + fakeOvn.patchEgressIPObj(node2Name, egressIP.String()) - expectedNatLogicalPort := "k8s-node1" - expectedDatabaseState := []libovsdbtest.TestData{ - &nbdb.LogicalRouterPolicy{ - Priority: 
types.DefaultNoRereoutePriority, - Match: "ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14", - Action: nbdb.LogicalRouterPolicyActionAllow, - UUID: "default-no-reroute-UUID", - }, - &nbdb.LogicalRouterPolicy{ - Priority: types.DefaultNoRereoutePriority, - Match: fmt.Sprintf("ip4.src == 10.128.0.0/14 && ip4.dst == %s", config.Gateway.V4JoinSubnet), - Action: nbdb.LogicalRouterPolicyActionAllow, - UUID: "no-reroute-service-UUID", - }, - &nbdb.LogicalRouterPolicy{ - Priority: types.EgressIPReroutePriority, - Match: fmt.Sprintf("ip4.src == %s", egressPod.Status.PodIP), - Action: nbdb.LogicalRouterPolicyActionReroute, - Nexthops: nodeLogicalRouterIPv4, - ExternalIDs: map[string]string{ - "name": eIP.Name, + // sleep long enough for TransactWithRetry to fail, causing egressnode operations to fail + time.Sleep(types.OVSDBTimeout + time.Second) + // check to see if the retry cache has an entry + key, err := retry.GetResourceKey(&eIP) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + retry.CheckRetryObjectEventually(key, true, fakeOvn.controller.retryEgressIPs) + + connCtx, cancel := context.WithTimeout(context.Background(), types.OVSDBTimeout) + defer cancel() + resetNBClient(connCtx, fakeOvn.controller.nbClient) + retry.SetRetryObjWithNoBackoff(key, fakeOvn.controller.retryEgressIPs) + fakeOvn.controller.retryEgressIPs.RequestRetryObjs() + // check the cache no longer has the entry + retry.CheckRetryObjectEventually(key, false, fakeOvn.controller.retryEgressIPs) + + gomega.Eventually(getEgressIPStatusLen(eIP.Name)).Should(gomega.Equal(1)) + + expectedNatLogicalPort := "k8s-node2" + expectedDatabaseState := []libovsdbtest.TestData{ + getReRoutePolicy(egressPod.Status.PodIP, "6", nodeLogicalRouterIPv6), + getEIPSNAT(podV6IP, egressIP.String(), expectedNatLogicalPort), + &nbdb.LogicalRouter{ + Name: ovntypes.OVNClusterRouter, + UUID: ovntypes.OVNClusterRouter + "-UUID", + Policies: []string{"reroute-UUID"}, }, - UUID: "reroute-UUID", - }, - &nbdb.NAT{ - UUID: 
"egressip-nat-UUID", - LogicalIP: podV4IP, - ExternalIP: egressIP, - ExternalIDs: map[string]string{ - "name": egressIPName, + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2Name, + Networks: []string{nodeLogicalRouterIfAddrV6}, }, - Type: nbdb.NATTypeSNAT, - LogicalPort: &expectedNatLogicalPort, - Options: map[string]string{ - "stateless": "false", + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node1Name, + UUID: ovntypes.GWRouterPrefix + node1Name + "-UUID", }, - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node1.Name, - UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", - Nat: []string{"egressip-nat-UUID"}, - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node2.Name, - UUID: ovntypes.GWRouterPrefix + node2.Name + "-UUID", - Nat: []string{}, - }, - &nbdb.LogicalRouter{ - Name: ovntypes.OVNClusterRouter, - UUID: ovntypes.OVNClusterRouter + "-UUID", - Policies: []string{"reroute-UUID", "default-no-reroute-UUID", "no-reroute-service-UUID"}, - }, - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name, - Networks: []string{nodeLogicalRouterIfAddrV4}, - }, - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name, - Networks: []string{node2LogicalRouterIfAddrV4}, - }, - &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, - "nat-addresses": 
"router", - "exclude-lb-vips-from-garp": "true", + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node2Name, + UUID: ovntypes.GWRouterPrefix + node2Name + "-UUID", + Nat: []string{"egressip-nat-UUID"}, }, - }, - &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, - "nat-addresses": "router", - "exclude-lb-vips-from-garp": "true", + } + if podZone == "remote" { + // egressNode is in different zone than pod and egressNode is in local zone, so static reroute will be visible + expectedDatabaseState = append(expectedDatabaseState, getReRouteStaticRoute(egressPod.Status.PodIP, nodeLogicalRouterIPv6[0])) + expectedDatabaseState[2].(*nbdb.LogicalRouter).Policies = []string{} + expectedDatabaseState[2].(*nbdb.LogicalRouter).StaticRoutes = []string{"reroute-static-route-UUID"} + expectedDatabaseState = expectedDatabaseState[1:] + } + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) + + egressIPs, nodes := getEgressIPStatus(eIP.Name) + gomega.Expect(nodes[0]).To(gomega.Equal(node2Name)) + gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP.String())) + + err = fakeOvn.fakeClient.EgressIPClient.K8sV1().EgressIPs().Delete(context.TODO(), eIP.Name, metav1.DeleteOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + expectedDatabaseState = []libovsdbtest.TestData{ + &nbdb.LogicalRouter{ + Name: ovntypes.OVNClusterRouter, + UUID: ovntypes.OVNClusterRouter + "-UUID", }, - }, - &nbdb.LogicalSwitch{ - UUID: types.OVNJoinSwitch + "-UUID", - Name: types.OVNJoinSwitch, - }, + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2Name, + 
Networks: []string{nodeLogicalRouterIfAddrV6}, + }, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node1Name, + UUID: ovntypes.GWRouterPrefix + node1Name + "-UUID", + }, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node2Name, + UUID: ovntypes.GWRouterPrefix + node2Name + "-UUID", + Nat: nil, + }, + } + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) + return nil } - gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) - - err = fakeOvn.controller.gatewayCleanup(node1Name) // simulate an already deleted node + err := app.Run([]string{app.Name}) gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }, + ginkgotable.Entry("interconnect disabled; non-ic - single zone setup", false, "global"), + ginkgotable.Entry("interconnect enabled; pod is in global zone", true, "global"), + ginkgotable.Entry("interconnect enabled; pod is in remote zone", true, "remote"), + ) + + ginkgotable.DescribeTable("should remove OVN pod egress setup when EgressIP stops matching", + func(interconnect bool, podZone string) { + config.OVNKubernetesFeature.EnableInterconnect = interconnect + app.Action = func(ctx *cli.Context) error { + + egressIP := net.ParseIP("0:0:0:0:0:feff:c0a8:8e0d") + + egressPod := *newPodWithLabels(namespace, podName, node1Name, podV6IP, egressPodLabel) + egressNamespace := newNamespaceWithLabels(namespace, egressPodLabel) + + fakeOvn.startWithDBSetup( + libovsdbtest.TestSetup{ + NBData: []libovsdbtest.TestData{ + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2Name, + Networks: []string{nodeLogicalRouterIfAddrV6}, + }, + &nbdb.LogicalRouter{ + Name: ovntypes.OVNClusterRouter, + UUID: ovntypes.OVNClusterRouter + "-UUID", + }, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node1Name, + UUID: ovntypes.GWRouterPrefix + 
node1Name + "-UUID", + }, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node2Name, + UUID: ovntypes.GWRouterPrefix + node2Name + "-UUID", + Nat: nil, + }, + }, + }, + &v1.NamespaceList{ + Items: []v1.Namespace{*egressNamespace}, + }, + &v1.PodList{ + Items: []v1.Pod{egressPod}, + }, + ) - err = fakeOvn.fakeClient.KubeClient.CoreV1().Nodes().Delete(context.TODO(), node1Name, metav1.DeleteOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) + i, n, _ := net.ParseCIDR(podV6IP + "/23") + n.IP = i + fakeOvn.controller.logicalPortCache.add(&egressPod, "", types.DefaultNetworkName, "", nil, []*net.IPNet{n}) + // hack pod to be in the provided zone + fakeOvn.controller.eIPC.nodeZoneState.Store(node1Name, true) + fakeOvn.controller.eIPC.nodeZoneState.Store(node2Name, true) + if podZone == "remote" { + fakeOvn.controller.eIPC.nodeZoneState.Store(node1Name, false) + } - // E0608 12:53:33.728155 1161455 egressip.go:882] Allocator error: EgressIP: egressip claims to have an allocation on a node which is unassignable for egress IP: node1 - // W0608 12:53:33.728205 1161455 egressip.go:2030] Unable to retrieve gateway IP for node: node1, protocol is IPv6: false, err: attempt at finding node gateway router network information failed, err: unable to find router port rtoj-GR_node1: object not found - gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) - gomega.Eventually(nodeSwitch).Should(gomega.Equal(node2.Name)) // egressIP successfully reassigned to node2 - egressIPs, _ = getEgressIPStatus(egressIPName) - gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP)) + eIP := egressipv1.EgressIP{ + ObjectMeta: newEgressIPMeta(egressIPName), + Spec: egressipv1.EgressIPSpec{ + EgressIPs: []string{ + egressIP.String(), + }, + PodSelector: metav1.LabelSelector{ + MatchLabels: egressPodLabel, + }, + NamespaceSelector: metav1.LabelSelector{ + MatchLabels: egressPodLabel, + }, + }, + } - expectedNatLogicalPort = "k8s-node2" - expectedDatabaseState = 
[]libovsdbtest.TestData{ - &nbdb.LogicalRouterPolicy{ - Priority: types.DefaultNoRereoutePriority, - Match: "ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14", - Action: nbdb.LogicalRouterPolicyActionAllow, - UUID: "default-no-reroute-UUID", - }, - &nbdb.LogicalRouterPolicy{ - Priority: types.DefaultNoRereoutePriority, - Match: fmt.Sprintf("ip4.src == 10.128.0.0/14 && ip4.dst == %s", config.Gateway.V4JoinSubnet), - Action: nbdb.LogicalRouterPolicyActionAllow, - UUID: "no-reroute-service-UUID", - }, - &nbdb.LogicalRouterPolicy{ - Priority: types.EgressIPReroutePriority, - Match: fmt.Sprintf("ip4.src == %s", egressPod.Status.PodIP), - Action: nbdb.LogicalRouterPolicyActionReroute, - Nexthops: node2LogicalRouterIPv4, - ExternalIDs: map[string]string{ - "name": eIP.Name, + err := fakeOvn.controller.WatchEgressIPNamespaces() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchEgressIPPods() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchEgressIP() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + _, err = fakeOvn.fakeClient.EgressIPClient.K8sV1().EgressIPs().Create(context.TODO(), &eIP, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + fakeOvn.patchEgressIPObj(node2Name, egressIP.String()) + + gomega.Eventually(getEgressIPStatusLen(eIP.Name)).Should(gomega.Equal(1)) + + expectedNatLogicalPort := "k8s-node2" + expectedDatabaseState := []libovsdbtest.TestData{ + getReRoutePolicy(egressPod.Status.PodIP, "6", nodeLogicalRouterIPv6), + &nbdb.LogicalRouter{ + Name: ovntypes.OVNClusterRouter, + UUID: ovntypes.OVNClusterRouter + "-UUID", + Policies: []string{"reroute-UUID"}, + }, + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2Name, + Networks: []string{nodeLogicalRouterIfAddrV6}, + }, + &nbdb.NAT{ + UUID: "egressip-nat-UUID", + 
LogicalIP: podV6IP, + ExternalIP: egressIP.String(), + ExternalIDs: map[string]string{ + "name": egressIPName, + }, + Type: nbdb.NATTypeSNAT, + LogicalPort: &expectedNatLogicalPort, + Options: map[string]string{ + "stateless": "false", + }, }, - UUID: "reroute-UUID", - }, - &nbdb.NAT{ - UUID: "egressip-nat-UUID", - LogicalIP: podV4IP, - ExternalIP: egressIP, - ExternalIDs: map[string]string{ - "name": egressIPName, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node1Name, + UUID: ovntypes.GWRouterPrefix + node1Name + "-UUID", }, - Type: nbdb.NATTypeSNAT, - LogicalPort: &expectedNatLogicalPort, - Options: map[string]string{ - "stateless": "false", + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node2Name, + UUID: ovntypes.GWRouterPrefix + node2Name + "-UUID", + Nat: []string{"egressip-nat-UUID"}, }, - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node2.Name, - UUID: ovntypes.GWRouterPrefix + node2.Name + "-UUID", - Nat: []string{"egressip-nat-UUID"}, - }, - &nbdb.LogicalRouter{ - Name: ovntypes.OVNClusterRouter, - UUID: ovntypes.OVNClusterRouter + "-UUID", - Policies: []string{"reroute-UUID", "default-no-reroute-UUID", "no-reroute-service-UUID"}, - }, - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name, - Networks: []string{node2LogicalRouterIfAddrV4}, - }, - &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, + } + if podZone == "remote" { + // pod is in remote zone, its LRP won't be visible + expectedDatabaseState = append(expectedDatabaseState, getReRouteStaticRoute(egressPod.Status.PodIP, nodeLogicalRouterIPv6[0])) + 
expectedDatabaseState[1].(*nbdb.LogicalRouter).StaticRoutes = []string{"reroute-static-route-UUID"} + expectedDatabaseState[1].(*nbdb.LogicalRouter).Policies = []string{} + expectedDatabaseState = expectedDatabaseState[1:] + } + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) + + egressIPs, nodes := getEgressIPStatus(eIP.Name) + gomega.Expect(nodes[0]).To(gomega.Equal(node2Name)) + gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP.String())) + + namespaceUpdate := newNamespace(namespace) + + _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Namespaces().Update(context.TODO(), namespaceUpdate, metav1.UpdateOptions{}) + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + gomega.Eventually(getEgressIPStatusLen(eIP.Name)).Should(gomega.Equal(1)) + + expectedDatabaseState = []libovsdbtest.TestData{ + &nbdb.LogicalRouter{ + Name: ovntypes.OVNClusterRouter, + UUID: ovntypes.OVNClusterRouter + "-UUID", }, - }, - &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, - "nat-addresses": "router", - "exclude-lb-vips-from-garp": "true", + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2Name, + Networks: []string{nodeLogicalRouterIfAddrV6}, }, - }, - &nbdb.LogicalSwitch{ - UUID: types.OVNJoinSwitch + "-UUID", - Name: types.OVNJoinSwitch, - }, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node1Name, + UUID: ovntypes.GWRouterPrefix + node1Name + "-UUID", + }, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node2Name, + UUID: ovntypes.GWRouterPrefix + node2Name + "-UUID", + Nat: nil, + }, + } + 
gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) + return nil } - gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) - return nil - } - - err := app.Run([]string{app.Name}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - }) - }) - - ginkgo.Context("IPv6 on pod UPDATE", func() { + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }, + ginkgotable.Entry("interconnect disabled; non-ic - single zone setup", false, "global"), + ginkgotable.Entry("interconnect enabled; pod is in global zone", true, "global"), + ginkgotable.Entry("interconnect enabled; pod is in remote zone", true, "remote"), + ) - ginkgo.It("should remove OVN pod egress setup when EgressIP stops matching pod label", func() { + ginkgo.It("should not remove OVN pod egress setup when EgressIP stops matching, but pod never had any IP to begin with", func() { app.Action = func(ctx *cli.Context) error { egressIP := net.ParseIP("0:0:0:0:0:feff:c0a8:8e0d") - egressPod := *newPodWithLabels(namespace, podName, node1Name, podV6IP, egressPodLabel) - egressNamespace := newNamespace(namespace) - - node1 := setupNode(node1Name, []string{"0:0:0:0:0:feff:c0a8:8e0c/64"}, map[string]string{"0:0:0:0:0:feff:c0a8:8e32": "bogus1", "0:0:0:0:0:feff:c0a8:8e1e": "bogus2"}) - node2 := setupNode(node2Name, []string{"0:0:0:0:0:fedf:c0a8:8e0c/64"}, map[string]string{"0:0:0:0:0:feff:c0a8:8e23": "bogus3"}) - - fakeOvn.startWithDBSetup( - libovsdbtest.TestSetup{ - NBData: []libovsdbtest.TestData{ - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.name, - Networks: []string{nodeLogicalRouterIfAddrV6}, - }, - &nbdb.LogicalRouter{ - Name: ovntypes.OVNClusterRouter, - UUID: ovntypes.OVNClusterRouter + "-UUID", - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + 
node1.name, - UUID: ovntypes.GWRouterPrefix + node1.name + "-UUID", - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node2.name, - UUID: ovntypes.GWRouterPrefix + node2.name + "-UUID", - Nat: nil, - }, - }, - }, + egressPod := *newPodWithLabels(namespace, podName, node1Name, "", egressPodLabel) + egressNamespace := newNamespaceWithLabels(namespace, egressPodLabel) + fakeOvn.startWithDBSetup(clusterRouterDbSetup, &v1.NamespaceList{ Items: []v1.Namespace{*egressNamespace}, }, @@ -1596,11 +2599,8 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { }, ) - fakeOvn.controller.eIPC.allocator.cache[node1.name] = &node1 - fakeOvn.controller.eIPC.allocator.cache[node2.name] = &node2 - eIP := egressipv1.EgressIP{ - ObjectMeta: newEgressIPMeta(egressIPName), + ObjectMeta: newEgressIPMeta("egressip"), Spec: egressipv1.EgressIPSpec{ EgressIPs: []string{ egressIP.String(), @@ -1609,17 +2609,11 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { MatchLabels: egressPodLabel, }, NamespaceSelector: metav1.LabelSelector{ - MatchLabels: map[string]string{ - "name": egressNamespace.Name, - }, + MatchLabels: egressPodLabel, }, }, } - i, n, _ := net.ParseCIDR(podV6IP + "/23") - n.IP = i - fakeOvn.controller.logicalPortCache.add(&egressPod, "", types.DefaultNetworkName, "", nil, []*net.IPNet{n}) - err := fakeOvn.controller.WatchEgressIPNamespaces() gomega.Expect(err).NotTo(gomega.HaveOccurred()) err = fakeOvn.controller.WatchEgressIPPods() @@ -1630,112 +2624,418 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { _, err = fakeOvn.fakeClient.EgressIPClient.K8sV1().EgressIPs().Create(context.TODO(), &eIP, metav1.CreateOptions{}) gomega.Expect(err).NotTo(gomega.HaveOccurred()) + fakeOvn.patchEgressIPObj(node2Name, egressIP.String()) + gomega.Eventually(getEgressIPStatusLen(eIP.Name)).Should(gomega.Equal(1)) - expectedNatLogicalPort := "k8s-node2" - expectedDatabaseState := []libovsdbtest.TestData{ - &nbdb.LogicalRouterPolicy{ 
- Priority: types.EgressIPReroutePriority, - Match: fmt.Sprintf("ip6.src == %s", egressPod.Status.PodIP), - Action: nbdb.LogicalRouterPolicyActionReroute, - Nexthops: nodeLogicalRouterIPv6, - ExternalIDs: map[string]string{ - "name": eIP.Name, - }, - UUID: "reroute-UUID", - }, - &nbdb.LogicalRouter{ - Name: ovntypes.OVNClusterRouter, - UUID: ovntypes.OVNClusterRouter + "-UUID", - Policies: []string{"reroute-UUID"}, - }, - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.name, - Networks: []string{nodeLogicalRouterIfAddrV6}, - }, - &nbdb.NAT{ - UUID: "egressip-nat-UUID", - LogicalIP: podV6IP, - ExternalIP: egressIP.String(), + egressIPs, nodes := getEgressIPStatus(eIP.Name) + gomega.Expect(nodes[0]).To(gomega.Equal(node2Name)) + gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP.String())) + + namespaceUpdate := newNamespace(namespace) + + _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Namespaces().Update(context.TODO(), namespaceUpdate, metav1.UpdateOptions{}) + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + gomega.Eventually(getEgressIPStatusLen(eIP.Name)).Should(gomega.Equal(1)) + return nil + } + + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + }) + ginkgo.Context("on EgressIP UPDATE", func() { + + ginkgotable.DescribeTable("should update OVN on EgressIP .spec.egressips change", + func(interconnect bool, node1Zone, node2Zone string) { + config.OVNKubernetesFeature.EnableInterconnect = interconnect + app.Action = func(ctx *cli.Context) error { + + egressIP1 := "192.168.126.101" + egressIP2 := "192.168.126.102" + egressIP3 := "192.168.126.103" + node1IPv4 := "192.168.126.202/24" + node2IPv4 := "192.168.126.51/24" + + egressPod := *newPodWithLabels(namespace, podName, node1Name, podV4IP, egressPodLabel) + egressNamespace := newNamespace(namespace) + annotations := 
map[string]string{ + "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", node1IPv4, ""), + "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), + "k8s.ovn.org/node-transit-switch-port-ifaddr": "{\"ipv4\":\"168.254.0.2/16\"}", // used only for ic=true test + "k8s.ovn.org/zone-name": node1Zone, // used only for ic=true test + } + labels := map[string]string{ + "k8s.ovn.org/egress-assignable": "", + } + node1 := getNodeObj(node1Name, annotations, labels) + annotations = map[string]string{ + "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", node2IPv4, ""), + "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), + "k8s.ovn.org/node-transit-switch-port-ifaddr": "{\"ipv4\":\"168.254.0.3/16\"}", // used only for ic=true test + "k8s.ovn.org/zone-name": node2Zone, // used only for ic=true test + } + node2 := getNodeObj(node2Name, annotations, labels) + + eIP := egressipv1.EgressIP{ + ObjectMeta: newEgressIPMeta(egressIPName), + Spec: egressipv1.EgressIPSpec{ + EgressIPs: []string{egressIP1, egressIP2}, + PodSelector: metav1.LabelSelector{ + MatchLabels: egressPodLabel, + }, + NamespaceSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{ + "name": egressNamespace.Name, + }, + }, + }, + Status: egressipv1.EgressIPStatus{ + Items: []egressipv1.EgressIPStatusItem{}, + }, + } + + fakeOvn.startWithDBSetup( + libovsdbtest.TestSetup{ + NBData: []libovsdbtest.TestData{ + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name, + Networks: []string{"100.64.0.3/29"}, + }, + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name, + Networks: 
[]string{"100.64.0.2/29"}, + }, + &nbdb.LogicalRouter{ + Name: ovntypes.OVNClusterRouter, + UUID: ovntypes.OVNClusterRouter + "-UUID", + }, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node1.Name, + UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", + }, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node2.Name, + UUID: ovntypes.GWRouterPrefix + node2.Name + "-UUID", + }, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, + }, + }, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, + }, + }, + }, + }, + &v1.NodeList{ + Items: []v1.Node{node1, node2}, + }, + &v1.NamespaceList{ + Items: []v1.Namespace{*egressNamespace}, + }, + &v1.PodList{ + Items: []v1.Pod{egressPod}, + }) + + i, n, _ := net.ParseCIDR(podV4IP + "/23") + n.IP = i + fakeOvn.controller.logicalPortCache.add(&egressPod, "", types.DefaultNetworkName, "", nil, []*net.IPNet{n}) + + err := fakeOvn.controller.WatchEgressIPNamespaces() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchEgressIPPods() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchEgressNodes() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchEgressIP() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + _, err = fakeOvn.fakeClient.EgressIPClient.K8sV1().EgressIPs().Create(context.TODO(), &eIP, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + // NOTE: Cluster manager is the one who patches 
the egressIP object. + // For the sake of unit testing egressip zone controller we need to patch egressIP object manually + // There are tests in cluster-manager package covering the patch logic. + status := []egressipv1.EgressIPStatusItem{ + { + Node: node1Name, + EgressIP: egressIP1, + }, + { + Node: node2Name, + EgressIP: egressIP2, + }, + } + err = fakeOvn.controller.patchReplaceEgressIPStatus(eIP.Name, status) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + gomega.Eventually(getEgressIPStatusLen(eIP.Name)).Should(gomega.Equal(2)) + egressIPs, nodes := getEgressIPStatus(eIP.Name) + assignmentNode1, assignmentNode2 := nodes[0], nodes[1] + assignedEgressIP1, assignedEgressIP2 := egressIPs[0], egressIPs[1] + + expectedNatLogicalPort1 := fmt.Sprintf("k8s-%s", assignmentNode1) + expectedNatLogicalPort2 := fmt.Sprintf("k8s-%s", assignmentNode2) + natEIP1 := &nbdb.NAT{ + UUID: "egressip-nat-1-UUID", + LogicalIP: podV4IP, + ExternalIP: assignedEgressIP1, ExternalIDs: map[string]string{ "name": egressIPName, }, Type: nbdb.NATTypeSNAT, - LogicalPort: &expectedNatLogicalPort, + LogicalPort: &expectedNatLogicalPort1, Options: map[string]string{ "stateless": "false", }, - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node1.name, - UUID: ovntypes.GWRouterPrefix + node1.name + "-UUID", - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node2.name, - UUID: ovntypes.GWRouterPrefix + node2.name + "-UUID", - Nat: []string{"egressip-nat-UUID"}, - }, - } - - gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) - - egressIPs, nodes := getEgressIPStatus(eIP.Name) - gomega.Expect(nodes[0]).To(gomega.Equal(node2.name)) - gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP.String())) + } + natEIP2 := &nbdb.NAT{ + UUID: "egressip-nat-2-UUID", + LogicalIP: podV4IP, + ExternalIP: assignedEgressIP2, + ExternalIDs: map[string]string{ + "name": egressIPName, + }, + Type: nbdb.NATTypeSNAT, + LogicalPort: 
&expectedNatLogicalPort2, + Options: map[string]string{ + "stateless": "false", + }, + } + expectedDatabaseState := []libovsdbtest.TestData{ + getReRoutePolicy(egressPod.Status.PodIP, "4", []string{"100.64.0.2", "100.64.0.3"}), + &nbdb.LogicalRouterPolicy{ + Priority: types.DefaultNoRereoutePriority, + Match: "ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14", + Action: nbdb.LogicalRouterPolicyActionAllow, + UUID: "default-no-reroute-UUID", + }, + &nbdb.LogicalRouterPolicy{ + Priority: types.DefaultNoRereoutePriority, + Match: fmt.Sprintf("ip4.src == 10.128.0.0/14 && ip4.dst == %s", config.Gateway.V4JoinSubnet), + Action: nbdb.LogicalRouterPolicyActionAllow, + UUID: "no-reroute-service-UUID", + }, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + assignmentNode1, + UUID: ovntypes.GWRouterPrefix + assignmentNode1 + "-UUID", + }, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + assignmentNode2, + UUID: ovntypes.GWRouterPrefix + assignmentNode2 + "-UUID", + }, + &nbdb.LogicalRouter{ + Name: ovntypes.OVNClusterRouter, + UUID: ovntypes.OVNClusterRouter + "-UUID", + Policies: []string{"reroute-UUID", "default-no-reroute-UUID", "no-reroute-service-UUID"}, + }, + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name, + Networks: []string{"100.64.0.3/29"}, + }, + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name, + Networks: []string{"100.64.0.2/29"}, + }, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, + }, 
+ }, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, + }, + }, + } + if !interconnect || node1Zone == "global" { + expectedDatabaseState[8].(*nbdb.LogicalSwitchPort).Options["nat-addresses"] = "router" + expectedDatabaseState[8].(*nbdb.LogicalSwitchPort).Options["exclude-lb-vips-from-garp"] = "true" + expectedDatabaseState[3].(*nbdb.LogicalRouter).Nat = []string{"egressip-nat-1-UUID"} + expectedDatabaseState = append(expectedDatabaseState, natEIP1) + } + if !interconnect || node2Zone == "global" { + expectedDatabaseState[9].(*nbdb.LogicalSwitchPort).Options["nat-addresses"] = "router" + expectedDatabaseState[9].(*nbdb.LogicalSwitchPort).Options["exclude-lb-vips-from-garp"] = "true" + expectedDatabaseState[4].(*nbdb.LogicalRouter).Nat = []string{"egressip-nat-2-UUID"} + expectedDatabaseState = append(expectedDatabaseState, natEIP2) + } + if node2Zone != node1Zone && node2Zone == "remote" { + // the policy reroute will have its second nexthop as transit switchIP + // so the one with join switchIP is where podNode == egressNode and one with transitIP is where podNode != egressNode + expectedDatabaseState[0].(*nbdb.LogicalRouterPolicy).Nexthops = []string{"100.64.0.2", "168.254.0.3"} + } + if node2Zone != node1Zone && node1Zone == "remote" { + expectedDatabaseState = append(expectedDatabaseState, getReRouteStaticRoute(egressPod.Status.PodIP, "100.64.0.3")) + expectedDatabaseState[5].(*nbdb.LogicalRouter).Policies = []string{"default-no-reroute-UUID", "no-reroute-service-UUID"} + expectedDatabaseState[5].(*nbdb.LogicalRouter).StaticRoutes = []string{"reroute-static-route-UUID"} + expectedDatabaseState = expectedDatabaseState[1:] // policy is not visible since podNode is remote + } + 
gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) - podUpdate := newPod(namespace, podName, node1Name, podV6IP) + latest, err := fakeOvn.fakeClient.EgressIPClient.K8sV1().EgressIPs().Get(context.TODO(), eIP.Name, metav1.GetOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + latest.Spec.EgressIPs = []string{egressIP3, egressIP2} + _, err = fakeOvn.fakeClient.EgressIPClient.K8sV1().EgressIPs().Update(context.TODO(), latest, metav1.UpdateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) - _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Pods(egressPod.Namespace).Update(context.TODO(), podUpdate, metav1.UpdateOptions{}) - gomega.Expect(err).ToNot(gomega.HaveOccurred()) - gomega.Eventually(getEgressIPStatusLen(eIP.Name)).Should(gomega.Equal(1)) + // NOTE: Cluster manager is the one who patches the egressIP object. + // For the sake of unit testing egressip zone controller we need to patch egressIP object manually + // There are tests in cluster-manager package covering the patch logic. 
+ status = []egressipv1.EgressIPStatusItem{ + { + Node: node1Name, + EgressIP: egressIP3, + }, + { + Node: node2Name, + EgressIP: egressIP2, + }, + } + err = fakeOvn.controller.patchReplaceEgressIPStatus(eIP.Name, status) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) - expectedDatabaseState = []libovsdbtest.TestData{ - &nbdb.LogicalRouter{ - Name: ovntypes.OVNClusterRouter, - UUID: ovntypes.OVNClusterRouter + "-UUID", - Policies: []string{}, - }, - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.name, - Networks: []string{nodeLogicalRouterIfAddrV6}, - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node1.name, - UUID: ovntypes.GWRouterPrefix + node1.name + "-UUID", - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node2.name, - UUID: ovntypes.GWRouterPrefix + node2.name + "-UUID", - Nat: nil, - }, + gomega.Eventually(func() []string { + egressIPs, _ = getEgressIPStatus(eIP.Name) + return egressIPs + }).Should(gomega.ConsistOf(egressIP3, egressIP2)) + + egressIPs, nodes = getEgressIPStatus(eIP.Name) + assignmentNode1, assignmentNode2 = nodes[0], nodes[1] + assignedEgressIP1, assignedEgressIP2 = egressIPs[0], egressIPs[1] + + expectedNatLogicalPort1 = fmt.Sprintf("k8s-%s", assignmentNode1) + expectedNatLogicalPort2 = fmt.Sprintf("k8s-%s", assignmentNode2) + expectedDatabaseState = []libovsdbtest.TestData{ + getReRoutePolicy(egressPod.Status.PodIP, "4", []string{"100.64.0.2", "100.64.0.3"}), + &nbdb.LogicalRouterPolicy{ + Priority: types.DefaultNoRereoutePriority, + Match: "ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14", + Action: nbdb.LogicalRouterPolicyActionAllow, + UUID: "default-no-reroute-UUID", + }, + &nbdb.LogicalRouterPolicy{ + Priority: types.DefaultNoRereoutePriority, + Match: fmt.Sprintf("ip4.src == 10.128.0.0/14 && ip4.dst == %s", config.Gateway.V4JoinSubnet), + Action: 
nbdb.LogicalRouterPolicyActionAllow, + UUID: "no-reroute-service-UUID", + }, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + assignmentNode1, + UUID: ovntypes.GWRouterPrefix + assignmentNode1 + "-UUID", + }, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + assignmentNode2, + UUID: ovntypes.GWRouterPrefix + assignmentNode2 + "-UUID", + }, + &nbdb.LogicalRouter{ + Name: ovntypes.OVNClusterRouter, + UUID: ovntypes.OVNClusterRouter + "-UUID", + Policies: []string{"reroute-UUID", "default-no-reroute-UUID", "no-reroute-service-UUID"}, + }, + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name, + Networks: []string{"100.64.0.3/29"}, + }, + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name, + Networks: []string{"100.64.0.2/29"}, + }, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, + }, + }, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, + }, + }, + } + if !interconnect || node1Zone == "global" { + expectedDatabaseState[8].(*nbdb.LogicalSwitchPort).Options["nat-addresses"] = "router" + expectedDatabaseState[8].(*nbdb.LogicalSwitchPort).Options["exclude-lb-vips-from-garp"] = "true" + expectedDatabaseState[3].(*nbdb.LogicalRouter).Nat = 
[]string{"egressip-nat-1-UUID"} + natEIP1.ExternalIP = assignedEgressIP1 + expectedDatabaseState = append(expectedDatabaseState, natEIP1) + } + if !interconnect || node2Zone == "global" { + expectedDatabaseState[9].(*nbdb.LogicalSwitchPort).Options["nat-addresses"] = "router" + expectedDatabaseState[9].(*nbdb.LogicalSwitchPort).Options["exclude-lb-vips-from-garp"] = "true" + expectedDatabaseState[4].(*nbdb.LogicalRouter).Nat = []string{"egressip-nat-2-UUID"} + natEIP2.ExternalIP = assignedEgressIP2 + expectedDatabaseState = append(expectedDatabaseState, natEIP2) + } + if node2Zone != node1Zone && node2Zone == "remote" { + // the policy reroute will have its second nexthop as transit switchIP + // so the one with join switchIP is where podNode == egressNode and one with transitIP is where podNode != egressNode + expectedDatabaseState[0].(*nbdb.LogicalRouterPolicy).Nexthops = []string{"100.64.0.2", "168.254.0.3"} + } + if node2Zone != node1Zone && node1Zone == "remote" { + expectedDatabaseState = append(expectedDatabaseState, getReRouteStaticRoute(egressPod.Status.PodIP, "100.64.0.3")) + expectedDatabaseState[5].(*nbdb.LogicalRouter).Policies = []string{"default-no-reroute-UUID", "no-reroute-service-UUID"} + expectedDatabaseState[5].(*nbdb.LogicalRouter).StaticRoutes = []string{"reroute-static-route-UUID"} + expectedDatabaseState = expectedDatabaseState[1:] // policy is not visible since podNode is remote + } + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) + return nil } - gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) - return nil - } + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }, + ginkgotable.Entry("interconnect disabled; non-ic - single zone setup", false, "global", "global"), + ginkgotable.Entry("interconnect enabled; node1 and node2 in global zones", true, "global", "global"), + // will showcase localzone setup - master is in pod's 
zone where pod's reroute policy towards egressNode will be done. + // NOTE: SNAT won't be visible because its in remote zone + ginkgotable.Entry("interconnect enabled; node1 in global and node2 in remote zones", true, "global", "remote"), + // will showcase localzone setup - master is in egress node's zone where pod's SNAT policy and static route will be done. + // NOTE: reroute policy won't be visible because its in remote zone (pod is in remote zone) + ginkgotable.Entry("interconnect enabled; node1 in remote and node2 in global zones", true, "remote", "global"), + ) - err := app.Run([]string{app.Name}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - }) - ginkgo.It("egressIP pod retry should remove OVN pod egress setup when EgressIP stops matching pod label", func() { + ginkgo.It("should delete and re-create", func() { app.Action = func(ctx *cli.Context) error { egressIP := net.ParseIP("0:0:0:0:0:feff:c0a8:8e0d") + updatedEgressIP := net.ParseIP("0:0:0:0:0:feff:c0a8:8ffd") egressPod := *newPodWithLabels(namespace, podName, node1Name, podV6IP, egressPodLabel) - egressNamespace := newNamespace(namespace) - - node1 := setupNode(node1Name, []string{"0:0:0:0:0:feff:c0a8:8e0c/64"}, map[string]string{"0:0:0:0:0:feff:c0a8:8e32": "bogus1", "0:0:0:0:0:feff:c0a8:8e1e": "bogus2"}) - node2 := setupNode(node2Name, []string{"0:0:0:0:0:fedf:c0a8:8e0c/64"}, map[string]string{"0:0:0:0:0:feff:c0a8:8e23": "bogus3"}) + egressNamespace := newNamespaceWithLabels(namespace, egressPodLabel) fakeOvn.startWithDBSetup( libovsdbtest.TestSetup{ NBData: []libovsdbtest.TestData{ &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.name, + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2Name, Networks: 
[]string{nodeLogicalRouterIfAddrV6}, }, &nbdb.LogicalRouter{ @@ -1743,12 +3043,12 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { UUID: ovntypes.OVNClusterRouter + "-UUID", }, &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node1.name, - UUID: ovntypes.GWRouterPrefix + node1.name + "-UUID", + Name: ovntypes.GWRouterPrefix + node1Name, + UUID: ovntypes.GWRouterPrefix + node1Name + "-UUID", }, &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node2.name, - UUID: ovntypes.GWRouterPrefix + node2.name + "-UUID", + Name: ovntypes.GWRouterPrefix + node2Name, + UUID: ovntypes.GWRouterPrefix + node2Name + "-UUID", Nat: nil, }, }, @@ -1761,8 +3061,13 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { }, ) - fakeOvn.controller.eIPC.allocator.cache[node1.name] = &node1 - fakeOvn.controller.eIPC.allocator.cache[node2.name] = &node2 + i, n, _ := net.ParseCIDR(podV6IP + "/23") + n.IP = i + fakeOvn.controller.logicalPortCache.add(&egressPod, "", types.DefaultNetworkName, "", nil, []*net.IPNet{n}) + fakeOvn.controller.logicalPortCache.add(&egressPod, "", types.DefaultNetworkName, "", nil, []*net.IPNet{n}) + // hack pod to be in the provided zone + fakeOvn.controller.eIPC.nodeZoneState.Store(node1Name, true) + fakeOvn.controller.eIPC.nodeZoneState.Store(node2Name, true) eIP := egressipv1.EgressIP{ ObjectMeta: newEgressIPMeta(egressIPName), @@ -1774,17 +3079,11 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { MatchLabels: egressPodLabel, }, NamespaceSelector: metav1.LabelSelector{ - MatchLabels: map[string]string{ - "name": egressNamespace.Name, - }, + MatchLabels: egressPodLabel, }, }, } - i, n, _ := net.ParseCIDR(podV6IP + "/23") - n.IP = i - fakeOvn.controller.logicalPortCache.add(&egressPod, "", types.DefaultNetworkName, "", nil, []*net.IPNet{n}) - err := fakeOvn.controller.WatchEgressIPNamespaces() gomega.Expect(err).NotTo(gomega.HaveOccurred()) err = fakeOvn.controller.WatchEgressIPPods() @@ -1795,6 
+3094,8 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { _, err = fakeOvn.fakeClient.EgressIPClient.K8sV1().EgressIPs().Create(context.TODO(), &eIP, metav1.CreateOptions{}) gomega.Expect(err).NotTo(gomega.HaveOccurred()) + fakeOvn.patchEgressIPObj(node2Name, egressIP.String()) + gomega.Eventually(getEgressIPStatusLen(eIP.Name)).Should(gomega.Equal(1)) expectedNatLogicalPort := "k8s-node2" @@ -1815,8 +3116,8 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { Policies: []string{"reroute-UUID"}, }, &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.name, + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2Name, Networks: []string{nodeLogicalRouterIfAddrV6}, }, &nbdb.NAT{ @@ -1833,55 +3134,47 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { }, }, &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node1.name, - UUID: ovntypes.GWRouterPrefix + node1.name + "-UUID", + Name: ovntypes.GWRouterPrefix + node1Name, + UUID: ovntypes.GWRouterPrefix + node1Name + "-UUID", }, &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node2.name, - UUID: ovntypes.GWRouterPrefix + node2.name + "-UUID", + Name: ovntypes.GWRouterPrefix + node2Name, + UUID: ovntypes.GWRouterPrefix + node2Name + "-UUID", Nat: []string{"egressip-nat-UUID"}, }, } - gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) egressIPs, nodes := getEgressIPStatus(eIP.Name) - gomega.Expect(nodes[0]).To(gomega.Equal(node2.name)) + gomega.Expect(nodes[0]).To(gomega.Equal(node2Name)) gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP.String())) - podUpdate := newPod(namespace, podName, node1Name, podV6IP) - ginkgo.By("Bringing down NBDB") - // inject transient 
problem, nbdb is down - fakeOvn.controller.nbClient.Close() - gomega.Eventually(func() bool { - return fakeOvn.controller.nbClient.Connected() - }).Should(gomega.BeFalse()) - _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Pods(egressPod.Namespace).Update(context.TODO(), podUpdate, metav1.UpdateOptions{}) - gomega.Expect(err).ToNot(gomega.HaveOccurred()) - time.Sleep(types.OVSDBTimeout + time.Second) - // check to see if the retry cache has an entry - var key string - key, err = retry.GetResourceKey(podUpdate) - gomega.Expect(err).ToNot(gomega.HaveOccurred()) + eIPUpdate, err := fakeOvn.fakeClient.EgressIPClient.K8sV1().EgressIPs().Get(context.TODO(), eIP.Name, metav1.GetOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) - ginkgo.By("retry entry: new obj should not be nil, config should not be nil") - retry.CheckRetryObjectMultipleFieldsEventually( - key, - fakeOvn.controller.retryEgressIPPods, - gomega.BeNil(), // oldObj should be nil - gomega.Not(gomega.BeNil()), // newObj should not be nil - gomega.Not(gomega.BeNil()), // config should not be nil - ) + eIPUpdate.Spec = egressipv1.EgressIPSpec{ + EgressIPs: []string{ + updatedEgressIP.String(), + }, + PodSelector: metav1.LabelSelector{ + MatchLabels: egressPodLabel, + }, + NamespaceSelector: metav1.LabelSelector{ + MatchLabels: egressPodLabel, + }, + } - connCtx, cancel := context.WithTimeout(context.Background(), types.OVSDBTimeout) - defer cancel() - resetNBClient(connCtx, fakeOvn.controller.nbClient) + _, err = fakeOvn.fakeClient.EgressIPClient.K8sV1().EgressIPs().Update(context.TODO(), eIPUpdate, metav1.UpdateOptions{}) + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + fakeOvn.patchEgressIPObj(node2Name, updatedEgressIP.String()) + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) - retry.SetRetryObjWithNoBackoff(key, fakeOvn.controller.retryEgressIPPods) - fakeOvn.controller.retryEgressIPPods.RequestRetryObjs() - // check the cache no longer has the entry - 
retry.CheckRetryObjectEventually(key, false, fakeOvn.controller.retryEgressIPPods) + gomega.Eventually(func() []string { + egressIPs, _ = getEgressIPStatus(eIP.Name) + return egressIPs + }).Should(gomega.ContainElement(updatedEgressIP.String())) + gomega.Expect(nodes[0]).To(gomega.Equal(node2Name)) return nil } @@ -1889,145 +3182,123 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { gomega.Expect(err).NotTo(gomega.HaveOccurred()) }) - ginkgo.It("should not treat pod update if pod already had assigned IP when it got the ADD", func() { - app.Action = func(ctx *cli.Context) error { - - egressIP := net.ParseIP("0:0:0:0:0:feff:c0a8:8e0d") + }) - egressPod := *newPodWithLabels(namespace, podName, node1Name, podV6IP, egressPodLabel) - egressNamespace := newNamespace(namespace) + ginkgo.Context("WatchEgressNodes", func() { - node1 := setupNode(node1Name, []string{"0:0:0:0:0:feff:c0a8:8e0c/64"}, map[string]string{"0:0:0:0:0:feff:c0a8:8e32": "bogus1", "0:0:0:0:0:feff:c0a8:8e1e": "bogus2"}) - node2 := setupNode(node2Name, []string{"0:0:0:0:0:fedf:c0a8:8e0c/64"}, map[string]string{"0:0:0:0:0:feff:c0a8:8e23": "bogus3"}) + ginkgo.It("should populated egress node data as they are tagged `egress assignable` with variants of IPv4/IPv6", func() { + app.Action = func(ctx *cli.Context) error { - fakeOvn.startWithDBSetup( - libovsdbtest.TestSetup{ - NBData: []libovsdbtest.TestData{ - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.name, - Networks: []string{nodeLogicalRouterIfAddrV6}, - }, - &nbdb.LogicalRouter{ - Name: ovntypes.OVNClusterRouter, - UUID: ovntypes.OVNClusterRouter + "-UUID", - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node1.name, - UUID: ovntypes.GWRouterPrefix + node1.name + "-UUID", - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node2.name, - UUID: 
ovntypes.GWRouterPrefix + node2.name + "-UUID", - }, - }, - }, - &v1.NamespaceList{ - Items: []v1.Namespace{*egressNamespace}, - }, - &v1.PodList{ - Items: []v1.Pod{egressPod}, - }, - ) - - fakeOvn.controller.eIPC.allocator.cache[node1.name] = &node1 - fakeOvn.controller.eIPC.allocator.cache[node2.name] = &node2 + node1IPv4 := "192.168.128.202/24" + node1IPv6 := "0:0:0:0:0:feff:c0a8:8e0c/64" + node2IPv4 := "192.168.126.51/24" - eIP := egressipv1.EgressIP{ - ObjectMeta: newEgressIPMeta(egressIPName), - Spec: egressipv1.EgressIPSpec{ - EgressIPs: []string{ - egressIP.String(), + annotations := map[string]string{ + "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", node1IPv4, node1IPv6), + "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":[\"%s\", \"%s\"]}", v4NodeSubnet, v6NodeSubnet), + } + node1 := getNodeObj(node1Name, annotations, map[string]string{}) + annotations = map[string]string{ + "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", node2IPv4, ""), + "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), + } + node2 := getNodeObj(node2Name, annotations, map[string]string{}) + fakeOvn.startWithDBSetup(libovsdbtest.TestSetup{ + NBData: []libovsdbtest.TestData{ + &nbdb.LogicalRouter{ + Name: ovntypes.OVNClusterRouter, + UUID: ovntypes.OVNClusterRouter + "-UUID", }, - PodSelector: metav1.LabelSelector{ - MatchLabels: egressPodLabel, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node1.Name, + UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", }, - NamespaceSelector: metav1.LabelSelector{ - MatchLabels: map[string]string{ - "name": egressNamespace.Name, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node2.Name, + UUID: ovntypes.GWRouterPrefix + node2.Name + "-UUID", + }, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix 
+ node1Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, + }, + }, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, }, }, }, + }) + err := fakeOvn.controller.WatchEgressNodes() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + node1.Labels = map[string]string{ + "k8s.ovn.org/egress-assignable": "", } - i, n, _ := net.ParseCIDR(podV6IP + "/23") - n.IP = i - fakeOvn.controller.logicalPortCache.add(&egressPod, "", types.DefaultNetworkName, "", nil, []*net.IPNet{n}) - - err := fakeOvn.controller.WatchEgressIPNamespaces() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = fakeOvn.controller.WatchEgressIPPods() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = fakeOvn.controller.WatchEgressIP() + _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Nodes().Create(context.TODO(), &node1, metav1.CreateOptions{}) gomega.Expect(err).NotTo(gomega.HaveOccurred()) - _, err = fakeOvn.fakeClient.EgressIPClient.K8sV1().EgressIPs().Create(context.TODO(), &eIP, metav1.CreateOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) + node2.Labels = map[string]string{ + "k8s.ovn.org/egress-assignable": "", + } - gomega.Eventually(getEgressIPStatusLen(eIP.Name)).Should(gomega.Equal(1)) + _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Nodes().Create(context.TODO(), &node2, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) - expectedNatLogicalPort := "k8s-node2" expectedDatabaseState := []libovsdbtest.TestData{ - &nbdb.LogicalRouterPolicy{ - Priority: types.EgressIPReroutePriority, - Match: fmt.Sprintf("ip6.src == %s", egressPod.Status.PodIP), - Action: nbdb.LogicalRouterPolicyActionReroute, - Nexthops: 
nodeLogicalRouterIPv6, - ExternalIDs: map[string]string{ - "name": eIP.Name, - }, - UUID: "reroute-UUID", - }, &nbdb.LogicalRouter{ Name: ovntypes.OVNClusterRouter, UUID: ovntypes.OVNClusterRouter + "-UUID", - Policies: []string{"reroute-UUID"}, + Policies: []string{"reroute-UUID", "no-reroute-service-UUID"}, }, - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.name, - Networks: []string{nodeLogicalRouterIfAddrV6}, + &nbdb.LogicalRouterPolicy{ + Priority: types.DefaultNoRereoutePriority, + Match: "ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14", + Action: nbdb.LogicalRouterPolicyActionAllow, + UUID: "reroute-UUID", }, - &nbdb.NAT{ - UUID: "egressip-nat-UUID", - LogicalIP: podV6IP, - ExternalIP: egressIP.String(), - ExternalIDs: map[string]string{ - "name": egressIPName, - }, - Type: nbdb.NATTypeSNAT, - LogicalPort: &expectedNatLogicalPort, - Options: map[string]string{ - "stateless": "false", - }, + &nbdb.LogicalRouterPolicy{ + Priority: types.DefaultNoRereoutePriority, + Match: fmt.Sprintf("ip4.src == 10.128.0.0/14 && ip4.dst == %s", config.Gateway.V4JoinSubnet), + Action: nbdb.LogicalRouterPolicyActionAllow, + UUID: "no-reroute-service-UUID", }, &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node1.name, - UUID: ovntypes.GWRouterPrefix + node1.name + "-UUID", + Name: ovntypes.GWRouterPrefix + node1.Name, + UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", }, &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node2.name, - UUID: ovntypes.GWRouterPrefix + node2.name + "-UUID", - Nat: []string{"egressip-nat-UUID"}, + Name: ovntypes.GWRouterPrefix + node2.Name, + UUID: ovntypes.GWRouterPrefix + node2.Name + "-UUID", + }, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + 
node1Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, + "nat-addresses": "router", + "exclude-lb-vips-from-garp": "true", + }, + }, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, + "nat-addresses": "router", + "exclude-lb-vips-from-garp": "true", + }, }, } gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) - - egressIPs, nodes := getEgressIPStatus(eIP.Name) - gomega.Expect(nodes[0]).To(gomega.Equal(node2.name)) - gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP.String())) - - podUpdate := newPodWithLabels(namespace, podName, node1Name, podV6IP, map[string]string{ - "egress": "needed", - "some": "update", - }) - - _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Pods(egressPod.Namespace).Update(context.TODO(), podUpdate, metav1.UpdateOptions{}) - gomega.Expect(err).ToNot(gomega.HaveOccurred()) - gomega.Eventually(getEgressIPStatusLen(eIP.Name)).Should(gomega.Equal(1)) - - gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) - return nil } @@ -2035,139 +3306,105 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { gomega.Expect(err).NotTo(gomega.HaveOccurred()) }) - ginkgo.It("should treat pod update if pod did not have an assigned IP when it got the ADD", func() { + ginkgo.It("using retry to create egress node with forced error followed by an update", func() { app.Action = func(ctx *cli.Context) error { - - egressIP := net.ParseIP("0:0:0:0:0:feff:c0a8:8e0d") - - egressPod := *newPodWithLabels(namespace, podName, node1Name, "", egressPodLabel) - egressNamespace := newNamespace(namespace) - - node1 := setupNode(node1Name, 
[]string{"0:0:0:0:0:feff:c0a8:8e0c/64"}, map[string]string{"0:0:0:0:0:feff:c0a8:8e32": "bogus1", "0:0:0:0:0:feff:c0a8:8e1e": "bogus2"}) - node2 := setupNode(node2Name, []string{"0:0:0:0:0:fedf:c0a8:8e0c/64"}, map[string]string{"0:0:0:0:0:feff:c0a8:8e23": "bogus3"}) - - fakeOvn.startWithDBSetup( - libovsdbtest.TestSetup{ - NBData: []libovsdbtest.TestData{ - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.name, - Networks: []string{nodeLogicalRouterIfAddrV6}, - }, - &nbdb.LogicalRouter{ - Name: ovntypes.OVNClusterRouter, - UUID: ovntypes.OVNClusterRouter + "-UUID", - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node1.name, - UUID: ovntypes.GWRouterPrefix + node1.name + "-UUID", - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node2.name, - UUID: ovntypes.GWRouterPrefix + node2.name + "-UUID", - Nat: nil, - }, - }, - }, - &v1.NamespaceList{ - Items: []v1.Namespace{*egressNamespace}, - }, - &v1.PodList{ - Items: []v1.Pod{egressPod}, - }, - ) - - fakeOvn.controller.eIPC.allocator.cache[node1.name] = &node1 - fakeOvn.controller.eIPC.allocator.cache[node2.name] = &node2 - - eIP := egressipv1.EgressIP{ - ObjectMeta: newEgressIPMeta(egressIPName), - Spec: egressipv1.EgressIPSpec{ - EgressIPs: []string{ - egressIP.String(), + nodeIPv4 := "192.168.126.51/24" + nodeIPv6 := "0:0:0:0:0:feff:c0a8:8e0c/64" + annotations := map[string]string{ + "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", nodeIPv4, nodeIPv6), + "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":[\"%s\", \"%s\"]}", v4NodeSubnet, v6NodeSubnet), + } + node := getNodeObj("node", annotations, map[string]string{}) + fakeOvn.startWithDBSetup(libovsdbtest.TestSetup{ + NBData: []libovsdbtest.TestData{ + &nbdb.LogicalRouter{ + Name: ovntypes.OVNClusterRouter, + UUID: ovntypes.OVNClusterRouter + "-UUID", }, 
- PodSelector: metav1.LabelSelector{ - MatchLabels: egressPodLabel, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node.Name, + UUID: ovntypes.GWRouterPrefix + node.Name + "-UUID", }, - NamespaceSelector: metav1.LabelSelector{ - MatchLabels: map[string]string{ - "name": egressNamespace.Name, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + nodeName + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + nodeName, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + nodeName, }, }, }, - } - - err := fakeOvn.controller.WatchEgressIPNamespaces() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = fakeOvn.controller.WatchEgressIPPods() + }) + err := fakeOvn.controller.WatchEgressNodes() gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = fakeOvn.controller.WatchEgressIP() + node.Labels = map[string]string{ + "k8s.ovn.org/egress-assignable": "", + } + _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Nodes().Create(context.TODO(), &node, metav1.CreateOptions{}) gomega.Expect(err).NotTo(gomega.HaveOccurred()) + ginkgo.By("Bringing down NBDB") + // inject transient problem, nbdb is down + fakeOvn.controller.nbClient.Close() + gomega.Eventually(func() bool { + return fakeOvn.controller.nbClient.Connected() + }).Should(gomega.BeFalse()) - _, err = fakeOvn.fakeClient.EgressIPClient.K8sV1().EgressIPs().Create(context.TODO(), &eIP, metav1.CreateOptions{}) + // sleep long enough for TransactWithRetry to fail, causing egressnode operations to fail + // there is a chance that both egressnode events(node1 removal and node2 update) will end up in the same event queue + // sleep for double the time to allow for two consecutive TransactWithRetry timeouts + time.Sleep(2 * (types.OVSDBTimeout + time.Second)) + // check to see if the retry cache has an entry + key, err := retry.GetResourceKey(&node) gomega.Expect(err).NotTo(gomega.HaveOccurred()) + 
retry.CheckRetryObjectEventually(key, true, fakeOvn.controller.retryEgressNodes) + ginkgo.By("retry entry: old obj should be nil, new obj should not be nil") + retry.CheckRetryObjectMultipleFieldsEventually( + key, + fakeOvn.controller.retryEgressNodes, + gomega.BeNil(), // oldObj should be nil + gomega.Not(gomega.BeNil()), // newObj should not be nil + ) - gomega.Eventually(getEgressIPStatusLen(eIP.Name)).Should(gomega.Equal(1)) - - egressIPs, nodes := getEgressIPStatus(eIP.Name) - gomega.Expect(nodes[0]).To(gomega.Equal(node2.name)) - gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP.String())) - - podUpdate := newPodWithLabels(namespace, podName, node1Name, podV6IP, egressPodLabel) - podUpdate.Annotations = map[string]string{ - "k8s.ovn.org/pod-networks": fmt.Sprintf("{\"default\":{\"ip_addresses\":[\"%s/23\"],\"mac_address\":\"0a:58:0a:83:00:0f\",\"gateway_ips\":[\"%s\"],\"ip_address\":\"%s/23\",\"gateway_ip\":\"%s\"}}", podV6IP, v6GatewayIP, podV6IP, v6GatewayIP), - } - i, n, _ := net.ParseCIDR(podV6IP + "/23") - n.IP = i - fakeOvn.controller.logicalPortCache.add(&egressPod, "", types.DefaultNetworkName, "", nil, []*net.IPNet{n}) - - _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Pods(egressPod.Namespace).Update(context.TODO(), podUpdate, metav1.UpdateOptions{}) - gomega.Expect(err).ToNot(gomega.HaveOccurred()) - gomega.Eventually(getEgressIPStatusLen(eIP.Name)).Should(gomega.Equal(1)) + node.Labels = map[string]string{} + _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Nodes().Update(context.TODO(), &node, metav1.UpdateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + connCtx, cancel := context.WithTimeout(context.Background(), types.OVSDBTimeout) + defer cancel() + resetNBClient(connCtx, fakeOvn.controller.nbClient) + retry.SetRetryObjWithNoBackoff(key, fakeOvn.controller.retryEgressNodes) + fakeOvn.controller.retryEgressNodes.RequestRetryObjs() + // check the cache no longer has the entry + retry.CheckRetryObjectEventually(key, false, 
fakeOvn.controller.retryEgressNodes) - expectedNatLogicalPort := "k8s-node2" expectedDatabaseState := []libovsdbtest.TestData{ - &nbdb.LogicalRouterPolicy{ - Priority: types.EgressIPReroutePriority, - Match: fmt.Sprintf("ip6.src == %s", podV6IP), - Action: nbdb.LogicalRouterPolicyActionReroute, - Nexthops: nodeLogicalRouterIPv6, - ExternalIDs: map[string]string{ - "name": eIP.Name, - }, - UUID: "reroute-UUID", - }, &nbdb.LogicalRouter{ Name: ovntypes.OVNClusterRouter, UUID: ovntypes.OVNClusterRouter + "-UUID", - Policies: []string{"reroute-UUID"}, + Policies: []string{"reroute-UUID", "no-reroute-service-UUID"}, }, - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.name, - Networks: []string{nodeLogicalRouterIfAddrV6}, + &nbdb.LogicalRouterPolicy{ + Priority: types.DefaultNoRereoutePriority, + Match: "ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14", + Action: nbdb.LogicalRouterPolicyActionAllow, + UUID: "reroute-UUID", }, - &nbdb.NAT{ - UUID: "egressip-nat-UUID", - LogicalIP: podV6IP, - ExternalIP: egressIP.String(), - ExternalIDs: map[string]string{ - "name": egressIPName, - }, - Type: nbdb.NATTypeSNAT, - LogicalPort: &expectedNatLogicalPort, - Options: map[string]string{ - "stateless": "false", - }, + &nbdb.LogicalRouterPolicy{ + Priority: types.DefaultNoRereoutePriority, + Match: fmt.Sprintf("ip4.src == 10.128.0.0/14 && ip4.dst == %s", config.Gateway.V4JoinSubnet), + Action: nbdb.LogicalRouterPolicyActionAllow, + UUID: "no-reroute-service-UUID", }, &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node1.name, - UUID: ovntypes.GWRouterPrefix + node1.name + "-UUID", + Name: ovntypes.GWRouterPrefix + node.Name, + UUID: ovntypes.GWRouterPrefix + node.Name + "-UUID", }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node2.name, - UUID: ovntypes.GWRouterPrefix + node2.name + "-UUID", - Nat: 
[]string{"egressip-nat-UUID"}, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + nodeName + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + nodeName, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + nodeName, + }, }, } gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) @@ -2178,33 +3415,31 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { gomega.Expect(err).NotTo(gomega.HaveOccurred()) }) - ginkgo.It("should not treat pod DELETE if pod did not have an assigned IP when it got the ADD and we receive a DELETE before the IP UPDATE", func() { + ginkgo.It("egressIP pod recreate with same name (stateful-sets) shouldn't use stale logicalPortCache entries", func() { app.Action = func(ctx *cli.Context) error { - egressIP := net.ParseIP("0:0:0:0:0:feff:c0a8:8e0d") + config.Gateway.DisableSNATMultipleGWs = true - egressPod := *newPodWithLabels(namespace, podName, node1Name, "", egressPodLabel) - egressNamespace := newNamespace(namespace) - fakeOvn.startWithDBSetup(clusterRouterDbSetup, - &v1.NamespaceList{ - Items: []v1.Namespace{*egressNamespace}, - }, - &v1.PodList{ - Items: []v1.Pod{egressPod}, - }, - ) - node1 := setupNode(node1Name, []string{"0:0:0:0:0:feff:c0a8:8e0c/64"}, map[string]string{"0:0:0:0:0:feff:c0a8:8e32": "bogus1", "0:0:0:0:0:feff:c0a8:8e1e": "bogus2"}) - node2 := setupNode(node2Name, []string{"0:0:0:0:0:fedf:c0a8:8e0c/64"}, map[string]string{"0:0:0:0:0:feff:c0a8:8e23": "bogus3"}) + egressIP1 := "192.168.126.101" + node1IPv4 := "192.168.126.12/24" - fakeOvn.controller.eIPC.allocator.cache[node1.name] = &node1 - fakeOvn.controller.eIPC.allocator.cache[node2.name] = &node2 + egressPod1 := *newPodWithLabels(namespace, podName, node1Name, "", egressPodLabel) + egressNamespace := newNamespace(namespace) + annotations := map[string]string{ + "k8s.ovn.org/node-primary-ifaddr": 
fmt.Sprintf("{\"ipv4\": \"%s\"}", node1IPv4), + "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), + "k8s.ovn.org/l3-gateway-config": `{"default":{"mode":"local","mac-address":"7e:57:f8:f0:3c:49", "ip-address":"192.168.126.12/24", "next-hop":"192.168.126.1"}}`, + "k8s.ovn.org/node-chassis-id": "79fdcfc4-6fe6-4cd3-8242-c0f85a4668ec", + } + labels := map[string]string{ + "k8s.ovn.org/egress-assignable": "", + } + node1 := getNodeObj(node1Name, annotations, labels) eIP := egressipv1.EgressIP{ ObjectMeta: newEgressIPMeta(egressIPName), Spec: egressipv1.EgressIPSpec{ - EgressIPs: []string{ - egressIP.String(), - }, + EgressIPs: []string{egressIP1}, PodSelector: metav1.LabelSelector{ MatchLabels: egressPodLabel, }, @@ -2214,188 +3449,232 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { }, }, }, + Status: egressipv1.EgressIPStatus{ + Items: []egressipv1.EgressIPStatusItem{ + { + Node: node1.Name, + EgressIP: egressIP1, + }, + }, + }, + } + nodeSwitch := &nbdb.LogicalSwitch{ + UUID: node1.Name + "-UUID", + Name: node1.Name, } - - err := fakeOvn.controller.WatchEgressIPNamespaces() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = fakeOvn.controller.WatchEgressIPPods() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = fakeOvn.controller.WatchEgressIP() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - - _, err = fakeOvn.fakeClient.EgressIPClient.K8sV1().EgressIPs().Create(context.TODO(), &eIP, metav1.CreateOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - - gomega.Eventually(getEgressIPStatusLen(eIP.Name)).Should(gomega.Equal(1)) - - egressIPs, nodes := getEgressIPStatus(eIP.Name) - gomega.Expect(nodes[0]).To(gomega.Equal(node2.name)) - gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP.String())) - - err = fakeOvn.fakeClient.KubeClient.CoreV1().Pods(egressPod.Namespace).Delete(context.TODO(), egressPod.Name, *metav1.NewDeleteOptions(0)) - gomega.Expect(err).ToNot(gomega.HaveOccurred()) - 
gomega.Eventually(getEgressIPStatusLen(eIP.Name)).Should(gomega.Equal(1)) - return nil - } - - err := app.Run([]string{app.Name}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - }) - }) - - ginkgo.Context("IPv6 on namespace UPDATE", func() { - - ginkgo.It("should remove OVN pod egress setup when EgressIP is deleted", func() { - app.Action = func(ctx *cli.Context) error { - - egressIP := net.ParseIP("0:0:0:0:0:feff:c0a8:8e0d") - - egressPod := *newPodWithLabels(namespace, podName, node1Name, podV6IP, egressPodLabel) - egressNamespace := newNamespaceWithLabels(namespace, egressPodLabel) - - node1 := setupNode(node1Name, []string{"0:0:0:0:0:feff:c0a8:8e0c/64"}, map[string]string{"0:0:0:0:0:feff:c0a8:8e32": "bogus1", "0:0:0:0:0:feff:c0a8:8e1e": "bogus2"}) - node2 := setupNode(node2Name, []string{"0:0:0:0:0:fedf:c0a8:8e0c/64"}, map[string]string{"0:0:0:0:0:feff:c0a8:8e23": "bogus3"}) fakeOvn.startWithDBSetup( libovsdbtest.TestSetup{ NBData: []libovsdbtest.TestData{ - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.name, - Networks: []string{nodeLogicalRouterIfAddrV6}, - }, &nbdb.LogicalRouter{ Name: ovntypes.OVNClusterRouter, UUID: ovntypes.OVNClusterRouter + "-UUID", }, &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node1.name, - UUID: ovntypes.GWRouterPrefix + node1.name + "-UUID", + Name: ovntypes.GWRouterPrefix + node1.Name, + UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node2.name, - UUID: ovntypes.GWRouterPrefix + node2.name + "-UUID", - Nat: nil, + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name, + Networks: []string{nodeLogicalRouterIfAddrV4}, + }, + &nbdb.LogicalSwitchPort{ + UUID: 
types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, + }, }, + nodeSwitch, }, }, + &egressipv1.EgressIPList{ + Items: []egressipv1.EgressIP{eIP}, + }, + &v1.NodeList{ + Items: []v1.Node{node1}, + }, &v1.NamespaceList{ Items: []v1.Namespace{*egressNamespace}, }, &v1.PodList{ - Items: []v1.Pod{egressPod}, + Items: []v1.Pod{egressPod1}, }, ) + // we don't know the real switch UUID in the db, but it can be found by name + swUUID := getLogicalSwitchUUID(fakeOvn.controller.nbClient, node1.Name) + fakeOvn.controller.lsManager.AddSwitch(node1.Name, swUUID, []*net.IPNet{ovntest.MustParseIPNet(v4NodeSubnet)}) - fakeOvn.controller.eIPC.allocator.cache[node1.name] = &node1 - fakeOvn.controller.eIPC.allocator.cache[node2.name] = &node2 - - eIP := egressipv1.EgressIP{ - ObjectMeta: newEgressIPMeta(egressIPName), - Spec: egressipv1.EgressIPSpec{ - EgressIPs: []string{ - egressIP.String(), - }, - PodSelector: metav1.LabelSelector{ - MatchLabels: egressPodLabel, - }, - NamespaceSelector: metav1.LabelSelector{ - MatchLabels: egressPodLabel, - }, - }, - } - - i, n, _ := net.ParseCIDR(podV6IP + "/23") - n.IP = i - fakeOvn.controller.logicalPortCache.add(&egressPod, "", types.DefaultNetworkName, "", nil, []*net.IPNet{n}) - - err := fakeOvn.controller.WatchEgressIPNamespaces() + err := fakeOvn.controller.WatchPods() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchEgressIPNamespaces() gomega.Expect(err).NotTo(gomega.HaveOccurred()) err = fakeOvn.controller.WatchEgressIPPods() gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchEgressNodes() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) err = fakeOvn.controller.WatchEgressIP() gomega.Expect(err).NotTo(gomega.HaveOccurred()) - _, err = 
fakeOvn.fakeClient.EgressIPClient.K8sV1().EgressIPs().Create(context.TODO(), &eIP, metav1.CreateOptions{}) + egressPodPortInfo, err := fakeOvn.controller.logicalPortCache.get(&egressPod1, types.DefaultNetworkName) gomega.Expect(err).NotTo(gomega.HaveOccurred()) + egressPodIP, _, err := net.ParseCIDR(egressPodPortInfo.ips[0].String()) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Expect(egressPodPortInfo.expires.IsZero()).To(gomega.BeTrue()) + podAddr := fmt.Sprintf("%s %s", egressPodPortInfo.mac.String(), egressPodIP) - gomega.Eventually(getEgressIPStatusLen(eIP.Name)).Should(gomega.Equal(1)) - - expectedNatLogicalPort := "k8s-node2" - expectedDatabaseState := []libovsdbtest.TestData{ + expectedNatLogicalPort1 := "k8s-node1" + expectedDatabaseStatewithPod := []libovsdbtest.TestData{ + &nbdb.LogicalRouterPolicy{ + Priority: types.DefaultNoRereoutePriority, + Match: "ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14", + Action: nbdb.LogicalRouterPolicyActionAllow, + UUID: "no-reroute-UUID", + }, + &nbdb.LogicalRouterPolicy{ + Priority: types.DefaultNoRereoutePriority, + Match: fmt.Sprintf("ip4.src == 10.128.0.0/14 && ip4.dst == %s", config.Gateway.V4JoinSubnet), + Action: nbdb.LogicalRouterPolicyActionAllow, + UUID: "no-reroute-service-UUID", + }, &nbdb.LogicalRouterPolicy{ Priority: types.EgressIPReroutePriority, - Match: fmt.Sprintf("ip6.src == %s", egressPod.Status.PodIP), + Match: fmt.Sprintf("ip4.src == %s", egressPodIP), Action: nbdb.LogicalRouterPolicyActionReroute, - Nexthops: nodeLogicalRouterIPv6, + Nexthops: nodeLogicalRouterIPv4, ExternalIDs: map[string]string{ "name": eIP.Name, }, - UUID: "reroute-UUID", + UUID: "reroute-UUID1", }, &nbdb.LogicalRouter{ Name: ovntypes.OVNClusterRouter, UUID: ovntypes.OVNClusterRouter + "-UUID", - Policies: []string{"reroute-UUID"}, + Policies: []string{"no-reroute-UUID", "no-reroute-service-UUID", "reroute-UUID1"}, }, - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + 
ovntypes.GWRouterPrefix + node2.name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.name, - Networks: []string{nodeLogicalRouterIfAddrV6}, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node1.Name, + UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", + Nat: []string{"egressip-nat-UUID1"}, }, &nbdb.NAT{ - UUID: "egressip-nat-UUID", - LogicalIP: podV6IP, - ExternalIP: egressIP.String(), + UUID: "egressip-nat-UUID1", + LogicalIP: egressPodIP.String(), + ExternalIP: egressIP1, ExternalIDs: map[string]string{ "name": egressIPName, }, Type: nbdb.NATTypeSNAT, - LogicalPort: &expectedNatLogicalPort, + LogicalPort: &expectedNatLogicalPort1, Options: map[string]string{ "stateless": "false", }, }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node1.name, - UUID: ovntypes.GWRouterPrefix + node1.name + "-UUID", - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node2.name, - UUID: ovntypes.GWRouterPrefix + node2.name + "-UUID", - Nat: []string{"egressip-nat-UUID"}, - }, - } - gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) - - egressIPs, nodes := getEgressIPStatus(eIP.Name) - gomega.Expect(nodes[0]).To(gomega.Equal(node2.name)) - gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP.String())) + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, + "nat-addresses": "router", + "exclude-lb-vips-from-garp": "true", + }, + }, + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name, + Networks: []string{"100.64.0.2/29"}, + }, + nodeSwitch, + } + podLSP := 
&nbdb.LogicalSwitchPort{ + UUID: util.GetLogicalPortName(egressPod1.Namespace, egressPod1.Name) + "-UUID", + Name: util.GetLogicalPortName(egressPod1.Namespace, egressPod1.Name), + Addresses: []string{podAddr}, + ExternalIDs: map[string]string{ + "pod": "true", + "namespace": egressPod1.Namespace, + }, + Options: map[string]string{ + "requested-chassis": egressPod1.Spec.NodeName, + "iface-id-ver": egressPod1.Name, + }, + PortSecurity: []string{podAddr}, + } + nodeSwitch.Ports = []string{podLSP.UUID} + finalDatabaseStatewithPod := append(expectedDatabaseStatewithPod, podLSP) + gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) + _, nodes := getEgressIPStatus(egressIPName) + gomega.Expect(nodes[0]).To(gomega.Equal(node1.Name)) - err = fakeOvn.fakeClient.EgressIPClient.K8sV1().EgressIPs().Delete(context.TODO(), eIP.Name, metav1.DeleteOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(finalDatabaseStatewithPod)) - expectedDatabaseState = []libovsdbtest.TestData{ - &nbdb.LogicalRouter{ - Name: ovntypes.OVNClusterRouter, - UUID: ovntypes.OVNClusterRouter + "-UUID", + // delete the pod + err = fakeOvn.fakeClient.KubeClient.CoreV1().Pods(egressPod1.Namespace).Delete(context.TODO(), + egressPod1.Name, metav1.DeleteOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + expectedDatabaseStateWithoutPod := []libovsdbtest.TestData{ + &nbdb.LogicalRouterPolicy{ + Priority: types.DefaultNoRereoutePriority, + Match: "ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14", + Action: nbdb.LogicalRouterPolicyActionAllow, + UUID: "no-reroute-UUID", }, - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.name, - Networks: []string{nodeLogicalRouterIfAddrV6}, + &nbdb.LogicalRouterPolicy{ + Priority: types.DefaultNoRereoutePriority, + 
Match: fmt.Sprintf("ip4.src == 10.128.0.0/14 && ip4.dst == %s", config.Gateway.V4JoinSubnet), + Action: nbdb.LogicalRouterPolicyActionAllow, + UUID: "no-reroute-service-UUID", }, &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node1.name, - UUID: ovntypes.GWRouterPrefix + node1.name + "-UUID", + Name: ovntypes.OVNClusterRouter, + UUID: ovntypes.OVNClusterRouter + "-UUID", + Policies: []string{"no-reroute-UUID", "no-reroute-service-UUID"}, }, &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node2.name, - UUID: ovntypes.GWRouterPrefix + node2.name + "-UUID", - Nat: nil, + Name: ovntypes.GWRouterPrefix + node1.Name, + UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", + Nat: []string{}, + }, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, + "nat-addresses": "router", + "exclude-lb-vips-from-garp": "true", + }, + }, + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name, + Networks: []string{"100.64.0.2/29"}, + }, + &nbdb.LogicalSwitch{ + UUID: node1.Name + "-UUID", + Name: node1.Name, + Ports: []string{}, }, } - gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseStateWithoutPod)) + // recreate pod with same name immediately; simulating handler race (pods v/s egressip) condition, + // so instead of proper pod create, we try out egressIP pod setup which will be a no-op since pod doesn't exist + ginkgo.By("should not add egress IP setup for a deleted pod whose entry exists in logicalPortCache") + err = 
fakeOvn.controller.addPodEgressIPAssignments(egressIPName, eIP.Status.Items, &egressPod1) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + // pod is gone but logicalPortCache holds the entry for 60seconds + egressPodPortInfo, err = fakeOvn.controller.logicalPortCache.get(&egressPod1, types.DefaultNetworkName) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Expect(egressPodPortInfo.expires.IsZero()).To(gomega.BeFalse()) + staleEgressPodIP, _, err := net.ParseCIDR(egressPodPortInfo.ips[0].String()) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Expect(staleEgressPodIP).To(gomega.Equal(egressPodIP)) + // no-op + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseStateWithoutPod)) + return nil } @@ -2403,178 +3682,301 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { gomega.Expect(err).NotTo(gomega.HaveOccurred()) }) - ginkgo.It("egressIP retry should remove OVN pod egress setup when EgressIP is deleted", func() { + ginkgo.It("egressIP pod recreate with same name (stateful-sets) shouldn't use stale logicalPortCache entries AND stale podAssignment cache entries", func() { app.Action = func(ctx *cli.Context) error { - egressIP := net.ParseIP("0:0:0:0:0:feff:c0a8:8e0d") + config.Gateway.DisableSNATMultipleGWs = true - egressPod := *newPodWithLabels(namespace, podName, node1Name, podV6IP, egressPodLabel) - egressNamespace := newNamespaceWithLabels(namespace, egressPodLabel) + egressIP1 := "192.168.126.101" + node1IPv4 := "192.168.126.12/24" + + oldEgressPodIP := "10.128.0.50" + egressPod1 := newPodWithLabels(namespace, podName, node1Name, "", egressPodLabel) + oldAnnotation := map[string]string{"k8s.ovn.org/pod-networks": `{"default":{"ip_addresses":["10.128.0.50/24"],"mac_address":"0a:58:0a:80:00:05","gateway_ips":["10.128.0.1"],"routes":[{"dest":"10.128.0.0/24","nextHop":"10.128.0.1"}],"ip_address":"10.128.0.50/24","gateway_ip":"10.128.0.1"}}`} + egressPod1.Annotations = oldAnnotation + 
egressNamespace := newNamespace(namespace) + + annotations := map[string]string{ + "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\"}", node1IPv4), + "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), + "k8s.ovn.org/l3-gateway-config": `{"default":{"mode":"local","mac-address":"7e:57:f8:f0:3c:49", "ip-address":"192.168.126.12/24", "next-hop":"192.168.126.1"}}`, + "k8s.ovn.org/node-chassis-id": "79fdcfc4-6fe6-4cd3-8242-c0f85a4668ec", + } + labels := map[string]string{ + "k8s.ovn.org/egress-assignable": "", + } + node1 := getNodeObj(node1Name, annotations, labels) - node1 := setupNode(node1Name, []string{"0:0:0:0:0:feff:c0a8:8e0c/64"}, map[string]string{"0:0:0:0:0:feff:c0a8:8e32": "bogus1", "0:0:0:0:0:feff:c0a8:8e1e": "bogus2"}) - node2 := setupNode(node2Name, []string{"0:0:0:0:0:fedf:c0a8:8e0c/64"}, map[string]string{"0:0:0:0:0:feff:c0a8:8e23": "bogus3"}) + eIP := egressipv1.EgressIP{ + ObjectMeta: newEgressIPMeta(egressIPName), + Spec: egressipv1.EgressIPSpec{ + EgressIPs: []string{egressIP1}, + PodSelector: metav1.LabelSelector{ + MatchLabels: egressPodLabel, + }, + NamespaceSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{ + "name": egressNamespace.Name, + }, + }, + }, + Status: egressipv1.EgressIPStatus{ + Items: []egressipv1.EgressIPStatusItem{ + { + Node: node1.Name, + EgressIP: egressIP1, + }, + }, + }, + } + nodeSwitch := &nbdb.LogicalSwitch{ + UUID: node1.Name + "-UUID", + Name: node1.Name, + } fakeOvn.startWithDBSetup( libovsdbtest.TestSetup{ NBData: []libovsdbtest.TestData{ - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.name, - Networks: []string{nodeLogicalRouterIfAddrV6}, - }, &nbdb.LogicalRouter{ Name: ovntypes.OVNClusterRouter, UUID: ovntypes.OVNClusterRouter + "-UUID", }, &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node1.name, - 
UUID: ovntypes.GWRouterPrefix + node1.name + "-UUID", + Name: ovntypes.GWRouterPrefix + node1.Name, + UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node2.name, - UUID: ovntypes.GWRouterPrefix + node2.name + "-UUID", - Nat: nil, + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name, + Networks: []string{nodeLogicalRouterIfAddrV4}, + }, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, + }, }, + nodeSwitch, }, }, + &egressipv1.EgressIPList{ + Items: []egressipv1.EgressIP{eIP}, + }, + &v1.NodeList{ + Items: []v1.Node{node1}, + }, &v1.NamespaceList{ Items: []v1.Namespace{*egressNamespace}, }, &v1.PodList{ - Items: []v1.Pod{egressPod}, + Items: []v1.Pod{*egressPod1}, }, ) - fakeOvn.controller.eIPC.allocator.cache[node1.name] = &node1 - fakeOvn.controller.eIPC.allocator.cache[node2.name] = &node2 - - eIP := egressipv1.EgressIP{ - ObjectMeta: newEgressIPMeta(egressIPName), - Spec: egressipv1.EgressIPSpec{ - EgressIPs: []string{ - egressIP.String(), - }, - PodSelector: metav1.LabelSelector{ - MatchLabels: egressPodLabel, - }, - NamespaceSelector: metav1.LabelSelector{ - MatchLabels: egressPodLabel, - }, - }, - } - - i, n, _ := net.ParseCIDR(podV6IP + "/23") - n.IP = i - fakeOvn.controller.logicalPortCache.add(&egressPod, "", types.DefaultNetworkName, "", nil, []*net.IPNet{n}) - - err := fakeOvn.controller.WatchEgressIPNamespaces() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = fakeOvn.controller.WatchEgressIPPods() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = 
fakeOvn.controller.WatchEgressIP() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - - ginkgo.By("Bringing down NBDB") - // inject transient problem, nbdb is down - fakeOvn.controller.nbClient.Close() - gomega.Eventually(func() bool { - return fakeOvn.controller.nbClient.Connected() - }).Should(gomega.BeFalse()) + // we don't know the real switch UUID in the db, but it can be found by name + swUUID := getLogicalSwitchUUID(fakeOvn.controller.nbClient, node1.Name) + fakeOvn.controller.lsManager.AddSwitch(node1.Name, swUUID, []*net.IPNet{ovntest.MustParseIPNet(v4NodeSubnet)}) + fakeOvn.controller.WatchPods() + fakeOvn.controller.WatchEgressIPNamespaces() + fakeOvn.controller.WatchEgressIPPods() + fakeOvn.controller.WatchEgressNodes() + fakeOvn.controller.WatchEgressIP() - _, err = fakeOvn.fakeClient.EgressIPClient.K8sV1().EgressIPs().Create(context.TODO(), &eIP, metav1.CreateOptions{}) + oldEgressPodPortInfo, err := fakeOvn.controller.logicalPortCache.get(egressPod1, types.DefaultNetworkName) gomega.Expect(err).NotTo(gomega.HaveOccurred()) - // sleep long enough for TransactWithRetry to fail, causing egressnode operations to fail - time.Sleep(types.OVSDBTimeout + time.Second) - // check to see if the retry cache has an entry - key, err := retry.GetResourceKey(&eIP) + egressPodIP, _, err := net.ParseCIDR(oldEgressPodPortInfo.ips[0].String()) gomega.Expect(err).NotTo(gomega.HaveOccurred()) - retry.CheckRetryObjectEventually(key, true, fakeOvn.controller.retryEgressIPs) - - connCtx, cancel := context.WithTimeout(context.Background(), types.OVSDBTimeout) - defer cancel() - resetNBClient(connCtx, fakeOvn.controller.nbClient) - retry.SetRetryObjWithNoBackoff(key, fakeOvn.controller.retryEgressIPs) - fakeOvn.controller.retryEgressIPs.RequestRetryObjs() - // check the cache no longer has the entry - retry.CheckRetryObjectEventually(key, false, fakeOvn.controller.retryEgressIPs) - - gomega.Eventually(getEgressIPStatusLen(eIP.Name)).Should(gomega.Equal(1)) + 
gomega.Expect(egressPodIP.String()).To(gomega.Equal(oldEgressPodIP)) + gomega.Expect(oldEgressPodPortInfo.expires.IsZero()).To(gomega.BeTrue()) + podAddr := fmt.Sprintf("%s %s", oldEgressPodPortInfo.mac.String(), egressPodIP) - expectedNatLogicalPort := "k8s-node2" - expectedDatabaseState := []libovsdbtest.TestData{ - &nbdb.LogicalRouterPolicy{ - Priority: types.EgressIPReroutePriority, - Match: fmt.Sprintf("ip6.src == %s", egressPod.Status.PodIP), - Action: nbdb.LogicalRouterPolicyActionReroute, - Nexthops: nodeLogicalRouterIPv6, - ExternalIDs: map[string]string{ - "name": eIP.Name, - }, - UUID: "reroute-UUID", - }, - &nbdb.LogicalRouter{ - Name: ovntypes.OVNClusterRouter, - UUID: ovntypes.OVNClusterRouter + "-UUID", - Policies: []string{"reroute-UUID"}, + expectedNatLogicalPort1 := "k8s-node1" + podEIPSNAT := &nbdb.NAT{ + UUID: "egressip-nat-UUID1", + LogicalIP: egressPodIP.String(), + ExternalIP: egressIP1, + ExternalIDs: map[string]string{ + "name": egressIPName, }, - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.name, - Networks: []string{nodeLogicalRouterIfAddrV6}, + Type: nbdb.NATTypeSNAT, + LogicalPort: &expectedNatLogicalPort1, + Options: map[string]string{ + "stateless": "false", }, - &nbdb.NAT{ - UUID: "egressip-nat-UUID", - LogicalIP: podV6IP, - ExternalIP: egressIP.String(), - ExternalIDs: map[string]string{ - "name": egressIPName, - }, - Type: nbdb.NATTypeSNAT, - LogicalPort: &expectedNatLogicalPort, + } + podReRoutePolicy := &nbdb.LogicalRouterPolicy{ + Priority: types.EgressIPReroutePriority, + Match: fmt.Sprintf("ip4.src == %s", oldEgressPodIP), + Action: nbdb.LogicalRouterPolicyActionReroute, + Nexthops: nodeLogicalRouterIPv4, + ExternalIDs: map[string]string{ + "name": eIP.Name, + }, + UUID: "reroute-UUID1", + } + node1GR := &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node1.Name, + UUID: 
ovntypes.GWRouterPrefix + node1.Name + "-UUID", + Nat: []string{"egressip-nat-UUID1"}, + } + expectedDatabaseStatewithPod := []libovsdbtest.TestData{ + podEIPSNAT, + &nbdb.LogicalRouterPolicy{ + Priority: types.DefaultNoRereoutePriority, + Match: "ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14", + Action: nbdb.LogicalRouterPolicyActionAllow, + UUID: "no-reroute-UUID", + }, + &nbdb.LogicalRouterPolicy{ + Priority: types.DefaultNoRereoutePriority, + Match: fmt.Sprintf("ip4.src == 10.128.0.0/14 && ip4.dst == %s", config.Gateway.V4JoinSubnet), + Action: nbdb.LogicalRouterPolicyActionAllow, + UUID: "no-reroute-service-UUID", + }, + podReRoutePolicy, + &nbdb.LogicalRouter{ + Name: ovntypes.OVNClusterRouter, + UUID: ovntypes.OVNClusterRouter + "-UUID", + Policies: []string{"no-reroute-UUID", "no-reroute-service-UUID", "reroute-UUID1"}, + }, + node1GR, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, + Type: "router", Options: map[string]string{ - "stateless": "false", + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, + "nat-addresses": "router", + "exclude-lb-vips-from-garp": "true", }, }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node1.name, - UUID: ovntypes.GWRouterPrefix + node1.name + "-UUID", + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name, + Networks: []string{"100.64.0.2/29"}, }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node2.name, - UUID: ovntypes.GWRouterPrefix + node2.name + "-UUID", - Nat: []string{"egressip-nat-UUID"}, + nodeSwitch, + } + podLSP := &nbdb.LogicalSwitchPort{ + UUID: util.GetLogicalPortName(egressPod1.Namespace, egressPod1.Name) + "-UUID", + Name: util.GetLogicalPortName(egressPod1.Namespace, 
egressPod1.Name), + Addresses: []string{podAddr}, + ExternalIDs: map[string]string{ + "pod": "true", + "namespace": egressPod1.Namespace, + }, + Options: map[string]string{ + "requested-chassis": egressPod1.Spec.NodeName, + "iface-id-ver": egressPod1.Name, }, + PortSecurity: []string{podAddr}, } - gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) + nodeSwitch.Ports = []string{podLSP.UUID} + finalDatabaseStatewithPod := append(expectedDatabaseStatewithPod, podLSP) + gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) + _, nodes := getEgressIPStatus(egressIPName) + gomega.Expect(nodes[0]).To(gomega.Equal(node1.Name)) - egressIPs, nodes := getEgressIPStatus(eIP.Name) - gomega.Expect(nodes[0]).To(gomega.Equal(node2.name)) - gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP.String())) + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(finalDatabaseStatewithPod)) - err = fakeOvn.fakeClient.EgressIPClient.K8sV1().EgressIPs().Delete(context.TODO(), eIP.Name, metav1.DeleteOptions{}) + // delete the pod and simulate a cleanup failure: + // 1) create a situation where pod is gone from kapi but egressIP setup wasn't cleanedup due to deletion error + // - we remove annotation from pod to mimic this situation + // 2) leaves us with a stale podAssignment cache + // 3) check to make sure the logicalPortCache is used always even if podAssignment already has the podKey + ginkgo.By("delete the egress IP pod and force the deletion to fail") + egressPod1.Annotations = map[string]string{} + _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Pods(egressPod1.Namespace).Update(context.TODO(), egressPod1, metav1.UpdateOptions{}) gomega.Expect(err).NotTo(gomega.HaveOccurred()) + // Wait for the cleared annotations to show up client-side + gomega.Eventually(func() int { + egressPod1, _ = fakeOvn.watcher.GetPod(egressPod1.Namespace, egressPod1.Name) + return len(egressPod1.Annotations) + }, 
5).Should(gomega.Equal(0)) - expectedDatabaseState = []libovsdbtest.TestData{ - &nbdb.LogicalRouter{ - Name: ovntypes.OVNClusterRouter, - UUID: ovntypes.OVNClusterRouter + "-UUID", - }, - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.name, - Networks: []string{nodeLogicalRouterIfAddrV6}, - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node1.name, - UUID: ovntypes.GWRouterPrefix + node1.name + "-UUID", - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node2.name, - UUID: ovntypes.GWRouterPrefix + node2.name + "-UUID", - Nat: nil, + // Delete the pod to trigger the cleanup failure + err = fakeOvn.fakeClient.KubeClient.CoreV1().Pods(egressPod1.Namespace).Delete(context.TODO(), + egressPod1.Name, metav1.DeleteOptions{}) + // internally we have an error: + // E1006 12:51:59.594899 2500972 obj_retry.go:1517] Failed to delete *factory.egressIPPod egressip-namespace/egress-pod, error: pod egressip-namespace/egress-pod: no pod IPs found + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + // notice that pod objects aren't cleaned up yet since deletion failed! 
+ // even the LSP sticks around for 60 seconds + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(finalDatabaseStatewithPod)) + // egressIP cache is stale in the sense the podKey has not been deleted since deletion failed + pas := getPodAssignmentState(egressPod1) + gomega.Expect(pas).NotTo(gomega.BeNil()) + gomega.Expect(pas.egressStatuses).To(gomega.Equal(map[egressipv1.EgressIPStatusItem]string{ + { + Node: "node1", + EgressIP: "192.168.126.101", + }: "", + })) + // recreate pod with same name immediately; + ginkgo.By("should add egress IP setup for the NEW pod which exists in logicalPortCache") + newEgressPodIP := "10.128.0.60" + egressPod1 = newPodWithLabels(namespace, podName, node1Name, newEgressPodIP, egressPodLabel) + egressPod1.Annotations = map[string]string{"k8s.ovn.org/pod-networks": `{"default":{"ip_addresses":["10.128.0.60/24"],"mac_address":"0a:58:0a:80:00:06","gateway_ips":["10.128.0.1"],"routes":[{"dest":"10.128.0.0/24","nextHop":"10.128.0.1"}],"ip_address":"10.128.0.60/24","gateway_ip":"10.128.0.1"}}`} + _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Pods(egressPod1.Namespace).Create(context.TODO(), egressPod1, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + // wait for the logical port cache to get updated with the new pod's IP + var newEgressPodPortInfo *lpInfo + getEgressPodIP := func() string { + newEgressPodPortInfo, err = fakeOvn.controller.logicalPortCache.get(egressPod1, types.DefaultNetworkName) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + egressPodIP, _, err := net.ParseCIDR(newEgressPodPortInfo.ips[0].String()) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + return egressPodIP.String() + } + gomega.Eventually(func() string { + return getEgressPodIP() + }).Should(gomega.Equal(newEgressPodIP)) + gomega.Expect(newEgressPodPortInfo.expires.IsZero()).To(gomega.BeTrue()) + + // deletion for the older EIP pod object is still being retried so we still have SNAT + // towards nodeIP for 
new pod which is created by addLogicalPort. + // Note that we while have the stale re-route policy for old pod, the snat for the old pod towards egressIP is gone + // because deleteLogicalPort removes ALL snats for a given pod but doesn't remove the policies. + ipv4Addr, _, _ := net.ParseCIDR(node1IPv4) + podNodeSNAT := &nbdb.NAT{ + UUID: "node-nat-UUID1", + LogicalIP: newEgressPodIP, + ExternalIP: ipv4Addr.String(), + Type: nbdb.NATTypeSNAT, + Options: map[string]string{ + "stateless": "false", }, } - gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) + finalDatabaseStatewithPod = append(finalDatabaseStatewithPod, podNodeSNAT) + node1GR.Nat = []string{podNodeSNAT.UUID} + podAddr = fmt.Sprintf("%s %s", newEgressPodPortInfo.mac.String(), newEgressPodIP) + podLSP.PortSecurity = []string{podAddr} + podLSP.Addresses = []string{podAddr} + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(finalDatabaseStatewithPod[1:])) + + ginkgo.By("trigger a forced retry and ensure deletion of oldPod and creation of newPod are successful") + // let us add back the annotation to the oldPod which is being retried to make deletion a success + podKey, err := retry.GetResourceKey(egressPod1) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + retry.CheckRetryObjectEventually(podKey, true, fakeOvn.controller.retryEgressIPPods) + retryOldObj := retry.GetOldObjFromRetryObj(podKey, fakeOvn.controller.retryEgressIPPods) + //fakeOvn.controller.retryEgressIPPods.retryEntries.LoadOrStore(podKey, &RetryObjEntry{backoffSec: 1}) + pod, _ := retryOldObj.(*v1.Pod) + pod.Annotations = oldAnnotation + fakeOvn.controller.retryEgressIPPods.RequestRetryObjs() + // there should also be no entry for this pod in the retry cache + gomega.Eventually(func() bool { + return retry.CheckRetryObj(podKey, fakeOvn.controller.retryEgressIPPods) + }, retry.RetryObjInterval+time.Second).Should(gomega.BeFalse()) + + // ensure that egressIP setup is being done with the 
new pod's information from logicalPortCache + podReRoutePolicy.Match = fmt.Sprintf("ip4.src == %s", newEgressPodIP) + podEIPSNAT.LogicalIP = newEgressPodIP + node1GR.Nat = []string{podEIPSNAT.UUID} + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(finalDatabaseStatewithPod[:len(finalDatabaseStatewithPod)-1])) return nil } @@ -2582,274 +3984,618 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { gomega.Expect(err).NotTo(gomega.HaveOccurred()) }) - ginkgo.It("should remove OVN pod egress setup when EgressIP stops matching", func() { - app.Action = func(ctx *cli.Context) error { - - egressIP := net.ParseIP("0:0:0:0:0:feff:c0a8:8e0d") - - egressPod := *newPodWithLabels(namespace, podName, node1Name, podV6IP, egressPodLabel) - egressNamespace := newNamespaceWithLabels(namespace, egressPodLabel) - - node1 := setupNode(node1Name, []string{"0:0:0:0:0:feff:c0a8:8e0c/64"}, map[string]string{"0:0:0:0:0:feff:c0a8:8e32": "bogus1", "0:0:0:0:0:feff:c0a8:8e1e": "bogus2"}) - node2 := setupNode(node2Name, []string{"0:0:0:0:0:fedf:c0a8:8e0c/64"}, map[string]string{"0:0:0:0:0:feff:c0a8:8e23": "bogus3"}) - - fakeOvn.startWithDBSetup( - libovsdbtest.TestSetup{ - NBData: []libovsdbtest.TestData{ - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.name, - Networks: []string{nodeLogicalRouterIfAddrV6}, - }, - &nbdb.LogicalRouter{ - Name: ovntypes.OVNClusterRouter, - UUID: ovntypes.OVNClusterRouter + "-UUID", - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node1.name, - UUID: ovntypes.GWRouterPrefix + node1.name + "-UUID", - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node2.name, - UUID: ovntypes.GWRouterPrefix + node2.name + "-UUID", - Nat: nil, + ginkgotable.DescribeTable("egressIP pod managed by multiple objects, verify standby works wells, verify syncPodAssignmentCache on 
restarts", + func(interconnect bool, node1Zone, node2Zone string) { + config.OVNKubernetesFeature.EnableInterconnect = interconnect + app.Action = func(ctx *cli.Context) error { + + config.Gateway.DisableSNATMultipleGWs = true + + egressIP1 := "192.168.126.25" + egressIP2 := "192.168.126.30" + egressIP3 := "192.168.126.35" + node1IPv4 := "192.168.126.12/24" + node2IPv4 := "192.168.126.13/24" + + egressPod1 := *newPodWithLabels(namespace, podName, node1Name, "", egressPodLabel) + egressNamespace := newNamespace(namespace) + annotations := map[string]string{ + "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\"}", node1IPv4), + "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), + "k8s.ovn.org/l3-gateway-config": `{"default":{"mode":"local","mac-address":"7e:57:f8:f0:3c:49", "ip-address":"192.168.126.12/24", "next-hop":"192.168.126.1"}}`, + "k8s.ovn.org/node-chassis-id": "79fdcfc4-6fe6-4cd3-8242-c0f85a4668ec", + "k8s.ovn.org/node-transit-switch-port-ifaddr": "{\"ipv4\":\"168.254.0.2/16\"}", // used only for ic=true test + "k8s.ovn.org/zone-name": node1Zone, // used only for ic=true test + } + labels := map[string]string{ + "k8s.ovn.org/egress-assignable": "", + } + node1 := getNodeObj(node1Name, annotations, labels) + annotations = map[string]string{ + "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\"}", node2IPv4), + "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), + "k8s.ovn.org/l3-gateway-config": `{"default":{"mode":"local","mac-address":"7e:57:f8:f0:3c:50", "ip-address":"192.168.126.13/24", "next-hop":"192.168.126.1"}}`, + "k8s.ovn.org/node-chassis-id": "79fdcfc4-6fe6-4cd3-8242-c0f85a4668ec", + "k8s.ovn.org/node-transit-switch-port-ifaddr": "{\"ipv4\":\"168.254.0.3/16\"}", // used only for ic=true test + "k8s.ovn.org/zone-name": node2Zone, // used only for ic=true test + } + node2 := getNodeObj(node2Name, annotations, map[string]string{}) + eIP1 := egressipv1.EgressIP{ + 
ObjectMeta: newEgressIPMeta(egressIPName), + Spec: egressipv1.EgressIPSpec{ + EgressIPs: []string{egressIP1, egressIP2}, + PodSelector: metav1.LabelSelector{ + MatchLabels: egressPodLabel, + }, + NamespaceSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{ + "name": egressNamespace.Name, + }, }, }, - }, - &v1.NamespaceList{ - Items: []v1.Namespace{*egressNamespace}, - }, - &v1.PodList{ - Items: []v1.Pod{egressPod}, - }, - ) - - i, n, _ := net.ParseCIDR(podV6IP + "/23") - n.IP = i - fakeOvn.controller.logicalPortCache.add(&egressPod, "", types.DefaultNetworkName, "", nil, []*net.IPNet{n}) - fakeOvn.controller.eIPC.allocator.cache[node1.name] = &node1 - fakeOvn.controller.eIPC.allocator.cache[node2.name] = &node2 - - eIP := egressipv1.EgressIP{ - ObjectMeta: newEgressIPMeta(egressIPName), - Spec: egressipv1.EgressIPSpec{ - EgressIPs: []string{ - egressIP.String(), + Status: egressipv1.EgressIPStatus{ + Items: []egressipv1.EgressIPStatusItem{}, }, - PodSelector: metav1.LabelSelector{ - MatchLabels: egressPodLabel, + } + + eIP2 := egressipv1.EgressIP{ + ObjectMeta: newEgressIPMeta(egressIPName2), + Spec: egressipv1.EgressIPSpec{ + EgressIPs: []string{egressIP3}, + PodSelector: metav1.LabelSelector{ + MatchLabels: egressPodLabel, + }, + NamespaceSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{ + "name": egressNamespace.Name, + }, + }, }, - NamespaceSelector: metav1.LabelSelector{ - MatchLabels: egressPodLabel, + Status: egressipv1.EgressIPStatus{ + Items: []egressipv1.EgressIPStatusItem{}, }, - }, - } - - err := fakeOvn.controller.WatchEgressIPNamespaces() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = fakeOvn.controller.WatchEgressIPPods() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = fakeOvn.controller.WatchEgressIP() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) + } - _, err = fakeOvn.fakeClient.EgressIPClient.K8sV1().EgressIPs().Create(context.TODO(), &eIP, metav1.CreateOptions{}) - 
gomega.Expect(err).NotTo(gomega.HaveOccurred()) + node1Switch := &nbdb.LogicalSwitch{ + UUID: node1.Name + "-UUID", + Name: node1.Name, + } + node1GR := &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node1.Name, + UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", + } + node2GR := &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node2.Name, + UUID: ovntypes.GWRouterPrefix + node2.Name + "-UUID", + } + node1LSP := &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, + }, + } + node2LSP := &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, + }, + } - gomega.Eventually(getEgressIPStatusLen(eIP.Name)).Should(gomega.Equal(1)) + fakeOvn.startWithDBSetup( + libovsdbtest.TestSetup{ + NBData: []libovsdbtest.TestData{ + &nbdb.LogicalRouter{ + Name: ovntypes.OVNClusterRouter, + UUID: ovntypes.OVNClusterRouter + "-UUID", + }, + node1GR, node2GR, + node1LSP, node2LSP, + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name, + Networks: []string{"100.64.0.3/29"}, + }, + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name, + Networks: []string{"100.64.0.2/29"}, + }, + node1Switch, + &nbdb.LogicalSwitch{ + UUID: node2.Name + "-UUID", + Name: node2.Name, + }, + }, + }, + 
&egressipv1.EgressIPList{ + Items: []egressipv1.EgressIP{eIP1, eIP2}, + }, + &v1.NodeList{ + Items: []v1.Node{node1, node2}, + }, + &v1.NamespaceList{ + Items: []v1.Namespace{*egressNamespace}, + }, + &v1.PodList{ + Items: []v1.Pod{egressPod1}, + }, + ) - expectedNatLogicalPort := "k8s-node2" - expectedDatabaseState := []libovsdbtest.TestData{ - &nbdb.LogicalRouterPolicy{ + // we don't know the real switch UUID in the db, but it can be found by name + sw1UUID := getLogicalSwitchUUID(fakeOvn.controller.nbClient, node1.Name) + sw2UUID := getLogicalSwitchUUID(fakeOvn.controller.nbClient, node2.Name) + fakeOvn.controller.lsManager.AddSwitch(node1.Name, sw1UUID, []*net.IPNet{ovntest.MustParseIPNet(v4NodeSubnet)}) + fakeOvn.controller.lsManager.AddSwitch(node2.Name, sw2UUID, []*net.IPNet{ovntest.MustParseIPNet(v4NodeSubnet)}) + err := fakeOvn.controller.WatchPods() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchEgressIPNamespaces() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchEgressIPPods() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchEgressNodes() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchEgressIP() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + fakeOvn.patchEgressIPObj(node1Name, egressIP1) + + // NOTE: Cluster manager is the one who patches the egressIP object. + // For the sake of unit testing egressip zone controller we need to patch egressIP object manually + // There are tests in cluster-manager package covering the patch logic. 
+ status := []egressipv1.EgressIPStatusItem{ + { + Node: node1Name, + EgressIP: egressIP3, + }, + } + err = fakeOvn.controller.patchReplaceEgressIPStatus(egressIPName2, status) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + egressPodPortInfo, err := fakeOvn.controller.logicalPortCache.get(&egressPod1, types.DefaultNetworkName) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + ePod, err := fakeOvn.fakeClient.KubeClient.CoreV1().Pods(egressPod1.Namespace).Get(context.TODO(), egressPod1.Name, metav1.GetOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + egressPodIP, err := util.GetPodIPsOfNetwork(ePod, &util.DefaultNetInfo{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + egressNetPodIP, _, err := net.ParseCIDR(egressPodPortInfo.ips[0].String()) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Expect(egressNetPodIP.String()).To(gomega.Equal(egressPodIP[0].String())) + gomega.Expect(egressPodPortInfo.expires.IsZero()).To(gomega.BeTrue()) + podAddr := fmt.Sprintf("%s %s", egressPodPortInfo.mac.String(), egressPodIP[0].String()) + + // Ensure first egressIP object is assigned, since only node1 is an egressNode, only 1IP will be assigned, other will be pending + gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) + gomega.Eventually(getEgressIPReassignmentCount).Should(gomega.Equal(1)) + egressIPs1, nodes1 := getEgressIPStatus(egressIPName) + gomega.Expect(nodes1[0]).To(gomega.Equal(node1.Name)) + gomega.Expect(egressIPs1[0]).To(gomega.Equal(egressIP1)) + + // Ensure second egressIP object is also assigned to node1, but no OVN config will be done for this + gomega.Eventually(getEgressIPStatusLen(egressIPName2)).Should(gomega.Equal(1)) + egressIPs2, nodes2 := getEgressIPStatus(egressIPName2) + gomega.Expect(nodes2[0]).To(gomega.Equal(node1.Name)) + gomega.Expect(egressIPs2[0]).To(gomega.Equal(egressIP3)) + recordedEvent := <-fakeOvn.fakeRecorder.Events + 
gomega.Expect(recordedEvent).To(gomega.ContainSubstring("EgressIP object egressip-2 will not be configured for pod egressip-namespace_egress-pod since another egressIP object egressip is serving it, this is undefined")) + + pas := getPodAssignmentState(&egressPod1) + gomega.Expect(pas).NotTo(gomega.BeNil()) + + assginedEIP := egressIPs1[0] + gomega.Expect(pas.egressIPName).To(gomega.Equal(egressIPName)) + eip1Obj, err := fakeOvn.fakeClient.EgressIPClient.K8sV1().EgressIPs().Get(context.TODO(), eIP1.Name, metav1.GetOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Expect(pas.egressStatuses[eip1Obj.Status.Items[0]]).To(gomega.Equal("")) + gomega.Expect(pas.standbyEgressIPNames.Has(egressIPName2)).To(gomega.BeTrue()) + + podEIPSNAT := &nbdb.NAT{ + UUID: "egressip-nat-UUID1", + LogicalIP: egressPodIP[0].String(), + ExternalIP: assginedEIP, + ExternalIDs: map[string]string{ + "name": pas.egressIPName, + }, + Type: nbdb.NATTypeSNAT, + LogicalPort: utilpointer.String("k8s-node1"), + Options: map[string]string{ + "stateless": "false", + }, + } + podReRoutePolicy := &nbdb.LogicalRouterPolicy{ Priority: types.EgressIPReroutePriority, - Match: fmt.Sprintf("ip6.src == %s", egressPod.Status.PodIP), + Match: fmt.Sprintf("ip4.src == %s", egressPodIP[0].String()), Action: nbdb.LogicalRouterPolicyActionReroute, - Nexthops: nodeLogicalRouterIPv6, + Nexthops: nodeLogicalRouterIPv4, ExternalIDs: map[string]string{ - "name": eIP.Name, + "name": pas.egressIPName, }, - UUID: "reroute-UUID", - }, - &nbdb.LogicalRouter{ - Name: ovntypes.OVNClusterRouter, - UUID: ovntypes.OVNClusterRouter + "-UUID", - Policies: []string{"reroute-UUID"}, - }, - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.name, - Networks: []string{nodeLogicalRouterIfAddrV6}, - }, - &nbdb.NAT{ - UUID: "egressip-nat-UUID", - LogicalIP: podV6IP, - 
ExternalIP: egressIP.String(), + UUID: "reroute-UUID1", + } + node1GR.Nat = []string{"egressip-nat-UUID1"} + node1LSP.Options = map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, + "nat-addresses": "router", + "exclude-lb-vips-from-garp": "true", + } + if node1Zone != node2Zone && node1Zone == "remote" { + // GARP for remote zones are taken care of by remote controller + node1LSP.Options = map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, + } + } + if !interconnect || node2Zone == "global" { + node2LSP.Options = map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, + "nat-addresses": "router", + "exclude-lb-vips-from-garp": "true", + } + } + expectedDatabaseStatewithPod := []libovsdbtest.TestData{ + podEIPSNAT, + podReRoutePolicy, + &nbdb.LogicalRouterPolicy{ + Priority: types.DefaultNoRereoutePriority, + Match: "ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14", + Action: nbdb.LogicalRouterPolicyActionAllow, + UUID: "no-reroute-UUID", + }, + &nbdb.LogicalRouterPolicy{ + Priority: types.DefaultNoRereoutePriority, + Match: fmt.Sprintf("ip4.src == 10.128.0.0/14 && ip4.dst == %s", config.Gateway.V4JoinSubnet), + Action: nbdb.LogicalRouterPolicyActionAllow, + UUID: "no-reroute-service-UUID", + }, + &nbdb.LogicalRouter{ + Name: ovntypes.OVNClusterRouter, + UUID: ovntypes.OVNClusterRouter + "-UUID", + Policies: []string{"no-reroute-UUID", "no-reroute-service-UUID", "reroute-UUID1"}, + }, + node1GR, node2GR, + node1LSP, node2LSP, + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name, + Networks: []string{"100.64.0.2/29"}, + }, + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + 
ovntypes.GWRouterPrefix + node2.Name, + Networks: []string{"100.64.0.3/29"}, + }, + node1Switch, + &nbdb.LogicalSwitch{ + UUID: node2.Name + "-UUID", + Name: node2.Name, + }, + } + podLSP := &nbdb.LogicalSwitchPort{ + UUID: util.GetLogicalPortName(egressPod1.Namespace, egressPod1.Name) + "-UUID", + Name: util.GetLogicalPortName(egressPod1.Namespace, egressPod1.Name), + Addresses: []string{podAddr}, ExternalIDs: map[string]string{ - "name": egressIPName, + "pod": "true", + "namespace": egressPod1.Namespace, + }, + Options: map[string]string{ + "requested-chassis": egressPod1.Spec.NodeName, + "iface-id-ver": egressPod1.Name, + }, + PortSecurity: []string{podAddr}, + } + node1Switch.Ports = []string{podLSP.UUID} + finalDatabaseStatewithPod := append(expectedDatabaseStatewithPod, podLSP) + if node1Zone == "remote" { + // policy is not visible since podNode is in remote zone + finalDatabaseStatewithPod[4].(*nbdb.LogicalRouter).Policies = []string{"no-reroute-UUID", "no-reroute-service-UUID"} + finalDatabaseStatewithPod = finalDatabaseStatewithPod[2:] + podEIPSNAT.ExternalIP = "192.168.126.12" // EIP SNAT is not visible since podNode is remote, SNAT towards nodeIP is visible. 
+ podEIPSNAT.LogicalPort = nil + podNodeSNAT := &nbdb.NAT{ + UUID: "node-nat-UUID1", + LogicalIP: egressPodIP[0].String(), + ExternalIP: "192.168.126.12", + Type: nbdb.NATTypeSNAT, + Options: map[string]string{ + "stateless": "false", + }, + } + finalDatabaseStatewithPod = append(finalDatabaseStatewithPod, podNodeSNAT) + node1GR.Nat = []string{"node-nat-UUID1"} + } + + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(finalDatabaseStatewithPod)) + + // Make second node egressIP assignable + node2.Labels = map[string]string{ + "k8s.ovn.org/egress-assignable": "", + } + _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Nodes().Update(context.TODO(), &node2, metav1.UpdateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + // NOTE: Cluster manager is the one who patches the egressIP object. + // For the sake of unit testing egressip zone controller we need to patch egressIP object manually + // There are tests in cluster-manager package covering the patch logic. + status = []egressipv1.EgressIPStatusItem{ + { + Node: node1Name, + EgressIP: egressIP1, + }, + { + Node: node2Name, + EgressIP: egressIP2, + }, + } + err = fakeOvn.controller.patchReplaceEgressIPStatus(egressIPName, status) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + // ensure secondIP from first object gets assigned to node2 + gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(2)) + egressIPs1, nodes1 = getEgressIPStatus(egressIPName) + gomega.Expect(nodes1[1]).To(gomega.Equal(node2.Name)) + gomega.Expect(egressIPs1[1]).To(gomega.Equal(egressIP2)) + + podEIPSNAT2 := &nbdb.NAT{ + UUID: "egressip-nat-UUID2", + LogicalIP: egressPodIP[0].String(), + ExternalIP: egressIPs1[1], + ExternalIDs: map[string]string{ + "name": pas.egressIPName, }, Type: nbdb.NATTypeSNAT, - LogicalPort: &expectedNatLogicalPort, + LogicalPort: utilpointer.String("k8s-node2"), Options: map[string]string{ "stateless": "false", }, - }, - &nbdb.LogicalRouter{ - Name: 
ovntypes.GWRouterPrefix + node1.name, - UUID: ovntypes.GWRouterPrefix + node1.name + "-UUID", - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node2.name, - UUID: ovntypes.GWRouterPrefix + node2.name + "-UUID", - Nat: []string{"egressip-nat-UUID"}, - }, - } - gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) + } + podReRoutePolicy.Nexthops = []string{nodeLogicalRouterIPv4[0], node2LogicalRouterIPv4[0]} + if node2Zone == "remote" { + // the policy reroute will have its second nexthop as transit switchIP + // so the one with join switchIP is where podNode == egressNode and one with transitIP is where podNode != egressNode + podReRoutePolicy.Nexthops = []string{"100.64.0.2", "168.254.0.3"} + } + if node1Zone == "remote" { + finalDatabaseStatewithPod = append(finalDatabaseStatewithPod, getReRouteStaticRoute(egressPodIP[0].String(), node2LogicalRouterIPv4[0])) + finalDatabaseStatewithPod[2].(*nbdb.LogicalRouter).StaticRoutes = []string{"reroute-static-route-UUID"} + } + if !interconnect || node2Zone == "global" { + node2GR.Nat = []string{"egressip-nat-UUID2"} + finalDatabaseStatewithPod = append(finalDatabaseStatewithPod, podEIPSNAT2) + } - egressIPs, nodes := getEgressIPStatus(eIP.Name) - gomega.Expect(nodes[0]).To(gomega.Equal(node2.name)) - gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP.String())) + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(finalDatabaseStatewithPod)) - namespaceUpdate := newNamespace(namespace) + // check the state of the cache for podKey + pas = getPodAssignmentState(&egressPod1) + gomega.Expect(pas).NotTo(gomega.BeNil()) - _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Namespaces().Update(context.TODO(), namespaceUpdate, metav1.UpdateOptions{}) - gomega.Expect(err).ToNot(gomega.HaveOccurred()) - gomega.Eventually(getEgressIPStatusLen(eIP.Name)).Should(gomega.Equal(1)) + gomega.Expect(pas.egressIPName).To(gomega.Equal(egressIPName)) + eip1Obj, err = 
fakeOvn.fakeClient.EgressIPClient.K8sV1().EgressIPs().Get(context.TODO(), eIP1.Name, metav1.GetOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Expect(pas.egressStatuses[eip1Obj.Status.Items[0]]).To(gomega.Equal("")) + gomega.Expect(pas.egressStatuses[eip1Obj.Status.Items[1]]).To(gomega.Equal("")) + gomega.Expect(pas.standbyEgressIPNames.Has(egressIPName2)).To(gomega.BeTrue()) - expectedDatabaseState = []libovsdbtest.TestData{ - &nbdb.LogicalRouter{ - Name: ovntypes.OVNClusterRouter, - UUID: ovntypes.OVNClusterRouter + "-UUID", - }, - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.name, - Networks: []string{nodeLogicalRouterIfAddrV6}, - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node1.name, - UUID: ovntypes.GWRouterPrefix + node1.name + "-UUID", - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node2.name, - UUID: ovntypes.GWRouterPrefix + node2.name + "-UUID", - Nat: nil, - }, - } - gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) - return nil - } + // let's test syncPodAssignmentCache works as expected! 
Nuke the podAssignment cache first + fakeOvn.controller.eIPC.podAssignmentMutex.Lock() + fakeOvn.controller.eIPC.podAssignment = make(map[string]*podAssignmentState) // replicates controller startup state + fakeOvn.controller.eIPC.podAssignmentMutex.Unlock() - err := app.Run([]string{app.Name}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - }) + egressIPCache, err := fakeOvn.controller.generateCacheForEgressIP() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.syncPodAssignmentCache(egressIPCache) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) - ginkgo.It("should not remove OVN pod egress setup when EgressIP stops matching, but pod never had any IP to begin with", func() { - app.Action = func(ctx *cli.Context) error { + pas = getPodAssignmentState(&egressPod1) + gomega.Expect(pas).NotTo(gomega.BeNil()) + gomega.Expect(pas.egressIPName).To(gomega.Equal(egressIPName)) + gomega.Expect(pas.egressStatuses).To(gomega.Equal(map[egressipv1.EgressIPStatusItem]string{})) + gomega.Expect(pas.standbyEgressIPNames.Has(egressIPName2)).To(gomega.BeTrue()) + + // reset egressStatuses for rest of the test to progress correctly + fakeOvn.controller.eIPC.podAssignmentMutex.Lock() + fakeOvn.controller.eIPC.podAssignment[getPodKey(&egressPod1)].egressStatuses[eip1Obj.Status.Items[0]] = "" + fakeOvn.controller.eIPC.podAssignment[getPodKey(&egressPod1)].egressStatuses[eip1Obj.Status.Items[1]] = "" + fakeOvn.controller.eIPC.podAssignmentMutex.Unlock() + + // delete the standby egressIP object to make sure the cache is updated + err = fakeOvn.fakeClient.EgressIPClient.K8sV1().EgressIPs().Delete(context.TODO(), egressIPName2, metav1.DeleteOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) - egressIP := net.ParseIP("0:0:0:0:0:feff:c0a8:8e0d") + gomega.Eventually(func() bool { + pas := getPodAssignmentState(&egressPod1) + gomega.Expect(pas).NotTo(gomega.BeNil()) + return pas.standbyEgressIPNames.Has(egressIPName2) + }).Should(gomega.BeFalse()) + 
gomega.Expect(getPodAssignmentState(&egressPod1).egressIPName).To(gomega.Equal(egressIPName)) - egressPod := *newPodWithLabels(namespace, podName, node1Name, "", egressPodLabel) - egressNamespace := newNamespaceWithLabels(namespace, egressPodLabel) - fakeOvn.startWithDBSetup(clusterRouterDbSetup, - &v1.NamespaceList{ - Items: []v1.Namespace{*egressNamespace}, - }, - &v1.PodList{ - Items: []v1.Pod{egressPod}, - }, - ) + // add back the standby egressIP object + _, err = fakeOvn.fakeClient.EgressIPClient.K8sV1().EgressIPs().Create(context.TODO(), &eIP2, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) - node1 := setupNode(node1Name, []string{"0:0:0:0:0:feff:c0a8:8e0c/64"}, map[string]string{"0:0:0:0:0:feff:c0a8:8e32": "bogus1", "0:0:0:0:0:feff:c0a8:8e1e": "bogus2"}) - node2 := setupNode(node2Name, []string{"0:0:0:0:0:fedf:c0a8:8e0c/64"}, map[string]string{"0:0:0:0:0:feff:c0a8:8e23": "bogus3"}) + // NOTE: Cluster manager is the one who patches the egressIP object. + // For the sake of unit testing egressip zone controller we need to patch egressIP object manually + // There are tests in cluster-manager package covering the patch logic. 
+ status = []egressipv1.EgressIPStatusItem{ + { + Node: node1Name, + EgressIP: egressIP3, + }, + } + err = fakeOvn.controller.patchReplaceEgressIPStatus(egressIPName2, status) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) - fakeOvn.controller.eIPC.allocator.cache[node1.name] = &node1 - fakeOvn.controller.eIPC.allocator.cache[node2.name] = &node2 + gomega.Eventually(func() bool { + pas := getPodAssignmentState(&egressPod1) + gomega.Expect(pas).NotTo(gomega.BeNil()) + return pas.standbyEgressIPNames.Has(egressIPName2) + }).Should(gomega.BeTrue()) + gomega.Expect(getPodAssignmentState(&egressPod1).egressIPName).To(gomega.Equal(egressIPName)) + gomega.Eventually(func() string { + return <-fakeOvn.fakeRecorder.Events + }).Should(gomega.ContainSubstring("EgressIP object egressip-2 will not be configured for pod egressip-namespace_egress-pod since another egressIP object egressip is serving it, this is undefined")) + + gomega.Eventually(getEgressIPStatusLen(egressIPName2)).Should(gomega.Equal(1)) + egressIPs2, nodes2 = getEgressIPStatus(egressIPName2) + gomega.Expect(egressIPs2[0]).To(gomega.Equal(egressIP3)) + assginedNodeForEIPObj2 := nodes2[0] + + // Delete the IP from object1 that was on node1 and ensure standby is not taking over + eIPUpdate, err := fakeOvn.fakeClient.EgressIPClient.K8sV1().EgressIPs().Get(context.TODO(), eIP1.Name, metav1.GetOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) - eIP := egressipv1.EgressIP{ - ObjectMeta: newEgressIPMeta("egressip"), - Spec: egressipv1.EgressIPSpec{ - EgressIPs: []string{ - egressIP.String(), - }, - PodSelector: metav1.LabelSelector{ - MatchLabels: egressPodLabel, - }, - NamespaceSelector: metav1.LabelSelector{ - MatchLabels: egressPodLabel, + ipOnNode1 := assginedEIP + var ipOnNode2 string + if ipOnNode1 == egressIP1 { + ipOnNode2 = egressIP2 + } else { + ipOnNode2 = egressIP1 + } + eIPUpdate.Spec.EgressIPs = []string{ipOnNode2} + _, err = 
fakeOvn.fakeClient.EgressIPClient.K8sV1().EgressIPs().Update(context.TODO(), eIPUpdate, metav1.UpdateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + fakeOvn.patchEgressIPObj(node2Name, ipOnNode2) + + gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) + egressIPs1, nodes1 = getEgressIPStatus(egressIPName) + gomega.Expect(nodes1[0]).To(gomega.Equal(node2.Name)) + gomega.Expect(egressIPs1[0]).To(gomega.Equal(ipOnNode2)) + + // check if the setup for firstIP from object1 is deleted properly + podReRoutePolicy.Nexthops = node2LogicalRouterIPv4 + if node2Zone == "remote" { + // the policy reroute will have its second nexthop as transit switchIP + // so the one with join switchIP is where podNode == egressNode and one with transitIP is where podNode != egressNode + podReRoutePolicy.Nexthops = []string{"168.254.0.3"} + } + podNodeSNAT := &nbdb.NAT{ + UUID: "node-nat-UUID1", + LogicalIP: egressPodIP[0].String(), + ExternalIP: "192.168.126.12", // adds back SNAT to nodeIP + Type: nbdb.NATTypeSNAT, + Options: map[string]string{ + "stateless": "false", }, - }, - } + } + if node1Zone != "remote" { + node1GR.Nat = []string{podNodeSNAT.UUID} + finalDatabaseStatewithPod = append(finalDatabaseStatewithPod, podNodeSNAT) + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(finalDatabaseStatewithPod[1:])) + } else { + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(finalDatabaseStatewithPod)) + } - err := fakeOvn.controller.WatchEgressIPNamespaces() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = fakeOvn.controller.WatchEgressIPPods() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = fakeOvn.controller.WatchEgressIP() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Eventually(func() bool { + pas := getPodAssignmentState(&egressPod1) + gomega.Expect(pas).NotTo(gomega.BeNil()) + return pas.standbyEgressIPNames.Has(egressIPName2) + }).Should(gomega.BeTrue()) + 
gomega.Expect(getPodAssignmentState(&egressPod1).egressIPName).To(gomega.Equal(egressIPName)) - _, err = fakeOvn.fakeClient.EgressIPClient.K8sV1().EgressIPs().Create(context.TODO(), &eIP, metav1.CreateOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) + // delete the first egressIP object and make sure the cache is updated + err = fakeOvn.fakeClient.EgressIPClient.K8sV1().EgressIPs().Delete(context.TODO(), egressIPName, metav1.DeleteOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) - gomega.Eventually(getEgressIPStatusLen(eIP.Name)).Should(gomega.Equal(1)) + // ensure standby takes over and we do the setup for it in OVN DB + gomega.Eventually(func() bool { + pas := getPodAssignmentState(&egressPod1) + gomega.Expect(pas).NotTo(gomega.BeNil()) + return pas.standbyEgressIPNames.Has(egressIPName2) + }).Should(gomega.BeFalse()) + gomega.Expect(getPodAssignmentState(&egressPod1).egressIPName).To(gomega.Equal(egressIPName2)) + + finalDatabaseStatewithPod = expectedDatabaseStatewithPod + finalDatabaseStatewithPod = append(expectedDatabaseStatewithPod, podLSP) + podEIPSNAT.ExternalIP = egressIP3 + podEIPSNAT.ExternalIDs = map[string]string{ + "name": egressIPName2, + } + podReRoutePolicy.ExternalIDs = map[string]string{ + "name": egressIPName2, + } + if assginedNodeForEIPObj2 == node2.Name { + podEIPSNAT.LogicalPort = utilpointer.String("k8s-node2") + finalDatabaseStatewithPod = append(finalDatabaseStatewithPod, podNodeSNAT) + node1GR.Nat = []string{podNodeSNAT.UUID} + node2GR.Nat = []string{podEIPSNAT.UUID} + } + if assginedNodeForEIPObj2 == node1.Name { + podReRoutePolicy.Nexthops = nodeLogicalRouterIPv4 + node1GR.Nat = []string{podEIPSNAT.UUID} + node2GR.Nat = []string{} + } + if node1Zone == "remote" { + // policy is not visible since podNode is in remote zone + finalDatabaseStatewithPod[4].(*nbdb.LogicalRouter).Policies = []string{"no-reroute-UUID", "no-reroute-service-UUID"} + finalDatabaseStatewithPod = finalDatabaseStatewithPod[2:] + 
podEIPSNAT.ExternalIP = "192.168.126.12" // EIP SNAT is not visible since podNode is remote, SNAT towards nodeIP is visible. + podEIPSNAT.LogicalPort = nil + finalDatabaseStatewithPod = append(finalDatabaseStatewithPod, podNodeSNAT) + node1GR.Nat = []string{"node-nat-UUID1"} + finalDatabaseStatewithPod[2].(*nbdb.LogicalRouter).StaticRoutes = []string{} + } + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(finalDatabaseStatewithPod)) - egressIPs, nodes := getEgressIPStatus(eIP.Name) - gomega.Expect(nodes[0]).To(gomega.Equal(node2.name)) - gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP.String())) + // delete the second egressIP object to make sure the cache is updated podKey should be gone since nothing is managing it anymore + err = fakeOvn.fakeClient.EgressIPClient.K8sV1().EgressIPs().Delete(context.TODO(), egressIPName2, metav1.DeleteOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) - namespaceUpdate := newNamespace(namespace) + gomega.Eventually(func() bool { + return getPodAssignmentState(&egressPod1) != nil + }).Should(gomega.BeFalse()) - _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Namespaces().Update(context.TODO(), namespaceUpdate, metav1.UpdateOptions{}) - gomega.Expect(err).ToNot(gomega.HaveOccurred()) - gomega.Eventually(getEgressIPStatusLen(eIP.Name)).Should(gomega.Equal(1)) - return nil - } + // let's test syncPodAssignmentCache works as expected! 
Nuke the podAssignment cache first + fakeOvn.controller.eIPC.podAssignmentMutex.Lock() + fakeOvn.controller.eIPC.podAssignment = make(map[string]*podAssignmentState) // replicates controller startup state + fakeOvn.controller.eIPC.podAssignmentMutex.Unlock() + + egressIPCache, err = fakeOvn.controller.generateCacheForEgressIP() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.syncPodAssignmentCache(egressIPCache) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + // we don't have any egressIPs, so cache is nil + gomega.Eventually(func() bool { + return getPodAssignmentState(&egressPod1) != nil + }).Should(gomega.BeFalse()) + + return nil + } + + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }, + ginkgotable.Entry("interconnect disabled; non-ic - single zone setup", false, "global", "global"), + ginkgotable.Entry("interconnect enabled; node1 and node2 in global zones", true, "global", "global"), + // will showcase localzone setup - master is in pod's zone where pod's reroute policy towards egressNode will be done. + // NOTE: SNAT won't be visible because its in remote zone + ginkgotable.Entry("interconnect enabled; node1 in global and node2 in remote zones", true, "global", "remote"), + // will showcase localzone setup - master is in egress node's zone where pod's SNAT policy and static route will be done. 
+ // NOTE: reroute policy won't be visible because its in remote zone (pod is in remote zone) + ginkgotable.Entry("interconnect enabled; node1 in remote and node2 in global zones", true, "remote", "global"), + ) - err := app.Run([]string{app.Name}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - }) }) - ginkgo.Context("on EgressIP UPDATE", func() { - ginkgo.It("should update OVN on EgressIP .spec.egressips change", func() { - app.Action = func(ctx *cli.Context) error { + ginkgo.Context("WatchEgressNodes running with WatchEgressIP", func() { - egressIP1 := "192.168.126.101" - egressIP2 := "192.168.126.102" - egressIP3 := "192.168.126.103" - node1IPv4 := "192.168.126.202/24" - node2IPv4 := "192.168.126.51/24" + ginkgo.It("should treat un-assigned EgressIPs when it is tagged", func() { + app.Action = func(ctx *cli.Context) error { - egressPod := *newPodWithLabels(namespace, podName, node1Name, podV4IP, egressPodLabel) - egressNamespace := newNamespace(namespace) + egressIP := "192.168.126.101" + nodeIPv4 := "192.168.126.51/24" + nodeIPv6 := "0:0:0:0:0:feff:c0a8:8e0c/64" - node1 := v1.Node{ + node := v1.Node{ ObjectMeta: metav1.ObjectMeta{ Name: node1Name, Annotations: map[string]string{ - "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", node1IPv4, ""), - "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), - }, - Labels: map[string]string{ - "k8s.ovn.org/egress-assignable": "", - }, - }, - Status: v1.NodeStatus{ - Conditions: []v1.NodeCondition{ - { - Type: v1.NodeReady, - Status: v1.ConditionTrue, - }, - }, - }, - } - node2 := v1.Node{ - ObjectMeta: metav1.ObjectMeta{ - Name: node2Name, - Annotations: map[string]string{ - "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", node2IPv4, ""), - "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), - }, - Labels: map[string]string{ - "k8s.ovn.org/egress-assignable": "", + 
"k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", nodeIPv4, nodeIPv6), + "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":[\"%s\", \"%s\"]}", v4NodeSubnet, v6NodeSubnet), }, }, Status: v1.NodeStatus{ @@ -2865,15 +4611,7 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { eIP := egressipv1.EgressIP{ ObjectMeta: newEgressIPMeta(egressIPName), Spec: egressipv1.EgressIPSpec{ - EgressIPs: []string{egressIP1, egressIP2}, - PodSelector: metav1.LabelSelector{ - MatchLabels: egressPodLabel, - }, - NamespaceSelector: metav1.LabelSelector{ - MatchLabels: map[string]string{ - "name": egressNamespace.Name, - }, - }, + EgressIPs: []string{egressIP}, }, Status: egressipv1.EgressIPStatus{ Items: []egressipv1.EgressIPStatusItem{}, @@ -2883,60 +4621,31 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { fakeOvn.startWithDBSetup( libovsdbtest.TestSetup{ NBData: []libovsdbtest.TestData{ - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name, - Networks: []string{"100.64.0.3/29"}, - }, - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name, - Networks: []string{"100.64.0.2/29"}, - }, &nbdb.LogicalRouter{ Name: ovntypes.OVNClusterRouter, UUID: ovntypes.OVNClusterRouter + "-UUID", }, &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node1.Name, - UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node2.Name, - UUID: ovntypes.GWRouterPrefix + node2.Name + "-UUID", - }, - &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + 
node1Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, - }, + Name: ovntypes.GWRouterPrefix + node.Name, + UUID: ovntypes.GWRouterPrefix + node.Name + "-UUID", }, &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node.Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node.Name, Type: "router", Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node.Name, }, }, }, }, - &v1.NodeList{ - Items: []v1.Node{node1, node2}, - }, - &v1.NamespaceList{ - Items: []v1.Namespace{*egressNamespace}, + &egressipv1.EgressIPList{ + Items: []egressipv1.EgressIP{eIP}, }, - &v1.PodList{ - Items: []v1.Pod{egressPod}, + &v1.NodeList{ + Items: []v1.Node{node}, }) - i, n, _ := net.ParseCIDR(podV4IP + "/23") - n.IP = i - fakeOvn.controller.logicalPortCache.add(&egressPod, "", types.DefaultNetworkName, "", nil, []*net.IPNet{n}) - err := fakeOvn.controller.WatchEgressIPNamespaces() gomega.Expect(err).NotTo(gomega.HaveOccurred()) err = fakeOvn.controller.WatchEgressIPPods() @@ -2946,22 +4655,12 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { err = fakeOvn.controller.WatchEgressIP() gomega.Expect(err).NotTo(gomega.HaveOccurred()) - _, err = fakeOvn.fakeClient.EgressIPClient.K8sV1().EgressIPs().Create(context.TODO(), &eIP, metav1.CreateOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - - gomega.Eventually(getEgressIPStatusLen(eIP.Name)).Should(gomega.Equal(2)) - egressIPs, nodes := getEgressIPStatus(eIP.Name) - assignmentNode1, assignmentNode2 := nodes[0], nodes[1] - assignedEgressIP1, assignedEgressIP2 := egressIPs[0], egressIPs[1] - - expectedNatLogicalPort1 
:= fmt.Sprintf("k8s-%s", assignmentNode1) - expectedNatLogicalPort2 := fmt.Sprintf("k8s-%s", assignmentNode2) expectedDatabaseState := []libovsdbtest.TestData{ &nbdb.LogicalRouterPolicy{ Priority: types.DefaultNoRereoutePriority, Match: "ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14", Action: nbdb.LogicalRouterPolicyActionAllow, - UUID: "default-no-reroute-UUID", + UUID: "no-reroute-UUID", }, &nbdb.LogicalRouterPolicy{ Priority: types.DefaultNoRereoutePriority, @@ -2969,83 +4668,21 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { Action: nbdb.LogicalRouterPolicyActionAllow, UUID: "no-reroute-service-UUID", }, - &nbdb.LogicalRouterPolicy{ - Priority: types.EgressIPReroutePriority, - Match: fmt.Sprintf("ip4.src == %s", egressPod.Status.PodIP), - Action: nbdb.LogicalRouterPolicyActionReroute, - Nexthops: []string{"100.64.0.2", "100.64.0.3"}, - ExternalIDs: map[string]string{ - "name": eIP.Name, - }, - UUID: "reroute-UUID", - }, - &nbdb.NAT{ - UUID: "egressip-nat-1-UUID", - LogicalIP: podV4IP, - ExternalIP: assignedEgressIP1, - ExternalIDs: map[string]string{ - "name": egressIPName, - }, - Type: nbdb.NATTypeSNAT, - LogicalPort: &expectedNatLogicalPort1, - Options: map[string]string{ - "stateless": "false", - }, - }, - &nbdb.NAT{ - UUID: "egressip-nat-2-UUID", - LogicalIP: podV4IP, - ExternalIP: assignedEgressIP2, - ExternalIDs: map[string]string{ - "name": egressIPName, - }, - Type: nbdb.NATTypeSNAT, - LogicalPort: &expectedNatLogicalPort2, - Options: map[string]string{ - "stateless": "false", - }, - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + assignmentNode1, - UUID: ovntypes.GWRouterPrefix + assignmentNode1 + "-UUID", - Nat: []string{"egressip-nat-1-UUID"}, - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + assignmentNode2, - UUID: ovntypes.GWRouterPrefix + assignmentNode2 + "-UUID", - Nat: []string{"egressip-nat-2-UUID"}, - }, &nbdb.LogicalRouter{ Name: ovntypes.OVNClusterRouter, UUID: ovntypes.OVNClusterRouter 
+ "-UUID", - Policies: []string{"reroute-UUID", "default-no-reroute-UUID", "no-reroute-service-UUID"}, - }, - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name, - Networks: []string{"100.64.0.3/29"}, - }, - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name, - Networks: []string{"100.64.0.2/29"}, + Policies: []string{"no-reroute-UUID", "no-reroute-service-UUID"}, }, - &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, - "nat-addresses": "router", - "exclude-lb-vips-from-garp": "true", - }, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node.Name, + UUID: ovntypes.GWRouterPrefix + node.Name + "-UUID", }, &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node.Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node.Name, Type: "router", Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node.Name, "nat-addresses": "router", "exclude-lb-vips-from-garp": "true", }, @@ -3053,29 +4690,29 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { } gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) - latest, err := 
fakeOvn.fakeClient.EgressIPClient.K8sV1().EgressIPs().Get(context.TODO(), eIP.Name, metav1.GetOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - latest.Spec.EgressIPs = []string{egressIP3, egressIP2} - _, err = fakeOvn.fakeClient.EgressIPClient.K8sV1().EgressIPs().Update(context.TODO(), latest, metav1.UpdateOptions{}) + gomega.Eventually(eIP.Status.Items).Should(gomega.HaveLen(0)) + + node.Labels = map[string]string{ + "k8s.ovn.org/egress-assignable": "", + } + + _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Nodes().Update(context.TODO(), &node, metav1.UpdateOptions{}) gomega.Expect(err).NotTo(gomega.HaveOccurred()) - gomega.Eventually(func() []string { - egressIPs, _ = getEgressIPStatus(eIP.Name) - return egressIPs - }).Should(gomega.ConsistOf(egressIP3, egressIP2)) + fakeOvn.patchEgressIPObj(node1Name, egressIP) - egressIPs, nodes = getEgressIPStatus(eIP.Name) - assignmentNode1, assignmentNode2 = nodes[0], nodes[1] - assignedEgressIP1, assignedEgressIP2 = egressIPs[0], egressIPs[1] + gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) + egressIPs, nodes := getEgressIPStatus(egressIPName) + gomega.Expect(nodes[0]).To(gomega.Equal(node.Name)) + gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP)) - expectedNatLogicalPort1 = fmt.Sprintf("k8s-%s", assignmentNode1) - expectedNatLogicalPort2 = fmt.Sprintf("k8s-%s", assignmentNode2) + gomega.Eventually(getEgressIPReassignmentCount).Should(gomega.Equal(0)) expectedDatabaseState = []libovsdbtest.TestData{ &nbdb.LogicalRouterPolicy{ Priority: types.DefaultNoRereoutePriority, Match: "ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14", Action: nbdb.LogicalRouterPolicyActionAllow, - UUID: "default-no-reroute-UUID", + UUID: "no-reroute-UUID", }, &nbdb.LogicalRouterPolicy{ Priority: types.DefaultNoRereoutePriority, @@ -3083,83 +4720,21 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { Action: nbdb.LogicalRouterPolicyActionAllow, UUID: "no-reroute-service-UUID", }, 
- &nbdb.LogicalRouterPolicy{ - Priority: types.EgressIPReroutePriority, - Match: fmt.Sprintf("ip4.src == %s", egressPod.Status.PodIP), - Action: nbdb.LogicalRouterPolicyActionReroute, - Nexthops: []string{"100.64.0.2", "100.64.0.3"}, - ExternalIDs: map[string]string{ - "name": eIP.Name, - }, - UUID: "reroute-UUID", - }, - &nbdb.NAT{ - UUID: "egressip-nat-1-UUID", - LogicalIP: podV4IP, - ExternalIP: assignedEgressIP1, - ExternalIDs: map[string]string{ - "name": egressIPName, - }, - Type: nbdb.NATTypeSNAT, - LogicalPort: &expectedNatLogicalPort1, - Options: map[string]string{ - "stateless": "false", - }, - }, - &nbdb.NAT{ - UUID: "egressip-nat-2-UUID", - LogicalIP: podV4IP, - ExternalIP: assignedEgressIP2, - ExternalIDs: map[string]string{ - "name": egressIPName, - }, - Type: nbdb.NATTypeSNAT, - LogicalPort: &expectedNatLogicalPort2, - Options: map[string]string{ - "stateless": "false", - }, - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + assignmentNode1, - UUID: ovntypes.GWRouterPrefix + assignmentNode1 + "-UUID", - Nat: []string{"egressip-nat-1-UUID"}, - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + assignmentNode2, - UUID: ovntypes.GWRouterPrefix + assignmentNode2 + "-UUID", - Nat: []string{"egressip-nat-2-UUID"}, - }, &nbdb.LogicalRouter{ Name: ovntypes.OVNClusterRouter, UUID: ovntypes.OVNClusterRouter + "-UUID", - Policies: []string{"reroute-UUID", "default-no-reroute-UUID", "no-reroute-service-UUID"}, - }, - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name, - Networks: []string{"100.64.0.3/29"}, - }, - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name, - Networks: []string{"100.64.0.2/29"}, + Policies: []string{"no-reroute-UUID", 
"no-reroute-service-UUID"}, }, - &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, - "nat-addresses": "router", - "exclude-lb-vips-from-garp": "true", - }, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node.Name, + UUID: ovntypes.GWRouterPrefix + node.Name + "-UUID", }, &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node.Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node.Name, Type: "router", Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node.Name, "nat-addresses": "router", "exclude-lb-vips-from-garp": "true", }, @@ -3173,158 +4748,134 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { gomega.Expect(err).NotTo(gomega.HaveOccurred()) }) - ginkgo.It("should delete and re-create", func() { + ginkgo.It("should result in error and event if specified egress IP is a cluster node IP", func() { app.Action = func(ctx *cli.Context) error { - egressIP := net.ParseIP("0:0:0:0:0:feff:c0a8:8e0d") - updatedEgressIP := net.ParseIP("0:0:0:0:0:feff:c0a8:8ffd") - - egressPod := *newPodWithLabels(namespace, podName, node1Name, podV6IP, egressPodLabel) - egressNamespace := newNamespaceWithLabels(namespace, egressPodLabel) - - node1 := setupNode(node1Name, []string{"0:0:0:0:0:feff:c0a8:8e0c/64"}, map[string]string{"0:0:0:0:0:feff:c0a8:8e32": "bogus1", "0:0:0:0:0:feff:c0a8:8e1e": "bogus2"}) - node2 := setupNode(node2Name, []string{"0:0:0:0:0:fedf:c0a8:8e0c/64"}, 
map[string]string{"0:0:0:0:0:feff:c0a8:8e23": "bogus3"}) - + egressIP := "192.168.126.51" + node1IPv4 := "192.168.128.202/24" + node1IPv6 := "0:0:0:0:0:feff:c0a8:8e0c/64" + node2IPv4 := "192.168.126.51/24" + annotations := map[string]string{ + "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", node1IPv4, node1IPv6), + "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":[\"%s\", \"%s\"]}", v4NodeSubnet, v6NodeSubnet), + } + labels := map[string]string{ + "k8s.ovn.org/egress-assignable": "", + } + node1 := getNodeObj(node1Name, annotations, labels) + annotations = map[string]string{ + "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", node2IPv4, ""), + "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), + } + node2 := getNodeObj(node2Name, annotations, labels) + + eIP := egressipv1.EgressIP{ + ObjectMeta: newEgressIPMeta(egressIPName), + Spec: egressipv1.EgressIPSpec{ + EgressIPs: []string{egressIP}, + }, + Status: egressipv1.EgressIPStatus{ + Items: []egressipv1.EgressIPStatusItem{}, + }, + } + fakeOvn.startWithDBSetup( libovsdbtest.TestSetup{ NBData: []libovsdbtest.TestData{ - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.name, - Networks: []string{nodeLogicalRouterIfAddrV6}, - }, &nbdb.LogicalRouter{ Name: ovntypes.OVNClusterRouter, UUID: ovntypes.OVNClusterRouter + "-UUID", }, &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node1.name, - UUID: ovntypes.GWRouterPrefix + node1.name + "-UUID", + Name: ovntypes.GWRouterPrefix + node1.Name, + UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", }, &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node2.name, - UUID: ovntypes.GWRouterPrefix + node2.name + "-UUID", - Nat: nil, + Name: ovntypes.GWRouterPrefix + node2.Name, + UUID: ovntypes.GWRouterPrefix + 
node2.Name + "-UUID", + }, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, + }, + }, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, + }, }, }, }, - &v1.NamespaceList{ - Items: []v1.Namespace{*egressNamespace}, - }, - &v1.PodList{ - Items: []v1.Pod{egressPod}, - }, - ) - - i, n, _ := net.ParseCIDR(podV6IP + "/23") - n.IP = i - fakeOvn.controller.logicalPortCache.add(&egressPod, "", types.DefaultNetworkName, "", nil, []*net.IPNet{n}) - fakeOvn.controller.eIPC.allocator.cache[node1.name] = &node1 - fakeOvn.controller.eIPC.allocator.cache[node2.name] = &node2 - - eIP := egressipv1.EgressIP{ - ObjectMeta: newEgressIPMeta(egressIPName), - Spec: egressipv1.EgressIPSpec{ - EgressIPs: []string{ - egressIP.String(), - }, - PodSelector: metav1.LabelSelector{ - MatchLabels: egressPodLabel, - }, - NamespaceSelector: metav1.LabelSelector{ - MatchLabels: egressPodLabel, - }, + &egressipv1.EgressIPList{ + Items: []egressipv1.EgressIP{eIP}, }, - } + &v1.NodeList{ + Items: []v1.Node{node1, node2}, + }) err := fakeOvn.controller.WatchEgressIPNamespaces() gomega.Expect(err).NotTo(gomega.HaveOccurred()) err = fakeOvn.controller.WatchEgressIPPods() gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = fakeOvn.controller.WatchEgressIP() + err = fakeOvn.controller.WatchEgressNodes() gomega.Expect(err).NotTo(gomega.HaveOccurred()) - - _, err = fakeOvn.fakeClient.EgressIPClient.K8sV1().EgressIPs().Create(context.TODO(), &eIP, metav1.CreateOptions{}) + err = 
fakeOvn.controller.WatchEgressIP() gomega.Expect(err).NotTo(gomega.HaveOccurred()) - gomega.Eventually(getEgressIPStatusLen(eIP.Name)).Should(gomega.Equal(1)) - - expectedNatLogicalPort := "k8s-node2" expectedDatabaseState := []libovsdbtest.TestData{ &nbdb.LogicalRouterPolicy{ - Priority: types.EgressIPReroutePriority, - Match: fmt.Sprintf("ip6.src == %s", egressPod.Status.PodIP), - Action: nbdb.LogicalRouterPolicyActionReroute, - Nexthops: nodeLogicalRouterIPv6, - ExternalIDs: map[string]string{ - "name": eIP.Name, - }, - UUID: "reroute-UUID", + Priority: types.DefaultNoRereoutePriority, + Match: "ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14", + Action: nbdb.LogicalRouterPolicyActionAllow, + UUID: "no-reroute-UUID", + }, + &nbdb.LogicalRouterPolicy{ + Priority: types.DefaultNoRereoutePriority, + Match: fmt.Sprintf("ip4.src == 10.128.0.0/14 && ip4.dst == %s", config.Gateway.V4JoinSubnet), + Action: nbdb.LogicalRouterPolicyActionAllow, + UUID: "no-reroute-service-UUID", }, &nbdb.LogicalRouter{ Name: ovntypes.OVNClusterRouter, UUID: ovntypes.OVNClusterRouter + "-UUID", - Policies: []string{"reroute-UUID"}, - }, - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.name, - Networks: []string{nodeLogicalRouterIfAddrV6}, - }, - &nbdb.NAT{ - UUID: "egressip-nat-UUID", - LogicalIP: podV6IP, - ExternalIP: egressIP.String(), - ExternalIDs: map[string]string{ - "name": egressIPName, - }, - Type: nbdb.NATTypeSNAT, - LogicalPort: &expectedNatLogicalPort, - Options: map[string]string{ - "stateless": "false", - }, + Policies: []string{"no-reroute-UUID", "no-reroute-service-UUID"}, }, &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node1.name, - UUID: ovntypes.GWRouterPrefix + node1.name + "-UUID", + Name: ovntypes.GWRouterPrefix + node1.Name, + UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", }, &nbdb.LogicalRouter{ 
- Name: ovntypes.GWRouterPrefix + node2.name, - UUID: ovntypes.GWRouterPrefix + node2.name + "-UUID", - Nat: []string{"egressip-nat-UUID"}, - }, - } - gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) - - egressIPs, nodes := getEgressIPStatus(eIP.Name) - gomega.Expect(nodes[0]).To(gomega.Equal(node2.name)) - gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP.String())) - - eIPUpdate, err := fakeOvn.fakeClient.EgressIPClient.K8sV1().EgressIPs().Get(context.TODO(), eIP.Name, metav1.GetOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - - eIPUpdate.Spec = egressipv1.EgressIPSpec{ - EgressIPs: []string{ - updatedEgressIP.String(), + Name: ovntypes.GWRouterPrefix + node2.Name, + UUID: ovntypes.GWRouterPrefix + node2.Name + "-UUID", }, - PodSelector: metav1.LabelSelector{ - MatchLabels: egressPodLabel, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, + "nat-addresses": "router", + "exclude-lb-vips-from-garp": "true", + }, }, - NamespaceSelector: metav1.LabelSelector{ - MatchLabels: egressPodLabel, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, + "nat-addresses": "router", + "exclude-lb-vips-from-garp": "true", + }, }, } - - _, err = fakeOvn.fakeClient.EgressIPClient.K8sV1().EgressIPs().Update(context.TODO(), eIPUpdate, metav1.UpdateOptions{}) - gomega.Expect(err).ToNot(gomega.HaveOccurred()) gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) - - gomega.Eventually(func() 
[]string { - egressIPs, _ = getEgressIPStatus(eIP.Name) - return egressIPs - }).Should(gomega.ContainElement(updatedEgressIP.String())) - - gomega.Expect(nodes[0]).To(gomega.Equal(node2.name)) return nil } @@ -3332,127 +4883,94 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { gomega.Expect(err).NotTo(gomega.HaveOccurred()) }) - }) - - ginkgo.Context("WatchEgressNodes", func() { - - ginkgo.It("should populated egress node data as they are tagged `egress assignable` with variants of IPv4/IPv6", func() { + ginkgo.It("should re-assigned EgressIPs when more nodes get tagged if the first assignment attempt wasn't fully successful", func() { app.Action = func(ctx *cli.Context) error { - node1IPv4 := "192.168.128.202/24" - node1IPv6 := "0:0:0:0:0:feff:c0a8:8e0c/64" - node2IPv4 := "192.168.126.51/24" + egressIP1 := "192.168.126.25" + egressIP2 := "192.168.126.30" + node1IPv4 := "192.168.126.51/24" + node2IPv4 := "192.168.126.101/24" - node1 := v1.Node{ - ObjectMeta: metav1.ObjectMeta{ - Name: "node1", - Annotations: map[string]string{ - "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", node1IPv4, node1IPv6), - "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":[\"%s\", \"%s\"]}", v4NodeSubnet, v6NodeSubnet), - }, - }, - Status: v1.NodeStatus{ - Conditions: []v1.NodeCondition{ - { - Type: v1.NodeReady, - Status: v1.ConditionTrue, - }, - }, - }, + annotations := map[string]string{ + "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", node1IPv4, ""), + "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), } - node2 := v1.Node{ - ObjectMeta: metav1.ObjectMeta{ - Name: "node2", - Annotations: map[string]string{ - "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", node2IPv4, ""), - "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), - }, + labels := map[string]string{ + 
"k8s.ovn.org/egress-assignable": "", + } + node1 := getNodeObj(node1Name, annotations, labels) + annotations = map[string]string{ + "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", node2IPv4, ""), + "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), + } + labels = map[string]string{} + node2 := getNodeObj(node2Name, annotations, labels) + + eIP := egressipv1.EgressIP{ + ObjectMeta: newEgressIPMeta(egressIPName), + Spec: egressipv1.EgressIPSpec{ + EgressIPs: []string{egressIP1, egressIP2}, }, - Status: v1.NodeStatus{ - Conditions: []v1.NodeCondition{ - { - Type: v1.NodeReady, - Status: v1.ConditionTrue, - }, - }, + Status: egressipv1.EgressIPStatus{ + Items: []egressipv1.EgressIPStatusItem{}, }, } - fakeOvn.startWithDBSetup(libovsdbtest.TestSetup{ - NBData: []libovsdbtest.TestData{ - &nbdb.LogicalRouter{ - Name: ovntypes.OVNClusterRouter, - UUID: ovntypes.OVNClusterRouter + "-UUID", - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node1.Name, - UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node2.Name, - UUID: ovntypes.GWRouterPrefix + node2.Name + "-UUID", - }, - &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, + + fakeOvn.startWithDBSetup( + libovsdbtest.TestSetup{ + NBData: []libovsdbtest.TestData{ + &nbdb.LogicalRouter{ + Name: ovntypes.OVNClusterRouter, + UUID: ovntypes.OVNClusterRouter + "-UUID", }, - }, - &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, - Type: "router", - Options: map[string]string{ - "router-port": 
types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node1.Name, + UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", }, - }, - }, - }) - err := fakeOvn.controller.WatchEgressNodes() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - - gomega.Eventually(getEgressIPAllocatorSizeSafely).Should(gomega.Equal(0)) - - node1.Labels = map[string]string{ - "k8s.ovn.org/egress-assignable": "", - } - - _, ip1V4Sub, err := net.ParseCIDR(node1IPv4) - _, ip1V6Sub, err := net.ParseCIDR(node1IPv6) - _, ip2V4Sub, err := net.ParseCIDR(node2IPv4) + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node2.Name, + UUID: ovntypes.GWRouterPrefix + node2.Name + "-UUID", + }, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, + }, + }, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, + }, + }, + }, + }, + &egressipv1.EgressIPList{ + Items: []egressipv1.EgressIP{eIP}, + }, + &v1.NodeList{ + Items: []v1.Node{node1, node2}, + }) - _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Nodes().Create(context.TODO(), &node1, metav1.CreateOptions{}) + err := fakeOvn.controller.WatchEgressIPNamespaces() gomega.Expect(err).NotTo(gomega.HaveOccurred()) - gomega.Eventually(getEgressIPAllocatorSizeSafely).Should(gomega.Equal(1)) - gomega.Expect(fakeOvn.controller.eIPC.allocator.cache).To(gomega.HaveKey(node1.Name)) - gomega.Expect(fakeOvn.controller.eIPC.allocator.cache[node1.Name].egressIPConfig.V4.Net).To(gomega.Equal(ip1V4Sub)) - 
gomega.Expect(fakeOvn.controller.eIPC.allocator.cache[node1.Name].egressIPConfig.V6.Net).To(gomega.Equal(ip1V6Sub)) - - node2.Labels = map[string]string{ - "k8s.ovn.org/egress-assignable": "", - } - - _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Nodes().Create(context.TODO(), &node2, metav1.CreateOptions{}) + err = fakeOvn.controller.WatchEgressIPPods() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchEgressNodes() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchEgressIP() gomega.Expect(err).NotTo(gomega.HaveOccurred()) - gomega.Eventually(getEgressIPAllocatorSizeSafely).Should(gomega.Equal(2)) - gomega.Expect(fakeOvn.controller.eIPC.allocator.cache).To(gomega.HaveKey(node1.Name)) - gomega.Expect(fakeOvn.controller.eIPC.allocator.cache).To(gomega.HaveKey(node2.Name)) - gomega.Expect(fakeOvn.controller.eIPC.allocator.cache[node2.Name].egressIPConfig.V4.Net).To(gomega.Equal(ip2V4Sub)) - gomega.Expect(fakeOvn.controller.eIPC.allocator.cache[node1.Name].egressIPConfig.V4.Net).To(gomega.Equal(ip1V4Sub)) - gomega.Expect(fakeOvn.controller.eIPC.allocator.cache[node1.Name].egressIPConfig.V6.Net).To(gomega.Equal(ip1V6Sub)) expectedDatabaseState := []libovsdbtest.TestData{ - &nbdb.LogicalRouter{ - Name: ovntypes.OVNClusterRouter, - UUID: ovntypes.OVNClusterRouter + "-UUID", - Policies: []string{"reroute-UUID", "no-reroute-service-UUID"}, - }, &nbdb.LogicalRouterPolicy{ Priority: types.DefaultNoRereoutePriority, Match: "ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14", Action: nbdb.LogicalRouterPolicyActionAllow, - UUID: "reroute-UUID", + UUID: "no-reroute-UUID", }, &nbdb.LogicalRouterPolicy{ Priority: types.DefaultNoRereoutePriority, @@ -3460,6 +4978,11 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { Action: nbdb.LogicalRouterPolicyActionAllow, UUID: "no-reroute-service-UUID", }, + &nbdb.LogicalRouter{ + Name: ovntypes.OVNClusterRouter, + UUID: ovntypes.OVNClusterRouter + 
"-UUID", + Policies: []string{"no-reroute-UUID", "no-reroute-service-UUID"}, + }, &nbdb.LogicalRouter{ Name: ovntypes.GWRouterPrefix + node1.Name, UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", @@ -3490,115 +5013,20 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { }, } gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) - return nil - } - - err := app.Run([]string{app.Name}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - }) - - ginkgo.It("using retry to create egress node with forced error followed by an update", func() { - app.Action = func(ctx *cli.Context) error { - nodeIPv4 := "192.168.126.51/24" - nodeIPv6 := "0:0:0:0:0:feff:c0a8:8e0c/64" - node := v1.Node{ - ObjectMeta: metav1.ObjectMeta{ - Name: "node", - Annotations: map[string]string{ - "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", nodeIPv4, nodeIPv6), - "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":[\"%s\", \"%s\"]}", v4NodeSubnet, v6NodeSubnet), - }, - }, - Status: v1.NodeStatus{ - Conditions: []v1.NodeCondition{ - { - Type: v1.NodeReady, - Status: v1.ConditionTrue, - }, - }, - }, - } - fakeOvn.startWithDBSetup(libovsdbtest.TestSetup{ - NBData: []libovsdbtest.TestData{ - &nbdb.LogicalRouter{ - Name: ovntypes.OVNClusterRouter, - UUID: ovntypes.OVNClusterRouter + "-UUID", - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node.Name, - UUID: ovntypes.GWRouterPrefix + node.Name + "-UUID", - }, - &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + nodeName + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + nodeName, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + nodeName, - }, - }, - }, - }) - err := fakeOvn.controller.WatchEgressNodes() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - - 
gomega.Eventually(getEgressIPAllocatorSizeSafely).Should(gomega.Equal(0)) - _, ipV4Sub, err := net.ParseCIDR(nodeIPv4) - _, ipV6Sub, err := net.ParseCIDR(nodeIPv6) - node.Labels = map[string]string{ + node2.Labels = map[string]string{ "k8s.ovn.org/egress-assignable": "", } - _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Nodes().Create(context.TODO(), &node, metav1.CreateOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - ginkgo.By("Bringing down NBDB") - // inject transient problem, nbdb is down - fakeOvn.controller.nbClient.Close() - gomega.Eventually(func() bool { - return fakeOvn.controller.nbClient.Connected() - }).Should(gomega.BeFalse()) - - // sleep long enough for TransactWithRetry to fail, causing egressnode operations to fail - // there is a chance that both egressnode events(node1 removal and node2 update) will end up in the same event queue - // sleep for double the time to allow for two consecutive TransactWithRetry timeouts - time.Sleep(2 * (types.OVSDBTimeout + time.Second)) - // check to see if the retry cache has an entry - key, err := retry.GetResourceKey(&node) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - retry.CheckRetryObjectEventually(key, true, fakeOvn.controller.retryEgressNodes) - ginkgo.By("retry entry: old obj should be nil, new obj should not be nil") - retry.CheckRetryObjectMultipleFieldsEventually( - key, - fakeOvn.controller.retryEgressNodes, - gomega.BeNil(), // oldObj should be nil - gomega.Not(gomega.BeNil()), // newObj should not be nil - ) - - node.Labels = map[string]string{} - _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Nodes().Update(context.TODO(), &node, metav1.UpdateOptions{}) + _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Nodes().Update(context.TODO(), &node2, metav1.UpdateOptions{}) gomega.Expect(err).NotTo(gomega.HaveOccurred()) - connCtx, cancel := context.WithTimeout(context.Background(), types.OVSDBTimeout) - defer cancel() - resetNBClient(connCtx, fakeOvn.controller.nbClient) - 
retry.SetRetryObjWithNoBackoff(key, fakeOvn.controller.retryEgressNodes) - fakeOvn.controller.retryEgressNodes.RequestRetryObjs() - // check the cache no longer has the entry - retry.CheckRetryObjectEventually(key, false, fakeOvn.controller.retryEgressNodes) - gomega.Eventually(getEgressIPAllocatorSizeSafely).Should(gomega.Equal(1)) - gomega.Expect(fakeOvn.controller.eIPC.allocator.cache).To(gomega.HaveKey(node.Name)) - gomega.Expect(fakeOvn.controller.eIPC.allocator.cache[node.Name].egressIPConfig.V4.Net).To(gomega.Equal(ipV4Sub)) - gomega.Expect(fakeOvn.controller.eIPC.allocator.cache[node.Name].egressIPConfig.V6.Net).To(gomega.Equal(ipV6Sub)) - expectedDatabaseState := []libovsdbtest.TestData{ - &nbdb.LogicalRouter{ - Name: ovntypes.OVNClusterRouter, - UUID: ovntypes.OVNClusterRouter + "-UUID", - Policies: []string{"reroute-UUID", "no-reroute-service-UUID"}, - }, + // note: since there are no egressIP pods created in this test, we didn't need to manually patch the status. + expectedDatabaseState = []libovsdbtest.TestData{ &nbdb.LogicalRouterPolicy{ Priority: types.DefaultNoRereoutePriority, Match: "ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14", Action: nbdb.LogicalRouterPolicyActionAllow, - UUID: "reroute-UUID", + UUID: "no-reroute-UUID", }, &nbdb.LogicalRouterPolicy{ Priority: types.DefaultNoRereoutePriority, @@ -3607,15 +5035,36 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { UUID: "no-reroute-service-UUID", }, &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node.Name, - UUID: ovntypes.GWRouterPrefix + node.Name + "-UUID", + Name: ovntypes.OVNClusterRouter, + UUID: ovntypes.OVNClusterRouter + "-UUID", + Policies: []string{"no-reroute-UUID", "no-reroute-service-UUID"}, + }, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node1.Name, + UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", + }, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node2.Name, + UUID: ovntypes.GWRouterPrefix + node2.Name + "-UUID", 
}, &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + nodeName + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + nodeName, + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, Type: "router", Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + nodeName, + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, + "nat-addresses": "router", + "exclude-lb-vips-from-garp": "true", + }, + }, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, + "nat-addresses": "router", + "exclude-lb-vips-from-garp": "true", }, }, } @@ -3627,39 +5076,29 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { gomega.Expect(err).NotTo(gomega.HaveOccurred()) }) - ginkgo.It("egressIP pod recreate with same name (stateful-sets) shouldn't use stale logicalPortCache entries", func() { + ginkgo.It("should remove stale EgressIP setup when node label is removed while ovnkube-master is not running and assign to newly labelled node", func() { app.Action = func(ctx *cli.Context) error { - config.Gateway.DisableSNATMultipleGWs = true - - egressIP1 := "192.168.126.101" - node1IPv4 := "192.168.126.12/24" + egressIP1 := "192.168.126.25" + node1IPv4 := "192.168.126.51/24" - egressPod1 := *newPodWithLabels(namespace, podName, node1Name, "", egressPodLabel) + egressPod := *newPodWithLabels(namespace, podName, node1Name, podV4IP, egressPodLabel) egressNamespace := newNamespace(namespace) + annotations := map[string]string{ + "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", node1IPv4, ""), + 
"k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), + } + labels := map[string]string{} + node1 := getNodeObj(node1Name, annotations, labels) + annotations = map[string]string{ + "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", node1IPv4, ""), + "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), + } - node1 := v1.Node{ - ObjectMeta: metav1.ObjectMeta{ - Name: node1Name, - Annotations: map[string]string{ - "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\"}", node1IPv4), - "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), - "k8s.ovn.org/l3-gateway-config": `{"default":{"mode":"local","mac-address":"7e:57:f8:f0:3c:49", "ip-address":"192.168.126.12/24", "next-hop":"192.168.126.1"}}`, - "k8s.ovn.org/node-chassis-id": "79fdcfc4-6fe6-4cd3-8242-c0f85a4668ec", - }, - Labels: map[string]string{ - "k8s.ovn.org/egress-assignable": "", - }, - }, - Status: v1.NodeStatus{ - Conditions: []v1.NodeCondition{ - { - Type: v1.NodeReady, - Status: v1.ConditionTrue, - }, - }, - }, + labels = map[string]string{ + "k8s.ovn.org/egress-assignable": "", } + node2 := getNodeObj(node2Name, annotations, labels) eIP := egressipv1.EgressIP{ ObjectMeta: newEgressIPMeta(egressIPName), @@ -3683,27 +5122,31 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { }, }, } - nodeSwitch := &nbdb.LogicalSwitch{ - UUID: node1.Name + "-UUID", - Name: node1.Name, - } fakeOvn.startWithDBSetup( libovsdbtest.TestSetup{ NBData: []libovsdbtest.TestData{ &nbdb.LogicalRouter{ - Name: ovntypes.OVNClusterRouter, - UUID: ovntypes.OVNClusterRouter + "-UUID", + Name: types.OVNClusterRouter, }, &nbdb.LogicalRouter{ Name: ovntypes.GWRouterPrefix + node1.Name, UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", }, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node2.Name, + UUID: ovntypes.GWRouterPrefix + node2.Name + "-UUID", + }, &nbdb.LogicalRouterPort{ UUID: 
ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID", Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name, Networks: []string{nodeLogicalRouterIfAddrV4}, }, + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name, + Networks: []string{nodeLogicalRouterIfAddrV4}, + }, &nbdb.LogicalSwitchPort{ UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, @@ -3712,29 +5155,35 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, }, }, - nodeSwitch, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, + }, + }, }, }, &egressipv1.EgressIPList{ Items: []egressipv1.EgressIP{eIP}, }, &v1.NodeList{ - Items: []v1.Node{node1}, + Items: []v1.Node{node1, node2}, }, &v1.NamespaceList{ Items: []v1.Namespace{*egressNamespace}, }, &v1.PodList{ - Items: []v1.Pod{egressPod1}, + Items: []v1.Pod{egressPod}, }, ) - // we don't know the real switch UUID in the db, but it can be found by name - swUUID := getLogicalSwitchUUID(fakeOvn.controller.nbClient, node1.Name) - fakeOvn.controller.lsManager.AddSwitch(node1.Name, swUUID, []*net.IPNet{ovntest.MustParseIPNet(v4NodeSubnet)}) - err := fakeOvn.controller.WatchPods() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = fakeOvn.controller.WatchEgressIPNamespaces() + i, n, _ := net.ParseCIDR(podV4IP + "/23") + n.IP = i + fakeOvn.controller.logicalPortCache.add(&egressPod, "", types.DefaultNetworkName, "", 
nil, []*net.IPNet{n}) + + err := fakeOvn.controller.WatchEgressIPNamespaces() gomega.Expect(err).NotTo(gomega.HaveOccurred()) err = fakeOvn.controller.WatchEgressIPPods() gomega.Expect(err).NotTo(gomega.HaveOccurred()) @@ -3743,20 +5192,17 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { err = fakeOvn.controller.WatchEgressIP() gomega.Expect(err).NotTo(gomega.HaveOccurred()) - egressPodPortInfo, err := fakeOvn.controller.logicalPortCache.get(&egressPod1, types.DefaultNetworkName) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - egressPodIP, _, err := net.ParseCIDR(egressPodPortInfo.ips[0].String()) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - gomega.Expect(egressPodPortInfo.expires.IsZero()).To(gomega.BeTrue()) - podAddr := fmt.Sprintf("%s %s", egressPodPortInfo.mac.String(), egressPodIP) + fakeOvn.patchEgressIPObj(node2Name, egressIP1) - expectedNatLogicalPort1 := "k8s-node1" - expectedDatabaseStatewithPod := []libovsdbtest.TestData{ + gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) + gomega.Eventually(getEgressIPReassignmentCount).Should(gomega.Equal(0)) + expectedNatLogicalPort := "k8s-node2" + expectedDatabaseState := []libovsdbtest.TestData{ &nbdb.LogicalRouterPolicy{ Priority: types.DefaultNoRereoutePriority, Match: "ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14", Action: nbdb.LogicalRouterPolicyActionAllow, - UUID: "no-reroute-UUID", + UUID: "default-no-reroute-UUID", }, &nbdb.LogicalRouterPolicy{ Priority: types.DefaultNoRereoutePriority, @@ -3766,103 +5212,50 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { }, &nbdb.LogicalRouterPolicy{ Priority: types.EgressIPReroutePriority, - Match: fmt.Sprintf("ip4.src == %s", egressPodIP), + Match: fmt.Sprintf("ip4.src == %s", egressPod.Status.PodIP), Action: nbdb.LogicalRouterPolicyActionReroute, Nexthops: nodeLogicalRouterIPv4, ExternalIDs: map[string]string{ "name": eIP.Name, }, - UUID: "reroute-UUID1", - }, - 
&nbdb.LogicalRouter{ - Name: ovntypes.OVNClusterRouter, - UUID: ovntypes.OVNClusterRouter + "-UUID", - Policies: []string{"no-reroute-UUID", "no-reroute-service-UUID", "reroute-UUID1"}, - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node1.Name, - UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", - Nat: []string{"egressip-nat-UUID1"}, + UUID: "reroute-UUID", }, &nbdb.NAT{ - UUID: "egressip-nat-UUID1", - LogicalIP: egressPodIP.String(), + UUID: "egressip-nat-UUID", + LogicalIP: podV4IP, ExternalIP: egressIP1, ExternalIDs: map[string]string{ "name": egressIPName, }, Type: nbdb.NATTypeSNAT, - LogicalPort: &expectedNatLogicalPort1, + LogicalPort: &expectedNatLogicalPort, Options: map[string]string{ "stateless": "false", }, }, - &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, - "nat-addresses": "router", - "exclude-lb-vips-from-garp": "true", - }, - }, - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name, - Networks: []string{"100.64.0.2/29"}, - }, - nodeSwitch, - } - podLSP := &nbdb.LogicalSwitchPort{ - UUID: util.GetLogicalPortName(egressPod1.Namespace, egressPod1.Name) + "-UUID", - Name: util.GetLogicalPortName(egressPod1.Namespace, egressPod1.Name), - Addresses: []string{podAddr}, - ExternalIDs: map[string]string{ - "pod": "true", - "namespace": egressPod1.Namespace, - }, - Options: map[string]string{ - "requested-chassis": egressPod1.Spec.NodeName, - "iface-id-ver": egressPod1.Name, - }, - PortSecurity: []string{podAddr}, - } - nodeSwitch.Ports = []string{podLSP.UUID} - finalDatabaseStatewithPod := append(expectedDatabaseStatewithPod, podLSP) - 
gomega.Eventually(isEgressAssignableNode(node1.Name)).Should(gomega.BeTrue()) - gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) - _, nodes := getEgressIPStatus(egressIPName) - gomega.Expect(nodes[0]).To(gomega.Equal(node1.Name)) - - gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(finalDatabaseStatewithPod)) - - // delete the pod - err = fakeOvn.fakeClient.KubeClient.CoreV1().Pods(egressPod1.Namespace).Delete(context.TODO(), - egressPod1.Name, metav1.DeleteOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - expectedDatabaseStateWithoutPod := []libovsdbtest.TestData{ - &nbdb.LogicalRouterPolicy{ - Priority: types.DefaultNoRereoutePriority, - Match: "ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14", - Action: nbdb.LogicalRouterPolicyActionAllow, - UUID: "no-reroute-UUID", - }, - &nbdb.LogicalRouterPolicy{ - Priority: types.DefaultNoRereoutePriority, - Match: fmt.Sprintf("ip4.src == 10.128.0.0/14 && ip4.dst == %s", config.Gateway.V4JoinSubnet), - Action: nbdb.LogicalRouterPolicyActionAllow, - UUID: "no-reroute-service-UUID", - }, &nbdb.LogicalRouter{ Name: ovntypes.OVNClusterRouter, UUID: ovntypes.OVNClusterRouter + "-UUID", - Policies: []string{"no-reroute-UUID", "no-reroute-service-UUID"}, + Policies: []string{"reroute-UUID", "default-no-reroute-UUID", "no-reroute-service-UUID"}, }, &nbdb.LogicalRouter{ Name: ovntypes.GWRouterPrefix + node1.Name, UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", - Nat: []string{}, + }, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node2.Name, + UUID: ovntypes.GWRouterPrefix + node2.Name + "-UUID", + Nat: []string{"egressip-nat-UUID"}, + }, + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name, + Networks: []string{nodeLogicalRouterIfAddrV4}, + }, + &nbdb.LogicalRouterPort{ + UUID: 
ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name, + Networks: []string{nodeLogicalRouterIfAddrV4}, }, &nbdb.LogicalSwitchPort{ UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", @@ -3874,33 +5267,19 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { "exclude-lb-vips-from-garp": "true", }, }, - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name, - Networks: []string{"100.64.0.2/29"}, - }, - &nbdb.LogicalSwitch{ - UUID: node1.Name + "-UUID", - Name: node1.Name, - Ports: []string{}, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, + "nat-addresses": "router", + "exclude-lb-vips-from-garp": "true", + }, }, } - gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseStateWithoutPod)) - // recreate pod with same name immediately; simulating handler race (pods v/s egressip) condition, - // so instead of proper pod create, we try out egressIP pod setup which will be a no-op since pod doesn't exist - ginkgo.By("should not add egress IP setup for a deleted pod whose entry exists in logicalPortCache") - err = fakeOvn.controller.addPodEgressIPAssignments(egressIPName, eIP.Status.Items, &egressPod1) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - // pod is gone but logicalPortCache holds the entry for 60seconds - egressPodPortInfo, err = fakeOvn.controller.logicalPortCache.get(&egressPod1, types.DefaultNetworkName) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - 
gomega.Expect(egressPodPortInfo.expires.IsZero()).To(gomega.BeFalse()) - staleEgressPodIP, _, err := net.ParseCIDR(egressPodPortInfo.ips[0].String()) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - gomega.Expect(staleEgressPodIP).To(gomega.Equal(egressPodIP)) - // no-op - gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseStateWithoutPod)) + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) return nil } @@ -3908,42 +5287,22 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { gomega.Expect(err).NotTo(gomega.HaveOccurred()) }) - ginkgo.It("egressIP pod recreate with same name (stateful-sets) shouldn't use stale logicalPortCache entries AND stale podAssignment cache entries", func() { + ginkgo.It("should remove stale EgressIP setup when pod is deleted while ovnkube-master is not running", func() { app.Action = func(ctx *cli.Context) error { - config.Gateway.DisableSNATMultipleGWs = true - - egressIP1 := "192.168.126.101" - node1IPv4 := "192.168.126.12/24" + egressIP1 := "192.168.126.25" + node1IPv4 := "192.168.126.51/24" - oldEgressPodIP := "10.128.0.50" - egressPod1 := newPodWithLabels(namespace, podName, node1Name, "", egressPodLabel) - oldAnnotation := map[string]string{"k8s.ovn.org/pod-networks": `{"default":{"ip_addresses":["10.128.0.50/24"],"mac_address":"0a:58:0a:80:00:05","gateway_ips":["10.128.0.1"],"routes":[{"dest":"10.128.0.0/24","nextHop":"10.128.0.1"}],"ip_address":"10.128.0.50/24","gateway_ip":"10.128.0.1"}}`} - egressPod1.Annotations = oldAnnotation egressNamespace := newNamespace(namespace) - node1 := v1.Node{ - ObjectMeta: metav1.ObjectMeta{ - Name: node1Name, - Annotations: map[string]string{ - "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\"}", node1IPv4), - "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), - "k8s.ovn.org/l3-gateway-config": `{"default":{"mode":"local","mac-address":"7e:57:f8:f0:3c:49", 
"ip-address":"192.168.126.12/24", "next-hop":"192.168.126.1"}}`, - "k8s.ovn.org/node-chassis-id": "79fdcfc4-6fe6-4cd3-8242-c0f85a4668ec", - }, - Labels: map[string]string{ - "k8s.ovn.org/egress-assignable": "", - }, - }, - Status: v1.NodeStatus{ - Conditions: []v1.NodeCondition{ - { - Type: v1.NodeReady, - Status: v1.ConditionTrue, - }, - }, - }, + annotations := map[string]string{ + "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", node1IPv4, ""), + "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), } + labels := map[string]string{ + "k8s.ovn.org/egress-assignable": "", + } + node1 := getNodeObj(node1Name, annotations, labels) eIP := egressipv1.EgressIP{ ObjectMeta: newEgressIPMeta(egressIPName), @@ -3967,26 +5326,48 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { }, }, } - nodeSwitch := &nbdb.LogicalSwitch{ - UUID: node1.Name + "-UUID", - Name: node1.Name, - } + expectedNatLogicalPort := "k8s-node1" fakeOvn.startWithDBSetup( libovsdbtest.TestSetup{ NBData: []libovsdbtest.TestData{ + &nbdb.LogicalRouterPolicy{ + UUID: "keep-me-UUID", + Match: "ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14", + Priority: types.DefaultNoRereoutePriority, + Action: nbdb.LogicalRouterPolicyActionAllow, + }, + &nbdb.LogicalRouterPolicy{ + UUID: "remove-me-UUID", + ExternalIDs: map[string]string{ + "name": eIP.Name, + }, + Match: "ip.src == 10.128.3.8", + Priority: types.EgressIPReroutePriority, + Action: nbdb.LogicalRouterPolicyActionReroute, + }, &nbdb.LogicalRouter{ - Name: ovntypes.OVNClusterRouter, - UUID: ovntypes.OVNClusterRouter + "-UUID", + Name: ovntypes.OVNClusterRouter, + UUID: ovntypes.OVNClusterRouter + "-UUID", + Policies: []string{"remove-me-UUID", "keep-me-UUID"}, }, &nbdb.LogicalRouter{ Name: ovntypes.GWRouterPrefix + node1.Name, UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", + Nat: []string{"egressip-nat-UUID"}, }, - &nbdb.LogicalRouterPort{ - UUID: 
ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name, - Networks: []string{nodeLogicalRouterIfAddrV4}, + &nbdb.NAT{ + UUID: "egressip-nat-UUID", + LogicalIP: podV4IP, + ExternalIP: egressIP1, + ExternalIDs: map[string]string{ + "name": egressIPName, + }, + Type: nbdb.NATTypeSNAT, + LogicalPort: &expectedNatLogicalPort, + Options: map[string]string{ + "stateless": "false", + }, }, &nbdb.LogicalSwitchPort{ UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", @@ -3996,7 +5377,6 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, }, }, - nodeSwitch, }, }, &egressipv1.EgressIPList{ @@ -4008,64 +5388,24 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { &v1.NamespaceList{ Items: []v1.Namespace{*egressNamespace}, }, - &v1.PodList{ - Items: []v1.Pod{*egressPod1}, - }, ) - // we don't know the real switch UUID in the db, but it can be found by name - swUUID := getLogicalSwitchUUID(fakeOvn.controller.nbClient, node1.Name) - fakeOvn.controller.lsManager.AddSwitch(node1.Name, swUUID, []*net.IPNet{ovntest.MustParseIPNet(v4NodeSubnet)}) - fakeOvn.controller.WatchPods() - fakeOvn.controller.WatchEgressIPNamespaces() - fakeOvn.controller.WatchEgressIPPods() - fakeOvn.controller.WatchEgressNodes() - fakeOvn.controller.WatchEgressIP() - - oldEgressPodPortInfo, err := fakeOvn.controller.logicalPortCache.get(egressPod1, types.DefaultNetworkName) + err := fakeOvn.controller.WatchEgressIPNamespaces() gomega.Expect(err).NotTo(gomega.HaveOccurred()) - egressPodIP, _, err := net.ParseCIDR(oldEgressPodPortInfo.ips[0].String()) + err = fakeOvn.controller.WatchEgressIPPods() gomega.Expect(err).NotTo(gomega.HaveOccurred()) - gomega.Expect(egressPodIP.String()).To(gomega.Equal(oldEgressPodIP)) - 
gomega.Expect(oldEgressPodPortInfo.expires.IsZero()).To(gomega.BeTrue()) - podAddr := fmt.Sprintf("%s %s", oldEgressPodPortInfo.mac.String(), egressPodIP) - - expectedNatLogicalPort1 := "k8s-node1" - podEIPSNAT := &nbdb.NAT{ - UUID: "egressip-nat-UUID1", - LogicalIP: egressPodIP.String(), - ExternalIP: egressIP1, - ExternalIDs: map[string]string{ - "name": egressIPName, - }, - Type: nbdb.NATTypeSNAT, - LogicalPort: &expectedNatLogicalPort1, - Options: map[string]string{ - "stateless": "false", - }, - } - podReRoutePolicy := &nbdb.LogicalRouterPolicy{ - Priority: types.EgressIPReroutePriority, - Match: fmt.Sprintf("ip4.src == %s", oldEgressPodIP), - Action: nbdb.LogicalRouterPolicyActionReroute, - Nexthops: nodeLogicalRouterIPv4, - ExternalIDs: map[string]string{ - "name": eIP.Name, - }, - UUID: "reroute-UUID1", - } - node1GR := &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node1.Name, - UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", - Nat: []string{"egressip-nat-UUID1"}, - } - expectedDatabaseStatewithPod := []libovsdbtest.TestData{ - podEIPSNAT, + err = fakeOvn.controller.WatchEgressNodes() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchEgressIP() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) + gomega.Eventually(getEgressIPReassignmentCount).Should(gomega.Equal(0)) + expectedDatabaseState := []libovsdbtest.TestData{ &nbdb.LogicalRouterPolicy{ - Priority: types.DefaultNoRereoutePriority, + UUID: "keep-me-UUID", Match: "ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14", + Priority: types.DefaultNoRereoutePriority, Action: nbdb.LogicalRouterPolicyActionAllow, - UUID: "no-reroute-UUID", }, &nbdb.LogicalRouterPolicy{ Priority: types.DefaultNoRereoutePriority, @@ -4073,13 +5413,16 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { Action: nbdb.LogicalRouterPolicyActionAllow, UUID: "no-reroute-service-UUID", }, - 
podReRoutePolicy, &nbdb.LogicalRouter{ Name: ovntypes.OVNClusterRouter, UUID: ovntypes.OVNClusterRouter + "-UUID", - Policies: []string{"no-reroute-UUID", "no-reroute-service-UUID", "reroute-UUID1"}, + Policies: []string{"keep-me-UUID", "no-reroute-service-UUID"}, + }, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node1.Name, + UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", + Nat: []string{"egressip-nat-UUID"}, }, - node1GR, &nbdb.LogicalSwitchPort{ UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, @@ -4090,132 +5433,8 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { "exclude-lb-vips-from-garp": "true", }, }, - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name, - Networks: []string{"100.64.0.2/29"}, - }, - nodeSwitch, - } - podLSP := &nbdb.LogicalSwitchPort{ - UUID: util.GetLogicalPortName(egressPod1.Namespace, egressPod1.Name) + "-UUID", - Name: util.GetLogicalPortName(egressPod1.Namespace, egressPod1.Name), - Addresses: []string{podAddr}, - ExternalIDs: map[string]string{ - "pod": "true", - "namespace": egressPod1.Namespace, - }, - Options: map[string]string{ - "requested-chassis": egressPod1.Spec.NodeName, - "iface-id-ver": egressPod1.Name, - }, - PortSecurity: []string{podAddr}, - } - nodeSwitch.Ports = []string{podLSP.UUID} - finalDatabaseStatewithPod := append(expectedDatabaseStatewithPod, podLSP) - gomega.Eventually(isEgressAssignableNode(node1.Name)).Should(gomega.BeTrue()) - gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) - _, nodes := getEgressIPStatus(egressIPName) - gomega.Expect(nodes[0]).To(gomega.Equal(node1.Name)) - - gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(finalDatabaseStatewithPod)) - - // delete the 
pod and simulate a cleanup failure: - // 1) create a situation where pod is gone from kapi but egressIP setup wasn't cleanedup due to deletion error - // - we remove annotation from pod to mimic this situation - // 2) leaves us with a stale podAssignment cache - // 3) check to make sure the logicalPortCache is used always even if podAssignment already has the podKey - ginkgo.By("delete the egress IP pod and force the deletion to fail") - egressPod1.Annotations = map[string]string{} - _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Pods(egressPod1.Namespace).Update(context.TODO(), egressPod1, metav1.UpdateOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - // Wait for the cleared annotations to show up client-side - gomega.Eventually(func() int { - egressPod1, _ = fakeOvn.watcher.GetPod(egressPod1.Namespace, egressPod1.Name) - return len(egressPod1.Annotations) - }, 5).Should(gomega.Equal(0)) - - // Delete the pod to trigger the cleanup failure - err = fakeOvn.fakeClient.KubeClient.CoreV1().Pods(egressPod1.Namespace).Delete(context.TODO(), - egressPod1.Name, metav1.DeleteOptions{}) - // internally we have an error: - // E1006 12:51:59.594899 2500972 obj_retry.go:1517] Failed to delete *factory.egressIPPod egressip-namespace/egress-pod, error: pod egressip-namespace/egress-pod: no pod IPs found - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - // notice that pod objects aren't cleaned up yet since deletion failed! 
- // even the LSP sticks around for 60 seconds - gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(finalDatabaseStatewithPod)) - // egressIP cache is stale in the sense the podKey has not been deleted since deletion failed - pas := getPodAssignmentState(egressPod1) - gomega.Expect(pas).NotTo(gomega.BeNil()) - gomega.Expect(pas.egressStatuses).To(gomega.Equal(map[egressipv1.EgressIPStatusItem]string{ - { - Node: "node1", - EgressIP: "192.168.126.101", - }: "", - })) - // recreate pod with same name immediately; - ginkgo.By("should add egress IP setup for the NEW pod which exists in logicalPortCache") - newEgressPodIP := "10.128.0.60" - egressPod1 = newPodWithLabels(namespace, podName, node1Name, newEgressPodIP, egressPodLabel) - egressPod1.Annotations = map[string]string{"k8s.ovn.org/pod-networks": `{"default":{"ip_addresses":["10.128.0.60/24"],"mac_address":"0a:58:0a:80:00:06","gateway_ips":["10.128.0.1"],"routes":[{"dest":"10.128.0.0/24","nextHop":"10.128.0.1"}],"ip_address":"10.128.0.60/24","gateway_ip":"10.128.0.1"}}`} - _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Pods(egressPod1.Namespace).Create(context.TODO(), egressPod1, metav1.CreateOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - - // wait for the logical port cache to get updated with the new pod's IP - var newEgressPodPortInfo *lpInfo - getEgressPodIP := func() string { - newEgressPodPortInfo, err = fakeOvn.controller.logicalPortCache.get(egressPod1, types.DefaultNetworkName) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - egressPodIP, _, err := net.ParseCIDR(newEgressPodPortInfo.ips[0].String()) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - return egressPodIP.String() - } - gomega.Eventually(func() string { - return getEgressPodIP() - }).Should(gomega.Equal(newEgressPodIP)) - gomega.Expect(newEgressPodPortInfo.expires.IsZero()).To(gomega.BeTrue()) - - // deletion for the older EIP pod object is still being retried so we still have SNAT - // towards nodeIP for 
new pod which is created by addLogicalPort. - // Note that we while have the stale re-route policy for old pod, the snat for the old pod towards egressIP is gone - // because deleteLogicalPort removes ALL snats for a given pod but doesn't remove the policies. - ipv4Addr, _, _ := net.ParseCIDR(node1IPv4) - podNodeSNAT := &nbdb.NAT{ - UUID: "node-nat-UUID1", - LogicalIP: newEgressPodIP, - ExternalIP: ipv4Addr.String(), - Type: nbdb.NATTypeSNAT, - Options: map[string]string{ - "stateless": "false", - }, } - finalDatabaseStatewithPod = append(finalDatabaseStatewithPod, podNodeSNAT) - node1GR.Nat = []string{podNodeSNAT.UUID} - podAddr = fmt.Sprintf("%s %s", newEgressPodPortInfo.mac.String(), newEgressPodIP) - podLSP.PortSecurity = []string{podAddr} - podLSP.Addresses = []string{podAddr} - gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(finalDatabaseStatewithPod[1:])) - - ginkgo.By("trigger a forced retry and ensure deletion of oldPod and creation of newPod are successful") - // let us add back the annotation to the oldPod which is being retried to make deletion a success - podKey, err := retry.GetResourceKey(egressPod1) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - retry.CheckRetryObjectEventually(podKey, true, fakeOvn.controller.retryEgressIPPods) - retryOldObj := retry.GetOldObjFromRetryObj(podKey, fakeOvn.controller.retryEgressIPPods) - //fakeOvn.controller.retryEgressIPPods.retryEntries.LoadOrStore(podKey, &RetryObjEntry{backoffSec: 1}) - pod, _ := retryOldObj.(*kapi.Pod) - pod.Annotations = oldAnnotation - fakeOvn.controller.retryEgressIPPods.RequestRetryObjs() - // there should also be no entry for this pod in the retry cache - gomega.Eventually(func() bool { - return retry.CheckRetryObj(podKey, fakeOvn.controller.retryEgressIPPods) - }, retry.RetryObjInterval+time.Second).Should(gomega.BeFalse()) - - // ensure that egressIP setup is being done with the new pod's information from logicalPortCache - podReRoutePolicy.Match = 
fmt.Sprintf("ip4.src == %s", newEgressPodIP) - podEIPSNAT.LogicalIP = newEgressPodIP - node1GR.Nat = []string{podEIPSNAT.UUID} - gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(finalDatabaseStatewithPod[:len(finalDatabaseStatewithPod)-1])) + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) return nil } @@ -4223,85 +5442,30 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { gomega.Expect(err).NotTo(gomega.HaveOccurred()) }) - ginkgo.It("egressIP pod managed by multiple objects, verify standby works wells, verify syncPodAssignmentCache on restarts", func() { + ginkgo.It("should remove stale pod SNAT referring to wrong logical port after ovnkube-master is started", func() { app.Action = func(ctx *cli.Context) error { - config.Gateway.DisableSNATMultipleGWs = true - - egressIP1 := "192.168.126.25" - egressIP2 := "192.168.126.30" - egressIP3 := "192.168.126.35" + egressIP := "192.168.126.25" node1IPv4 := "192.168.126.12/24" - node2IPv4 := "192.168.126.13/24" - egressPod1 := *newPodWithLabels(namespace, podName, node1Name, "", egressPodLabel) + egressPod := *newPodWithLabels(namespace, podName, node1Name, podV4IP, egressPodLabel) egressNamespace := newNamespace(namespace) - node1 := v1.Node{ - ObjectMeta: metav1.ObjectMeta{ - Name: node1Name, - Annotations: map[string]string{ - "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\"}", node1IPv4), - "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), - "k8s.ovn.org/l3-gateway-config": `{"default":{"mode":"local","mac-address":"7e:57:f8:f0:3c:49", "ip-address":"192.168.126.12/24", "next-hop":"192.168.126.1"}}`, - "k8s.ovn.org/node-chassis-id": "79fdcfc4-6fe6-4cd3-8242-c0f85a4668ec", - }, - Labels: map[string]string{ - "k8s.ovn.org/egress-assignable": "", - }, - }, - Status: v1.NodeStatus{ - Conditions: []v1.NodeCondition{ - { - Type: v1.NodeReady, - Status: v1.ConditionTrue, - }, - }, - }, + annotations := 
map[string]string{ + "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\"}", node1IPv4), + "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), + "k8s.ovn.org/l3-gateway-config": `{"default":{"mode":"local","mac-address":"7e:57:f8:f0:3c:49", "ip-address":"192.168.126.12/24", "next-hop":"192.168.126.1"}}`, + "k8s.ovn.org/node-chassis-id": "79fdcfc4-6fe6-4cd3-8242-c0f85a4668ec", } - - node2 := v1.Node{ - ObjectMeta: metav1.ObjectMeta{ - Name: node2Name, - Annotations: map[string]string{ - "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\"}", node2IPv4), - "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), - "k8s.ovn.org/l3-gateway-config": `{"default":{"mode":"local","mac-address":"7e:57:f8:f0:3c:50", "ip-address":"192.168.126.13/24", "next-hop":"192.168.126.1"}}`, - "k8s.ovn.org/node-chassis-id": "79fdcfc4-6fe6-4cd3-8242-c0f85a4668ec", - }, - }, - Status: v1.NodeStatus{ - Conditions: []v1.NodeCondition{ - { - Type: v1.NodeReady, - Status: v1.ConditionTrue, - }, - }, - }, + labels := map[string]string{ + "k8s.ovn.org/egress-assignable": "", } + node1 := getNodeObj(node1Name, annotations, labels) - eIP1 := egressipv1.EgressIP{ + eIP := egressipv1.EgressIP{ ObjectMeta: newEgressIPMeta(egressIPName), Spec: egressipv1.EgressIPSpec{ - EgressIPs: []string{egressIP1, egressIP2}, - PodSelector: metav1.LabelSelector{ - MatchLabels: egressPodLabel, - }, - NamespaceSelector: metav1.LabelSelector{ - MatchLabels: map[string]string{ - "name": egressNamespace.Name, - }, - }, - }, - Status: egressipv1.EgressIPStatus{ - Items: []egressipv1.EgressIPStatusItem{}, - }, - } - - eIP2 := egressipv1.EgressIP{ - ObjectMeta: newEgressIPMeta(egressIPName2), - Spec: egressipv1.EgressIPSpec{ - EgressIPs: []string{egressIP3}, + EgressIPs: []string{egressIP}, PodSelector: metav1.LabelSelector{ MatchLabels: egressPodLabel, }, @@ -4324,10 +5488,6 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { 
Name: ovntypes.GWRouterPrefix + node1.Name, UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", } - node2GR := &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node2.Name, - UUID: ovntypes.GWRouterPrefix + node2.Name + "-UUID", - } node1LSP := &nbdb.LogicalSwitchPort{ UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, @@ -4336,15 +5496,6 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, }, } - node2LSP := &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, - }, - } - fakeOvn.startWithDBSetup( libovsdbtest.TestSetup{ NBData: []libovsdbtest.TestData{ @@ -4352,44 +5503,49 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { Name: ovntypes.OVNClusterRouter, UUID: ovntypes.OVNClusterRouter + "-UUID", }, - node1GR, node2GR, - node1LSP, node2LSP, - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name, - Networks: []string{"100.64.0.3/29"}, - }, + node1GR, + node1LSP, &nbdb.LogicalRouterPort{ UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID", Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name, Networks: []string{"100.64.0.2/29"}, }, node1Switch, - &nbdb.LogicalSwitch{ - UUID: node2.Name + "-UUID", - Name: node2.Name, + // This is unexpected snat entry where its logical port refers to an unavailable node + // and ensure this entry is removed as soon as ovnk master is up and running. 
+ &nbdb.NAT{ + UUID: "egressip-nat-UUID2", + LogicalIP: podV4IP, + ExternalIP: egressIP, + ExternalIDs: map[string]string{ + "name": egressIPName, + }, + Type: nbdb.NATTypeSNAT, + LogicalPort: utilpointer.String("k8s-node2"), + Options: map[string]string{ + "stateless": "false", + }, }, }, }, &egressipv1.EgressIPList{ - Items: []egressipv1.EgressIP{eIP1, eIP2}, + Items: []egressipv1.EgressIP{eIP}, }, &v1.NodeList{ - Items: []v1.Node{node1, node2}, + Items: []v1.Node{node1}, }, &v1.NamespaceList{ Items: []v1.Namespace{*egressNamespace}, }, &v1.PodList{ - Items: []v1.Pod{egressPod1}, + Items: []v1.Pod{egressPod}, }, ) - // we don't know the real switch UUID in the db, but it can be found by name - sw1UUID := getLogicalSwitchUUID(fakeOvn.controller.nbClient, node1.Name) - sw2UUID := getLogicalSwitchUUID(fakeOvn.controller.nbClient, node2.Name) - fakeOvn.controller.lsManager.AddSwitch(node1.Name, sw1UUID, []*net.IPNet{ovntest.MustParseIPNet(v4NodeSubnet)}) - fakeOvn.controller.lsManager.AddSwitch(node2.Name, sw2UUID, []*net.IPNet{ovntest.MustParseIPNet(v4NodeSubnet)}) + i, n, _ := net.ParseCIDR(podV4IP + "/23") + n.IP = i + fakeOvn.controller.logicalPortCache.add(&egressPod, "", types.DefaultNetworkName, "", nil, []*net.IPNet{n}) + err := fakeOvn.controller.WatchPods() gomega.Expect(err).NotTo(gomega.HaveOccurred()) err = fakeOvn.controller.WatchEgressIPNamespaces() @@ -4401,9 +5557,9 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { err = fakeOvn.controller.WatchEgressIP() gomega.Expect(err).NotTo(gomega.HaveOccurred()) - egressPodPortInfo, err := fakeOvn.controller.logicalPortCache.get(&egressPod1, types.DefaultNetworkName) + egressPodPortInfo, err := fakeOvn.controller.logicalPortCache.get(&egressPod, types.DefaultNetworkName) gomega.Expect(err).NotTo(gomega.HaveOccurred()) - ePod, err := fakeOvn.fakeClient.KubeClient.CoreV1().Pods(egressPod1.Namespace).Get(context.TODO(), egressPod1.Name, metav1.GetOptions{}) + ePod, err := 
fakeOvn.fakeClient.KubeClient.CoreV1().Pods(egressPod.Namespace).Get(context.TODO(), egressPod.Name, metav1.GetOptions{}) gomega.Expect(err).NotTo(gomega.HaveOccurred()) egressPodIP, err := util.GetPodIPsOfNetwork(ePod, &util.DefaultNetInfo{}) gomega.Expect(err).NotTo(gomega.HaveOccurred()) @@ -4411,324 +5567,45 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { gomega.Expect(err).NotTo(gomega.HaveOccurred()) gomega.Expect(egressNetPodIP.String()).To(gomega.Equal(egressPodIP[0].String())) gomega.Expect(egressPodPortInfo.expires.IsZero()).To(gomega.BeTrue()) - podAddr := fmt.Sprintf("%s %s", egressPodPortInfo.mac.String(), egressPodIP[0].String()) - gomega.Eventually(getEgressIPAllocatorSizeSafely).Should(gomega.Equal(2)) - // Ensure first egressIP object is assigned, since only node1 is an egressNode, only 1IP will be assigned, other will be pending - gomega.Eventually(isEgressAssignableNode(node1.Name)).Should(gomega.BeTrue()) + fakeOvn.patchEgressIPObj(node1Name, egressIP) gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) - gomega.Eventually(getEgressIPReassignmentCount).Should(gomega.Equal(1)) - recordedEvent := <-fakeOvn.fakeRecorder.Events - gomega.Expect(recordedEvent).To(gomega.ContainSubstring("Not all egress IPs for EgressIP: %s could be assigned, please tag more nodes", eIP1.Name)) - egressIPs1, nodes1 := getEgressIPStatus(egressIPName) - gomega.Expect(nodes1[0]).To(gomega.Equal(node1.Name)) - possibleAssignments := sets.NewString(egressIP1, egressIP2) - gomega.Expect(possibleAssignments.Has(egressIPs1[0])).To(gomega.BeTrue()) - - // Ensure second egressIP object is also assigned to node1, but no OVN config will be done for this - gomega.Eventually(getEgressIPStatusLen(egressIPName2)).Should(gomega.Equal(1)) - egressIPs2, nodes2 := getEgressIPStatus(egressIPName2) - gomega.Expect(nodes2[0]).To(gomega.Equal(node1.Name)) - gomega.Expect(egressIPs2[0]).To(gomega.Equal(egressIP3)) - recordedEvent = 
<-fakeOvn.fakeRecorder.Events - gomega.Expect(recordedEvent).To(gomega.ContainSubstring("EgressIP object egressip-2 will not be configured for pod egressip-namespace_egress-pod since another egressIP object egressip is serving it, this is undefined")) - - pas := getPodAssignmentState(&egressPod1) - gomega.Expect(pas).NotTo(gomega.BeNil()) - - assginedEIP := egressIPs1[0] - gomega.Expect(pas.egressIPName).To(gomega.Equal(egressIPName)) - eip1Obj, err := fakeOvn.fakeClient.EgressIPClient.K8sV1().EgressIPs().Get(context.TODO(), eIP1.Name, metav1.GetOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - gomega.Expect(pas.egressStatuses[eip1Obj.Status.Items[0]]).To(gomega.Equal("")) - gomega.Expect(pas.standbyEgressIPNames.Has(egressIPName2)).To(gomega.BeTrue()) + gomega.Eventually(getEgressIPReassignmentCount).Should(gomega.Equal(0)) + egressIPs, nodes := getEgressIPStatus(egressIPName) + gomega.Expect(nodes[0]).To(gomega.Equal(node1.Name)) + gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP)) - podEIPSNAT := &nbdb.NAT{ - UUID: "egressip-nat-UUID1", - LogicalIP: egressPodIP[0].String(), - ExternalIP: assginedEIP, - ExternalIDs: map[string]string{ - "name": pas.egressIPName, - }, - Type: nbdb.NATTypeSNAT, - LogicalPort: utilpointer.StringPtr("k8s-node1"), - Options: map[string]string{ - "stateless": "false", - }, - } - podReRoutePolicy := &nbdb.LogicalRouterPolicy{ - Priority: types.EgressIPReroutePriority, - Match: fmt.Sprintf("ip4.src == %s", egressPodIP[0].String()), - Action: nbdb.LogicalRouterPolicyActionReroute, - Nexthops: nodeLogicalRouterIPv4, - ExternalIDs: map[string]string{ - "name": pas.egressIPName, - }, - UUID: "reroute-UUID1", - } - node1GR.Nat = []string{"egressip-nat-UUID1"} + podEIPSNAT := getEIPSNAT(podV4IP, egressIP, "k8s-node1") + podReRoutePolicy := getReRoutePolicy(egressPodIP[0].String(), "4", nodeLogicalRouterIPv4) + node1GR.Nat = []string{"egressip-nat-UUID"} node1LSP.Options = map[string]string{ "router-port": 
types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, "nat-addresses": "router", "exclude-lb-vips-from-garp": "true", } expectedDatabaseStatewithPod := []libovsdbtest.TestData{ - podEIPSNAT, - &nbdb.LogicalRouterPolicy{ + podEIPSNAT, &nbdb.LogicalRouterPolicy{ Priority: types.DefaultNoRereoutePriority, Match: "ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14", Action: nbdb.LogicalRouterPolicyActionAllow, UUID: "no-reroute-UUID", - }, - &nbdb.LogicalRouterPolicy{ + }, &nbdb.LogicalRouterPolicy{ Priority: types.DefaultNoRereoutePriority, Match: fmt.Sprintf("ip4.src == 10.128.0.0/14 && ip4.dst == %s", config.Gateway.V4JoinSubnet), Action: nbdb.LogicalRouterPolicyActionAllow, UUID: "no-reroute-service-UUID", - }, - podReRoutePolicy, - &nbdb.LogicalRouter{ + }, podReRoutePolicy, &nbdb.LogicalRouter{ Name: ovntypes.OVNClusterRouter, UUID: ovntypes.OVNClusterRouter + "-UUID", - Policies: []string{"no-reroute-UUID", "no-reroute-service-UUID", "reroute-UUID1"}, - }, - node1GR, node2GR, - node1LSP, node2LSP, + Policies: []string{"no-reroute-UUID", "no-reroute-service-UUID", "reroute-UUID"}, + }, node1GR, node1LSP, &nbdb.LogicalRouterPort{ UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID", Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name, Networks: []string{"100.64.0.2/29"}, - }, - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name, - Networks: []string{"100.64.0.3/29"}, - }, - node1Switch, - &nbdb.LogicalSwitch{ - UUID: node2.Name + "-UUID", - Name: node2.Name, - }, - } - podLSP := &nbdb.LogicalSwitchPort{ - UUID: util.GetLogicalPortName(egressPod1.Namespace, egressPod1.Name) + "-UUID", - Name: util.GetLogicalPortName(egressPod1.Namespace, egressPod1.Name), - Addresses: []string{podAddr}, - ExternalIDs: map[string]string{ - "pod": "true", - 
"namespace": egressPod1.Namespace, - }, - Options: map[string]string{ - "requested-chassis": egressPod1.Spec.NodeName, - "iface-id-ver": egressPod1.Name, - }, - PortSecurity: []string{podAddr}, - } - node1Switch.Ports = []string{podLSP.UUID} - finalDatabaseStatewithPod := append(expectedDatabaseStatewithPod, podLSP) - - gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(finalDatabaseStatewithPod)) - - // Make second node egressIP assignable - node2.Labels = map[string]string{ - "k8s.ovn.org/egress-assignable": "", - } - _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Nodes().Update(context.TODO(), &node2, metav1.UpdateOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - - // ensure secondIP from first object gets assigned to node2 - gomega.Eventually(isEgressAssignableNode(node2.Name)).Should(gomega.BeTrue()) - gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(2)) - egressIPs1, nodes1 = getEgressIPStatus(egressIPName) - gomega.Expect(nodes1[1]).To(gomega.Equal(node2.Name)) - gomega.Expect(possibleAssignments.Has(egressIPs1[1])).To(gomega.BeTrue()) - - podEIPSNAT2 := &nbdb.NAT{ - UUID: "egressip-nat-UUID2", - LogicalIP: egressPodIP[0].String(), - ExternalIP: egressIPs1[1], - ExternalIDs: map[string]string{ - "name": pas.egressIPName, - }, - Type: nbdb.NATTypeSNAT, - LogicalPort: utilpointer.StringPtr("k8s-node2"), - Options: map[string]string{ - "stateless": "false", - }, - } - podReRoutePolicy.Nexthops = []string{nodeLogicalRouterIPv4[0], node2LogicalRouterIPv4[0]} - node2GR.Nat = []string{"egressip-nat-UUID2"} - node2LSP.Options = map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, - "nat-addresses": "router", - "exclude-lb-vips-from-garp": "true", - } - finalDatabaseStatewithPod = append(finalDatabaseStatewithPod, podEIPSNAT2) - - gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(finalDatabaseStatewithPod)) - - // check the state of the cache for podKey - pas = 
getPodAssignmentState(&egressPod1) - gomega.Expect(pas).NotTo(gomega.BeNil()) - - gomega.Expect(pas.egressIPName).To(gomega.Equal(egressIPName)) - eip1Obj, err = fakeOvn.fakeClient.EgressIPClient.K8sV1().EgressIPs().Get(context.TODO(), eIP1.Name, metav1.GetOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - gomega.Expect(pas.egressStatuses[eip1Obj.Status.Items[0]]).To(gomega.Equal("")) - gomega.Expect(pas.egressStatuses[eip1Obj.Status.Items[1]]).To(gomega.Equal("")) - gomega.Expect(pas.standbyEgressIPNames.Has(egressIPName2)).To(gomega.BeTrue()) - - // let's test syncPodAssignmentCache works as expected! Nuke the podAssignment cache first - fakeOvn.controller.eIPC.podAssignmentMutex.Lock() - fakeOvn.controller.eIPC.podAssignment = make(map[string]*podAssignmentState) // replicates controller startup state - fakeOvn.controller.eIPC.podAssignmentMutex.Unlock() - - egressIPCache, err := fakeOvn.controller.generateCacheForEgressIP() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = fakeOvn.controller.syncPodAssignmentCache(egressIPCache) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - - pas = getPodAssignmentState(&egressPod1) - gomega.Expect(pas).NotTo(gomega.BeNil()) - gomega.Expect(pas.egressIPName).To(gomega.Equal(egressIPName)) - gomega.Expect(pas.egressStatuses).To(gomega.Equal(map[egressipv1.EgressIPStatusItem]string{})) - gomega.Expect(pas.standbyEgressIPNames.Has(egressIPName2)).To(gomega.BeTrue()) - - // reset egressStatuses for rest of the test to progress correctly - fakeOvn.controller.eIPC.podAssignmentMutex.Lock() - fakeOvn.controller.eIPC.podAssignment[getPodKey(&egressPod1)].egressStatuses[eip1Obj.Status.Items[0]] = "" - fakeOvn.controller.eIPC.podAssignment[getPodKey(&egressPod1)].egressStatuses[eip1Obj.Status.Items[1]] = "" - fakeOvn.controller.eIPC.podAssignmentMutex.Unlock() - - // delete the standby egressIP object to make sure the cache is updated - err = 
fakeOvn.fakeClient.EgressIPClient.K8sV1().EgressIPs().Delete(context.TODO(), egressIPName2, metav1.DeleteOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - - gomega.Eventually(func() bool { - pas := getPodAssignmentState(&egressPod1) - gomega.Expect(pas).NotTo(gomega.BeNil()) - return pas.standbyEgressIPNames.Has(egressIPName2) - }).Should(gomega.BeFalse()) - gomega.Expect(getPodAssignmentState(&egressPod1).egressIPName).To(gomega.Equal(egressIPName)) - - // add back the standby egressIP object - _, err = fakeOvn.fakeClient.EgressIPClient.K8sV1().EgressIPs().Create(context.TODO(), &eIP2, metav1.CreateOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - - gomega.Eventually(func() bool { - pas := getPodAssignmentState(&egressPod1) - gomega.Expect(pas).NotTo(gomega.BeNil()) - return pas.standbyEgressIPNames.Has(egressIPName2) - }).Should(gomega.BeTrue()) - gomega.Expect(getPodAssignmentState(&egressPod1).egressIPName).To(gomega.Equal(egressIPName)) - gomega.Eventually(func() string { - return <-fakeOvn.fakeRecorder.Events - }).Should(gomega.ContainSubstring("EgressIP object egressip-2 will not be configured for pod egressip-namespace_egress-pod since another egressIP object egressip is serving it, this is undefined")) - - gomega.Eventually(getEgressIPStatusLen(egressIPName2)).Should(gomega.Equal(1)) - egressIPs2, nodes2 = getEgressIPStatus(egressIPName2) - gomega.Expect(egressIPs2[0]).To(gomega.Equal(egressIP3)) - assginedNodeForEIPObj2 := nodes2[0] - - // Delete the IP from object1 that was on node1 and ensure standby is not taking over - eIPUpdate, err := fakeOvn.fakeClient.EgressIPClient.K8sV1().EgressIPs().Get(context.TODO(), eIP1.Name, metav1.GetOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - - ipOnNode1 := assginedEIP - var ipOnNode2 string - if ipOnNode1 == egressIP1 { - ipOnNode2 = egressIP2 - } else { - ipOnNode2 = egressIP1 - } - eIPUpdate.Spec.EgressIPs = []string{ipOnNode2} - _, err = 
fakeOvn.fakeClient.EgressIPClient.K8sV1().EgressIPs().Update(context.TODO(), eIPUpdate, metav1.UpdateOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - - gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) - egressIPs1, nodes1 = getEgressIPStatus(egressIPName) - gomega.Expect(nodes1[0]).To(gomega.Equal(node2.Name)) - gomega.Expect(egressIPs1[0]).To(gomega.Equal(ipOnNode2)) - - // check if the setup for firstIP from object1 is deleted properly - podReRoutePolicy.Nexthops = node2LogicalRouterIPv4 - podNodeSNAT := &nbdb.NAT{ - UUID: "node-nat-UUID1", - LogicalIP: egressPodIP[0].String(), - ExternalIP: "192.168.126.12", // adds back SNAT to nodeIP - Type: nbdb.NATTypeSNAT, - Options: map[string]string{ - "stateless": "false", - }, - } - node1GR.Nat = []string{podNodeSNAT.UUID} - finalDatabaseStatewithPod = append(finalDatabaseStatewithPod, podNodeSNAT) - gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(finalDatabaseStatewithPod[1:])) - - gomega.Eventually(func() bool { - pas := getPodAssignmentState(&egressPod1) - gomega.Expect(pas).NotTo(gomega.BeNil()) - return pas.standbyEgressIPNames.Has(egressIPName2) - }).Should(gomega.BeTrue()) - gomega.Expect(getPodAssignmentState(&egressPod1).egressIPName).To(gomega.Equal(egressIPName)) - - // delete the first egressIP object and make sure the cache is updated - err = fakeOvn.fakeClient.EgressIPClient.K8sV1().EgressIPs().Delete(context.TODO(), egressIPName, metav1.DeleteOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - - // ensure standby takes over and we do the setup for it in OVN DB - gomega.Eventually(func() bool { - pas := getPodAssignmentState(&egressPod1) - gomega.Expect(pas).NotTo(gomega.BeNil()) - return pas.standbyEgressIPNames.Has(egressIPName2) - }).Should(gomega.BeFalse()) - gomega.Expect(getPodAssignmentState(&egressPod1).egressIPName).To(gomega.Equal(egressIPName2)) - - finalDatabaseStatewithPod = expectedDatabaseStatewithPod - 
finalDatabaseStatewithPod = append(expectedDatabaseStatewithPod, podLSP) - podEIPSNAT.ExternalIP = egressIP3 - podEIPSNAT.ExternalIDs = map[string]string{ - "name": egressIPName2, - } - podReRoutePolicy.ExternalIDs = map[string]string{ - "name": egressIPName2, - } - if assginedNodeForEIPObj2 == node2.Name { - podEIPSNAT.LogicalPort = utilpointer.StringPtr("k8s-node2") - finalDatabaseStatewithPod = append(finalDatabaseStatewithPod, podNodeSNAT) - node1GR.Nat = []string{podNodeSNAT.UUID} - node2GR.Nat = []string{podEIPSNAT.UUID} - } - if assginedNodeForEIPObj2 == node1.Name { - podReRoutePolicy.Nexthops = nodeLogicalRouterIPv4 - node1GR.Nat = []string{podEIPSNAT.UUID} - node2GR.Nat = []string{} - } - gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(finalDatabaseStatewithPod)) - - // delete the second egressIP object to make sure the cache is updated podKey should be gone since nothing is managing it anymore - err = fakeOvn.fakeClient.EgressIPClient.K8sV1().EgressIPs().Delete(context.TODO(), egressIPName2, metav1.DeleteOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - - gomega.Eventually(func() bool { - return getPodAssignmentState(&egressPod1) != nil - }).Should(gomega.BeFalse()) - - // let's test syncPodAssignmentCache works as expected! 
Nuke the podAssignment cache first - fakeOvn.controller.eIPC.podAssignmentMutex.Lock() - fakeOvn.controller.eIPC.podAssignment = make(map[string]*podAssignmentState) // replicates controller startup state - fakeOvn.controller.eIPC.podAssignmentMutex.Unlock() - - egressIPCache, err = fakeOvn.controller.generateCacheForEgressIP() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = fakeOvn.controller.syncPodAssignmentCache(egressIPCache) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - - // we don't have any egressIPs, so cache is nil - gomega.Eventually(func() bool { - return getPodAssignmentState(&egressPod1) != nil - }).Should(gomega.BeFalse()) + }, node1Switch} + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseStatewithPod)) return nil } @@ -4736,28 +5613,35 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { gomega.Expect(err).NotTo(gomega.HaveOccurred()) }) - ginkgo.It("should skip populating egress node data for nodes that have incorrect IP address", func() { + ginkgo.It("should only get assigned EgressIPs which matches their subnet when the node is tagged", func() { app.Action = func(ctx *cli.Context) error { - nodeIPv4 := "192.168.126.510/24" - nodeIPv6 := "0:0:0:0:0:feff:c0a8:8e0c/64" - node := v1.Node{ - ObjectMeta: metav1.ObjectMeta{ - Name: node1Name, - Annotations: map[string]string{ - "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", nodeIPv4, nodeIPv6), - "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":[\"%s\", \"%s\"]}", v4NodeSubnet, v6NodeSubnet), - }, + egressIP := "192.168.126.101" + node1IPv4 := "192.168.128.202/24" + node1IPv6 := "0:0:0:0:0:feff:c0a8:8e0c/64" + node2IPv4 := "192.168.126.51/24" + + annotations := map[string]string{ + "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", node1IPv4, node1IPv6), + "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":[\"%s\", \"%s\"]}", v4NodeSubnet, v6NodeSubnet), 
+ } + node1 := getNodeObj(node1Name, annotations, map[string]string{}) + annotations = map[string]string{ + "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", node2IPv4, ""), + "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), + } + node2 := getNodeObj(node2Name, annotations, map[string]string{}) + + eIP := egressipv1.EgressIP{ + ObjectMeta: newEgressIPMeta(egressIPName), + Spec: egressipv1.EgressIPSpec{ + EgressIPs: []string{egressIP}, }, - Status: v1.NodeStatus{ - Conditions: []v1.NodeCondition{ - { - Type: v1.NodeReady, - Status: v1.ConditionTrue, - }, - }, + Status: egressipv1.EgressIPStatus{ + Items: []egressipv1.EgressIPStatusItem{}, }, } + fakeOvn.startWithDBSetup( libovsdbtest.TestSetup{ NBData: []libovsdbtest.TestData{ @@ -4765,345 +5649,46 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { Name: ovntypes.OVNClusterRouter, UUID: ovntypes.OVNClusterRouter + "-UUID", }, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node1.Name, + UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", + }, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node2.Name, + UUID: ovntypes.GWRouterPrefix + node2.Name + "-UUID", + }, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, + }, + }, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, + }, + }, }, }, - &v1.NodeList{ - Items: []v1.Node{node}, + &egressipv1.EgressIPList{ + Items: []egressipv1.EgressIP{eIP}, }, - ) - - 
allocatorItems := func() int { - return len(fakeOvn.controller.eIPC.allocator.cache) - } + &v1.NodeList{ + Items: []v1.Node{node1, node2}, + }) - err := fakeOvn.controller.WatchEgressNodes() + err := fakeOvn.controller.WatchEgressIPNamespaces() gomega.Expect(err).NotTo(gomega.HaveOccurred()) - gomega.Eventually(allocatorItems).Should(gomega.Equal(0)) - - node.Labels = map[string]string{ - "k8s.ovn.org/egress-assignable": "", - } - - _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Nodes().Update(context.TODO(), &node, metav1.UpdateOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - gomega.Eventually(allocatorItems).Should(gomega.Equal(0)) - - return nil - } - - err := app.Run([]string{app.Name}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - }) - - ginkgo.It("should probe nodes using grpc", func() { - app.Action = func(ctx *cli.Context) error { - - node1IPv6 := "0:0:0:0:0:feff:c0a8:8e0c/64" - node2IPv4 := "192.168.126.51/24" - - node1 := v1.Node{ - ObjectMeta: metav1.ObjectMeta{ - Name: "node1", - Labels: map[string]string{ - "k8s.ovn.org/egress-assignable": "", - }, - Annotations: map[string]string{ - "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", "", node1IPv6), - "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v6NodeSubnet), - }, - }, - Status: v1.NodeStatus{ - Conditions: []v1.NodeCondition{ - { - Type: v1.NodeReady, - Status: v1.ConditionTrue, - }, - }, - }, - } - node2 := v1.Node{ - ObjectMeta: metav1.ObjectMeta{ - Name: "node2", - Labels: map[string]string{ - "k8s.ovn.org/egress-assignable": "", - }, - Annotations: map[string]string{ - "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", node2IPv4, ""), - "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), - }, - }, - Status: v1.NodeStatus{ - Conditions: []v1.NodeCondition{ - { - Type: v1.NodeReady, - Status: v1.ConditionTrue, - }, - }, - }, - } - 
fakeOvn.startWithDBSetup(libovsdbtest.TestSetup{ - NBData: []libovsdbtest.TestData{ - &nbdb.LogicalRouter{ - Name: ovntypes.OVNClusterRouter, - UUID: ovntypes.OVNClusterRouter + "-UUID", - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node1.Name, - UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node2.Name, - UUID: ovntypes.GWRouterPrefix + node2.Name + "-UUID", - }, - &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, - }, - }, - &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, - }, - }, - }, - }) - gomega.Expect(fakeOvn.controller.WatchEgressNodes()).To(gomega.Succeed()) - gomega.Eventually(getEgressIPAllocatorSizeSafely).Should(gomega.Equal(0)) - - _, ip1V6Sub, err := net.ParseCIDR(node1IPv6) - _, ip2V4Sub, err := net.ParseCIDR(node2IPv4) - - _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Nodes().Create(context.TODO(), &node1, metav1.CreateOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - gomega.Eventually(getEgressIPAllocatorSizeSafely).Should(gomega.Equal(1)) - gomega.Expect(fakeOvn.controller.eIPC.allocator.cache).To(gomega.HaveKey(node1.Name)) - gomega.Expect(fakeOvn.controller.eIPC.allocator.cache[node1.Name].egressIPConfig.V6.Net).To(gomega.Equal(ip1V6Sub)) - - _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Nodes().Create(context.TODO(), &node2, metav1.CreateOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - 
gomega.Eventually(getEgressIPAllocatorSizeSafely).Should(gomega.Equal(2)) - gomega.Eventually(isEgressAssignableNode(node1.Name)).Should(gomega.BeTrue()) - gomega.Eventually(isEgressAssignableNode(node2.Name)).Should(gomega.BeTrue()) - gomega.Expect(fakeOvn.controller.eIPC.allocator.cache).To(gomega.HaveKey(node1.Name)) - gomega.Expect(fakeOvn.controller.eIPC.allocator.cache).To(gomega.HaveKey(node2.Name)) - - cachedEgressNode1 := fakeOvn.controller.eIPC.allocator.cache[node1.Name] - cachedEgressNode2 := fakeOvn.controller.eIPC.allocator.cache[node2.Name] - gomega.Expect(cachedEgressNode1.egressIPConfig.V6.Net).To(gomega.Equal(ip1V6Sub)) - gomega.Expect(cachedEgressNode2.egressIPConfig.V4.Net).To(gomega.Equal(ip2V4Sub)) - - // Explicitly call check reachibility so we need not to wait for slow periodic timer - checkEgressNodesReachabilityIterate(fakeOvn.controller) - gomega.Expect(cachedEgressNode1.isReachable).To(gomega.BeTrue()) - gomega.Expect(cachedEgressNode2.isReachable).To(gomega.BeTrue()) - - // The test cases below will manipulate the fakeEgressIPHealthClient used for mocking - // a gRPC session dedicated to monitoring each of the 2 nodes created. It does that - // by setting the probe fail boolean which in turn causes the mocked probe call to - // pretend that the periodic monitor succeeded or not. - tests := []struct { - desc string - node1FailProbes bool - node2FailProbes bool - // This function is an optional and generic function for the test case - // to allow any special pre-conditioning needed before invoking of - // checkEgressNodesReachabilityIterate in the test. 
- tcPrepareFunc func(hcc1, hcc2 *fakeEgressIPHealthClient) - }{ - { - desc: "disconnect nodes", - node1FailProbes: true, - node2FailProbes: true, - tcPrepareFunc: func(hcc1, hcc2 *fakeEgressIPHealthClient) { - hcc1.Disconnect() - hcc2.Disconnect() - }, - }, - { - desc: "connect node1", - node2FailProbes: true, - }, - { - desc: "node1 connected, connect node2", - }, - { - desc: "node1 and node2 connected, bump only node2 counters", - node1FailProbes: true, - }, - { - desc: "node2 connected, disconnect node1", - node1FailProbes: true, - node2FailProbes: true, - tcPrepareFunc: func(hcc1, hcc2 *fakeEgressIPHealthClient) { - hcc1.Disconnect() - }, - }, - { - desc: "connect node1, disconnect node2", - node2FailProbes: true, - tcPrepareFunc: func(hcc1, hcc2 *fakeEgressIPHealthClient) { - hcc2.Disconnect() - }, - }, - { - desc: "node1 and node2 connected and both counters bump", - tcPrepareFunc: func(hcc1, hcc2 *fakeEgressIPHealthClient) { - // Perform an additional iteration, to make probe counters to bump on second call - checkEgressNodesReachabilityIterate(fakeOvn.controller) - }, - }, - } - - // hcc1 and hcc2 are the mocked gRPC client to node1 and node2, respectively. - // They are what we use to manipulate whether probes to the node should fail or - // not, as well as a mechanism for explicitly disconnecting as part of the test. - hcc1 := cachedEgressNode1.healthClient.(*fakeEgressIPHealthClient) - hcc2 := cachedEgressNode2.healthClient.(*fakeEgressIPHealthClient) - - // ttIterCheck is the common function used by each test case. It will check whether - // a client changed its connection state and if the number of probes to the node - // changed as expected. 
- ttIterCheck := func(hcc *fakeEgressIPHealthClient, prevNodeIsConnected bool, prevProbes int, failProbes bool, desc string) { - currNodeIsConnected := hcc.IsConnected() - gomega.Expect(currNodeIsConnected || failProbes).To(gomega.BeTrue(), desc) - - if !prevNodeIsConnected && !currNodeIsConnected { - // Not connected (before and after): no probes should be successful - gomega.Expect(hcc.ProbeCount).To(gomega.Equal(prevProbes), desc) - } else if prevNodeIsConnected && currNodeIsConnected { - if failProbes { - // Still connected, but no probes should be successful - gomega.Expect(prevProbes).To(gomega.Equal(hcc.ProbeCount), desc) - } else { - // Still connected and probe counters should be going up - gomega.Expect(prevProbes < hcc.ProbeCount).To(gomega.BeTrue(), desc) - } - } - } - - for _, tt := range tests { - hcc1.FakeProbeFailure = tt.node1FailProbes - hcc2.FakeProbeFailure = tt.node2FailProbes - - prevNode1IsConnected := hcc1.IsConnected() - prevNode2IsConnected := hcc2.IsConnected() - prevNode1Probes := hcc1.ProbeCount - prevNode2Probes := hcc2.ProbeCount - - if tt.tcPrepareFunc != nil { - tt.tcPrepareFunc(hcc1, hcc2) - } - - // Perform connect or probing, depending on the state of the connections - checkEgressNodesReachabilityIterate(fakeOvn.controller) - - ttIterCheck(hcc1, prevNode1IsConnected, prevNode1Probes, tt.node1FailProbes, tt.desc) - ttIterCheck(hcc2, prevNode2IsConnected, prevNode2Probes, tt.node2FailProbes, tt.desc) - } - - gomega.Expect(hcc1.IsConnected()).To(gomega.BeTrue()) - gomega.Expect(hcc2.IsConnected()).To(gomega.BeTrue()) - - // Lastly, remove egress assignable from node 2 and make sure it disconnects - node2.Labels = map[string]string{} - _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Nodes().Update(context.TODO(), &node2, metav1.UpdateOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - - gomega.Eventually(isEgressAssignableNode(node1.Name)).Should(gomega.BeTrue()) - 
gomega.Eventually(isEgressAssignableNode(node2.Name)).Should(gomega.BeFalse()) - - // Explicitly call check reachibility so we need not to wait for slow periodic timer - checkEgressNodesReachabilityIterate(fakeOvn.controller) - - gomega.Expect(hcc1.IsConnected()).To(gomega.BeTrue()) - gomega.Expect(hcc2.IsConnected()).To(gomega.BeFalse()) - return nil - } - - err := app.Run([]string{app.Name}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - }) - - }) - - ginkgo.Context("WatchEgressNodes running with WatchEgressIP", func() { - - ginkgo.It("should treat un-assigned EgressIPs when it is tagged", func() { - app.Action = func(ctx *cli.Context) error { - - egressIP := "192.168.126.101" - nodeIPv4 := "192.168.126.51/24" - nodeIPv6 := "0:0:0:0:0:feff:c0a8:8e0c/64" - - node := v1.Node{ - ObjectMeta: metav1.ObjectMeta{ - Name: node1Name, - Annotations: map[string]string{ - "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", nodeIPv4, nodeIPv6), - "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":[\"%s\", \"%s\"]}", v4NodeSubnet, v6NodeSubnet), - }, - }, - Status: v1.NodeStatus{ - Conditions: []v1.NodeCondition{ - { - Type: v1.NodeReady, - Status: v1.ConditionTrue, - }, - }, - }, - } - - eIP := egressipv1.EgressIP{ - ObjectMeta: newEgressIPMeta(egressIPName), - Spec: egressipv1.EgressIPSpec{ - EgressIPs: []string{egressIP}, - }, - Status: egressipv1.EgressIPStatus{ - Items: []egressipv1.EgressIPStatusItem{}, - }, - } - - fakeOvn.startWithDBSetup( - libovsdbtest.TestSetup{ - NBData: []libovsdbtest.TestData{ - &nbdb.LogicalRouter{ - Name: ovntypes.OVNClusterRouter, - UUID: ovntypes.OVNClusterRouter + "-UUID", - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node.Name, - UUID: ovntypes.GWRouterPrefix + node.Name + "-UUID", - }, - &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node.Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node.Name, - Type: 
"router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node.Name, - }, - }, - }, - }, - &egressipv1.EgressIPList{ - Items: []egressipv1.EgressIP{eIP}, - }, - &v1.NodeList{ - Items: []v1.Node{node}, - }) - - err := fakeOvn.controller.WatchEgressIPNamespaces() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = fakeOvn.controller.WatchEgressIPPods() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = fakeOvn.controller.WatchEgressNodes() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = fakeOvn.controller.WatchEgressIP() + err = fakeOvn.controller.WatchEgressIPPods() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchEgressNodes() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchEgressIP() gomega.Expect(err).NotTo(gomega.HaveOccurred()) expectedDatabaseState := []libovsdbtest.TestData{ @@ -5125,46 +5710,43 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { Policies: []string{"no-reroute-UUID", "no-reroute-service-UUID"}, }, &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node.Name, - UUID: ovntypes.GWRouterPrefix + node.Name + "-UUID", + Name: ovntypes.GWRouterPrefix + node1.Name, + UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", + }, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node2.Name, + UUID: ovntypes.GWRouterPrefix + node2.Name + "-UUID", }, &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node.Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node.Name, + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, + "nat-addresses": "router", + "exclude-lb-vips-from-garp": "true", + }, + }, + 
&nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, Type: "router", Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node.Name, + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, + "nat-addresses": "router", + "exclude-lb-vips-from-garp": "true", }, }, } gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) - - gomega.Eventually(getEgressIPAllocatorSizeSafely).Should(gomega.Equal(1)) - gomega.Eventually(isEgressAssignableNode(node.Name)).Should(gomega.BeFalse()) gomega.Eventually(eIP.Status.Items).Should(gomega.HaveLen(0)) - node.Labels = map[string]string{ + node1.Labels = map[string]string{ "k8s.ovn.org/egress-assignable": "", } - _, ipv4Sub, err := net.ParseCIDR(nodeIPv4) - _, ipv6Sub, err := net.ParseCIDR(nodeIPv6) - - _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Nodes().Update(context.TODO(), &node, metav1.UpdateOptions{}) + _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Nodes().Update(context.TODO(), &node1, metav1.UpdateOptions{}) gomega.Expect(err).NotTo(gomega.HaveOccurred()) - - gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) - gomega.Eventually(isEgressAssignableNode(node.Name)).Should(gomega.BeTrue()) - egressIPs, nodes := getEgressIPStatus(egressIPName) - gomega.Expect(nodes[0]).To(gomega.Equal(node.Name)) - gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP)) - - gomega.Expect(fakeOvn.controller.eIPC.allocator.cache).To(gomega.HaveLen(1)) - gomega.Expect(fakeOvn.controller.eIPC.allocator.cache).To(gomega.HaveKey(node.Name)) - gomega.Expect(fakeOvn.controller.eIPC.allocator.cache[node.Name].egressIPConfig.V4.Net).To(gomega.Equal(ipv4Sub)) - gomega.Expect(fakeOvn.controller.eIPC.allocator.cache[node.Name].egressIPConfig.V6.Net).To(gomega.Equal(ipv6Sub)) - - 
gomega.Eventually(getEgressIPReassignmentCount).Should(gomega.Equal(0)) expectedDatabaseState = []libovsdbtest.TestData{ &nbdb.LogicalRouterPolicy{ Priority: types.DefaultNoRereoutePriority, @@ -5184,137 +5766,54 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { Policies: []string{"no-reroute-UUID", "no-reroute-service-UUID"}, }, &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node.Name, - UUID: ovntypes.GWRouterPrefix + node.Name + "-UUID", + Name: ovntypes.GWRouterPrefix + node1.Name, + UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", + }, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node2.Name, + UUID: ovntypes.GWRouterPrefix + node2.Name + "-UUID", }, &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node.Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node.Name, + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, Type: "router", Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node.Name, + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, "nat-addresses": "router", "exclude-lb-vips-from-garp": "true", }, }, - } - gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) - return nil - } - - err := app.Run([]string{app.Name}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - }) - - ginkgo.It("should result in error and event if specified egress IP is a cluster node IP", func() { - app.Action = func(ctx *cli.Context) error { - - egressIP := "192.168.126.51" - node1IPv4 := "192.168.128.202/24" - node1IPv6 := "0:0:0:0:0:feff:c0a8:8e0c/64" - node2IPv4 := "192.168.126.51/24" - - node1 := v1.Node{ - ObjectMeta: metav1.ObjectMeta{ - Name: node1Name, - Labels: map[string]string{ - "k8s.ovn.org/egress-assignable": "", - }, - Annotations: map[string]string{ - 
"k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", node1IPv4, node1IPv6), - "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":[\"%s\", \"%s\"]}", v4NodeSubnet, v6NodeSubnet), - }, - }, - Status: v1.NodeStatus{ - Conditions: []v1.NodeCondition{ - { - Type: v1.NodeReady, - Status: v1.ConditionTrue, - }, - }, - }, - } - node2 := v1.Node{ - ObjectMeta: metav1.ObjectMeta{ - Name: node2Name, - Labels: map[string]string{ - "k8s.ovn.org/egress-assignable": "", - }, - Annotations: map[string]string{ - "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", node2IPv4, ""), - "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), - }, - }, - Status: v1.NodeStatus{ - Conditions: []v1.NodeCondition{ - { - Type: v1.NodeReady, - Status: v1.ConditionTrue, - }, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, + "nat-addresses": "router", + "exclude-lb-vips-from-garp": "true", }, }, } + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) - eIP := egressipv1.EgressIP{ - ObjectMeta: newEgressIPMeta(egressIPName), - Spec: egressipv1.EgressIPSpec{ - EgressIPs: []string{egressIP}, - }, - Status: egressipv1.EgressIPStatus{ - Items: []egressipv1.EgressIPStatusItem{}, - }, - } + gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(0)) + gomega.Eventually(getEgressIPReassignmentCount).Should(gomega.Equal(1)) - fakeOvn.startWithDBSetup( - libovsdbtest.TestSetup{ - NBData: []libovsdbtest.TestData{ - &nbdb.LogicalRouter{ - Name: ovntypes.OVNClusterRouter, - UUID: ovntypes.OVNClusterRouter + "-UUID", - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node1.Name, - UUID: 
ovntypes.GWRouterPrefix + node1.Name + "-UUID", - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node2.Name, - UUID: ovntypes.GWRouterPrefix + node2.Name + "-UUID", - }, - &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, - }, - }, - &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, - }, - }, - }, - }, - &egressipv1.EgressIPList{ - Items: []egressipv1.EgressIP{eIP}, - }, - &v1.NodeList{ - Items: []v1.Node{node1, node2}, - }) + node2.Labels = map[string]string{ + "k8s.ovn.org/egress-assignable": "", + } - err := fakeOvn.controller.WatchEgressIPNamespaces() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = fakeOvn.controller.WatchEgressIPPods() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = fakeOvn.controller.WatchEgressNodes() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = fakeOvn.controller.WatchEgressIP() + _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Nodes().Update(context.TODO(), &node2, metav1.UpdateOptions{}) gomega.Expect(err).NotTo(gomega.HaveOccurred()) - expectedDatabaseState := []libovsdbtest.TestData{ + fakeOvn.patchEgressIPObj(node2Name, egressIP) + gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) + + egressIPs, nodes := getEgressIPStatus(egressIPName) + gomega.Expect(nodes[0]).To(gomega.Equal(node2.Name)) + gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP)) + gomega.Eventually(getEgressIPReassignmentCount).Should(gomega.Equal(0)) + expectedDatabaseState = 
[]libovsdbtest.TestData{ &nbdb.LogicalRouterPolicy{ Priority: types.DefaultNoRereoutePriority, Match: "ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14", @@ -5363,12 +5862,6 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { } gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) - gomega.Eventually(getEgressIPAllocatorSizeSafely).Should(gomega.Equal(2)) - gomega.Expect(fakeOvn.controller.eIPC.allocator.cache).To(gomega.HaveKey(node1.Name)) - gomega.Expect(fakeOvn.controller.eIPC.allocator.cache).To(gomega.HaveKey(node2.Name)) - - gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(0)) - gomega.Eventually(fakeOvn.fakeRecorder.Events).Should(gomega.HaveLen(3)) return nil } @@ -5376,51 +5869,24 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { gomega.Expect(err).NotTo(gomega.HaveOccurred()) }) - ginkgo.It("should re-assigned EgressIPs when more nodes get tagged if the first assignment attempt wasn't fully successful", func() { + ginkgo.It("should try re-assigning EgressIP until all defined egress IPs are assigned", func() { app.Action = func(ctx *cli.Context) error { - egressIP1 := "192.168.126.25" - egressIP2 := "192.168.126.30" - node1IPv4 := "192.168.126.51/24" - node2IPv4 := "192.168.126.101/24" + egressIP1 := "192.168.126.101" + egressIP2 := "192.168.126.102" + node1IPv4 := "192.168.126.12/24" + node2IPv4 := "192.168.126.51/24" - node1 := v1.Node{ - ObjectMeta: metav1.ObjectMeta{ - Name: node1Name, - Labels: map[string]string{ - "k8s.ovn.org/egress-assignable": "", - }, - Annotations: map[string]string{ - "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\"}", node1IPv4), - "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), - }, - }, - Status: v1.NodeStatus{ - Conditions: []v1.NodeCondition{ - { - Type: v1.NodeReady, - Status: v1.ConditionTrue, - }, - }, - }, + annotations := map[string]string{ + 
"k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", node1IPv4, ""), + "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), } - node2 := v1.Node{ - ObjectMeta: metav1.ObjectMeta{ - Name: node2Name, - Annotations: map[string]string{ - "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\"}", node2IPv4), - "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), - }, - }, - Status: v1.NodeStatus{ - Conditions: []v1.NodeCondition{ - { - Type: v1.NodeReady, - Status: v1.ConditionTrue, - }, - }, - }, + node1 := getNodeObj(node1Name, annotations, map[string]string{}) + annotations = map[string]string{ + "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", node2IPv4, ""), + "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), } + node2 := getNodeObj(node2Name, annotations, map[string]string{}) eIP := egressipv1.EgressIP{ ObjectMeta: newEgressIPMeta(egressIPName), @@ -5522,27 +5988,105 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, Type: "router", Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, + "nat-addresses": "router", + "exclude-lb-vips-from-garp": "true", + }, + }, + } + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) + gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(0)) + + node1.Labels = map[string]string{ + "k8s.ovn.org/egress-assignable": "", + } + + _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Nodes().Update(context.TODO(), &node1, metav1.UpdateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + fakeOvn.patchEgressIPObj(node1Name, egressIP1) + expectedDatabaseState = []libovsdbtest.TestData{ + &nbdb.LogicalRouterPolicy{ 
+ Priority: types.DefaultNoRereoutePriority, + Match: "ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14", + Action: nbdb.LogicalRouterPolicyActionAllow, + UUID: "no-reroute-UUID", + }, + &nbdb.LogicalRouterPolicy{ + Priority: types.DefaultNoRereoutePriority, + Match: fmt.Sprintf("ip4.src == 10.128.0.0/14 && ip4.dst == %s", config.Gateway.V4JoinSubnet), + Action: nbdb.LogicalRouterPolicyActionAllow, + UUID: "no-reroute-service-UUID", + }, + &nbdb.LogicalRouter{ + Name: ovntypes.OVNClusterRouter, + UUID: ovntypes.OVNClusterRouter + "-UUID", + Policies: []string{"no-reroute-UUID", "no-reroute-service-UUID"}, + }, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node1.Name, + UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", + }, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node2.Name, + UUID: ovntypes.GWRouterPrefix + node2.Name + "-UUID", + }, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, + "nat-addresses": "router", + "exclude-lb-vips-from-garp": "true", + }, + }, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, + "nat-addresses": "router", + "exclude-lb-vips-from-garp": "true", }, }, } gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) - gomega.Eventually(getEgressIPAllocatorSizeSafely).Should(gomega.Equal(2)) gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) + _, nodes := getEgressIPStatus(egressIPName) + gomega.Expect(nodes[0]).To(gomega.Equal(node1.Name)) 
gomega.Eventually(getEgressIPReassignmentCount).Should(gomega.Equal(1)) - recordedEvent := <-fakeOvn.fakeRecorder.Events - gomega.Expect(recordedEvent).To(gomega.ContainSubstring("Not all egress IPs for EgressIP: %s could be assigned, please tag more nodes", eIP.Name)) - node2.Labels = map[string]string{ "k8s.ovn.org/egress-assignable": "", } + _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Nodes().Update(context.TODO(), &node2, metav1.UpdateOptions{}) gomega.Expect(err).NotTo(gomega.HaveOccurred()) + _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Nodes().Update(context.TODO(), &node1, metav1.UpdateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + // NOTE: Cluster manager is the one who patches the egressIP object. + // For the sake of unit testing egressip zone controller we need to patch egressIP object manually + // There are tests in cluster-manager package covering the patch logic. + status := []egressipv1.EgressIPStatusItem{ + { + Node: node1Name, + EgressIP: egressIP1, + }, + { + Node: node2Name, + EgressIP: egressIP2, + }, + } + err = fakeOvn.controller.patchReplaceEgressIPStatus(egressIPName, status) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(2)) + gomega.Eventually(getEgressIPReassignmentCount).Should(gomega.Equal(0)) + expectedDatabaseState = []libovsdbtest.TestData{ &nbdb.LogicalRouterPolicy{ Priority: types.DefaultNoRereoutePriority, @@ -5598,13 +6142,14 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { gomega.Expect(err).NotTo(gomega.HaveOccurred()) }) - ginkgo.It("should remove stale EgressIP setup when node label is removed while ovnkube-master is not running and assign to newly labelled node", func() { + ginkgo.It("ensure egress ip entries are not created when pod is already moved into completed state", func() { app.Action = func(ctx *cli.Context) error { - - egressIP1 := "192.168.126.25" - node1IPv4 := "192.168.126.51/24" + 
config.Gateway.DisableSNATMultipleGWs = true + egressIP := "192.168.126.25" + node1IPv4 := "192.168.126.12/24" egressPod := *newPodWithLabels(namespace, podName, node1Name, podV4IP, egressPodLabel) + egressPod.Status.Phase = v1.PodSucceeded egressNamespace := newNamespace(namespace) node1 := v1.Node{ @@ -5613,27 +6158,12 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { Annotations: map[string]string{ "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\"}", node1IPv4), "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), + "k8s.ovn.org/l3-gateway-config": `{"default":{"mode":"local","mac-address":"7e:57:f8:f0:3c:49", "ip-address":"192.168.126.12/24", "next-hop":"192.168.126.1"}}`, + "k8s.ovn.org/node-chassis-id": "79fdcfc4-6fe6-4cd3-8242-c0f85a4668ec", }, - }, - Status: v1.NodeStatus{ - Conditions: []v1.NodeCondition{ - { - Type: v1.NodeReady, - Status: v1.ConditionTrue, - }, - }, - }, - } - node2 := v1.Node{ - ObjectMeta: metav1.ObjectMeta{ - Name: node2Name, Labels: map[string]string{ "k8s.ovn.org/egress-assignable": "", }, - Annotations: map[string]string{ - "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\"}", node1IPv4), - "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), - }, }, Status: v1.NodeStatus{ Conditions: []v1.NodeCondition{ @@ -5648,7 +6178,7 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { eIP := egressipv1.EgressIP{ ObjectMeta: newEgressIPMeta(egressIPName), Spec: egressipv1.EgressIPSpec{ - EgressIPs: []string{egressIP1}, + EgressIPs: []string{egressIP}, PodSelector: metav1.LabelSelector{ MatchLabels: egressPodLabel, }, @@ -5659,62 +6189,48 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { }, }, Status: egressipv1.EgressIPStatus{ - Items: []egressipv1.EgressIPStatusItem{ - { - Node: node1.Name, - EgressIP: egressIP1, - }, - }, + Items: []egressipv1.EgressIPStatusItem{}, }, } + node1Switch := 
&nbdb.LogicalSwitch{ + UUID: node1.Name + "-UUID", + Name: node1.Name, + } + node1GR := &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node1.Name, + UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", + } + node1LSP := &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, + }, + } fakeOvn.startWithDBSetup( libovsdbtest.TestSetup{ NBData: []libovsdbtest.TestData{ &nbdb.LogicalRouter{ - Name: types.OVNClusterRouter, - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node1.Name, - UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node2.Name, - UUID: ovntypes.GWRouterPrefix + node2.Name + "-UUID", + Name: ovntypes.OVNClusterRouter, + UUID: ovntypes.OVNClusterRouter + "-UUID", }, + node1GR, + node1LSP, &nbdb.LogicalRouterPort{ UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID", Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name, - Networks: []string{nodeLogicalRouterIfAddrV4}, - }, - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name, - Networks: []string{nodeLogicalRouterIfAddrV4}, - }, - &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, - }, - }, - &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID", - Name: 
types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, - }, + Networks: []string{"100.64.0.2/29"}, }, + node1Switch, }, }, &egressipv1.EgressIPList{ Items: []egressipv1.EgressIP{eIP}, }, &v1.NodeList{ - Items: []v1.Node{node1, node2}, + Items: []v1.Node{node1}, }, &v1.NamespaceList{ Items: []v1.Namespace{*egressNamespace}, @@ -5723,12 +6239,13 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { Items: []v1.Pod{egressPod}, }, ) - i, n, _ := net.ParseCIDR(podV4IP + "/23") n.IP = i fakeOvn.controller.logicalPortCache.add(&egressPod, "", types.DefaultNetworkName, "", nil, []*net.IPNet{n}) - err := fakeOvn.controller.WatchEgressIPNamespaces() + err := fakeOvn.controller.WatchPods() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchEgressIPNamespaces() gomega.Expect(err).NotTo(gomega.HaveOccurred()) err = fakeOvn.controller.WatchEgressIPPods() gomega.Expect(err).NotTo(gomega.HaveOccurred()) @@ -5737,90 +6254,52 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { err = fakeOvn.controller.WatchEgressIP() gomega.Expect(err).NotTo(gomega.HaveOccurred()) + fakeOvn.patchEgressIPObj(node1Name, egressIP) + + egressPodPortInfo, err := fakeOvn.controller.logicalPortCache.get(&egressPod, types.DefaultNetworkName) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + ePod, err := fakeOvn.fakeClient.KubeClient.CoreV1().Pods(egressPod.Namespace).Get(context.TODO(), egressPod.Name, metav1.GetOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + egressPodIP, err := util.GetPodIPsOfNetwork(ePod, &util.DefaultNetInfo{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + egressNetPodIP, _, err := net.ParseCIDR(egressPodPortInfo.ips[0].String()) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Expect(egressNetPodIP.String()).To(gomega.Equal(egressPodIP[0].String())) + 
gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) gomega.Eventually(getEgressIPReassignmentCount).Should(gomega.Equal(0)) - expectedNatLogicalPort := "k8s-node2" - expectedDatabaseState := []libovsdbtest.TestData{ + egressIPs, nodes := getEgressIPStatus(egressIPName) + gomega.Expect(nodes[0]).To(gomega.Equal(node1.Name)) + gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP)) + + node1LSP.Options = map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, + "nat-addresses": "router", + "exclude-lb-vips-from-garp": "true", + } + expectedDatabaseStatewithPod := []libovsdbtest.TestData{ &nbdb.LogicalRouterPolicy{ Priority: types.DefaultNoRereoutePriority, Match: "ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14", Action: nbdb.LogicalRouterPolicyActionAllow, - UUID: "default-no-reroute-UUID", - }, - &nbdb.LogicalRouterPolicy{ + UUID: "no-reroute-UUID", + }, &nbdb.LogicalRouterPolicy{ Priority: types.DefaultNoRereoutePriority, Match: fmt.Sprintf("ip4.src == 10.128.0.0/14 && ip4.dst == %s", config.Gateway.V4JoinSubnet), Action: nbdb.LogicalRouterPolicyActionAllow, UUID: "no-reroute-service-UUID", - }, - &nbdb.LogicalRouterPolicy{ - Priority: types.EgressIPReroutePriority, - Match: fmt.Sprintf("ip4.src == %s", egressPod.Status.PodIP), - Action: nbdb.LogicalRouterPolicyActionReroute, - Nexthops: nodeLogicalRouterIPv4, - ExternalIDs: map[string]string{ - "name": eIP.Name, - }, - UUID: "reroute-UUID", - }, - &nbdb.NAT{ - UUID: "egressip-nat-UUID", - LogicalIP: podV4IP, - ExternalIP: egressIP1, - ExternalIDs: map[string]string{ - "name": egressIPName, - }, - Type: nbdb.NATTypeSNAT, - LogicalPort: &expectedNatLogicalPort, - Options: map[string]string{ - "stateless": "false", - }, - }, - &nbdb.LogicalRouter{ + }, &nbdb.LogicalRouter{ Name: ovntypes.OVNClusterRouter, UUID: ovntypes.OVNClusterRouter + "-UUID", - Policies: []string{"reroute-UUID", "default-no-reroute-UUID", "no-reroute-service-UUID"}, - }, - 
&nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node1.Name, - UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node2.Name, - UUID: ovntypes.GWRouterPrefix + node2.Name + "-UUID", - Nat: []string{"egressip-nat-UUID"}, - }, - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name, - Networks: []string{nodeLogicalRouterIfAddrV4}, - }, + Policies: []string{"no-reroute-UUID", "no-reroute-service-UUID"}, + }, node1GR, node1LSP, &nbdb.LogicalRouterPort{ UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID", Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name, - Networks: []string{nodeLogicalRouterIfAddrV4}, - }, - &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, - }, - }, - &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, - "nat-addresses": "router", - "exclude-lb-vips-from-garp": "true", - }, - }, - } + Networks: []string{"100.64.0.2/29"}, + }, node1Switch} - gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseStatewithPod)) return nil } @@ -5828,23 +6307,26 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { 
gomega.Expect(err).NotTo(gomega.HaveOccurred()) }) - ginkgo.It("should remove stale EgressIP setup when pod is deleted while ovnkube-master is not running", func() { + ginkgo.It("ensure external gw pod snat entry is not created back when pod is moved into completed state", func() { app.Action = func(ctx *cli.Context) error { + config.Gateway.DisableSNATMultipleGWs = true + egressIP := "192.168.126.25" + node1IPv4 := "192.168.126.12/24" - egressIP1 := "192.168.126.25" - node1IPv4 := "192.168.126.51/24" - + egressPod := *newPodWithLabels(namespace, podName, node1Name, podV4IP, egressPodLabel) egressNamespace := newNamespace(namespace) node1 := v1.Node{ ObjectMeta: metav1.ObjectMeta{ Name: node1Name, - Labels: map[string]string{ - "k8s.ovn.org/egress-assignable": "", - }, Annotations: map[string]string{ "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\"}", node1IPv4), "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), + "k8s.ovn.org/l3-gateway-config": `{"default":{"mode":"local","mac-address":"7e:57:f8:f0:3c:49", "ip-address":"192.168.126.12/24", "next-hop":"192.168.126.1"}}`, + "k8s.ovn.org/node-chassis-id": "79fdcfc4-6fe6-4cd3-8242-c0f85a4668ec", + }, + Labels: map[string]string{ + "k8s.ovn.org/egress-assignable": "", }, }, Status: v1.NodeStatus{ @@ -5860,7 +6342,7 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { eIP := egressipv1.EgressIP{ ObjectMeta: newEgressIPMeta(egressIPName), Spec: egressipv1.EgressIPSpec{ - EgressIPs: []string{egressIP1}, + EgressIPs: []string{egressIP}, PodSelector: metav1.LabelSelector{ MatchLabels: egressPodLabel, }, @@ -5871,219 +6353,32 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { }, }, Status: egressipv1.EgressIPStatus{ - Items: []egressipv1.EgressIPStatusItem{ - { - Node: node1.Name, - EgressIP: egressIP1, - }, - }, + Items: []egressipv1.EgressIPStatusItem{}, }, } - expectedNatLogicalPort := "k8s-node1" + node1Switch := &nbdb.LogicalSwitch{ + 
UUID: node1.Name + "-UUID", + Name: node1.Name, + } + node1GR := &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node1.Name, + UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", + } + node1LSP := &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, + }, + } fakeOvn.startWithDBSetup( libovsdbtest.TestSetup{ NBData: []libovsdbtest.TestData{ - &nbdb.LogicalRouterPolicy{ - UUID: "keep-me-UUID", - Match: "ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14", - Priority: types.DefaultNoRereoutePriority, - Action: nbdb.LogicalRouterPolicyActionAllow, - }, - &nbdb.LogicalRouterPolicy{ - UUID: "remove-me-UUID", - ExternalIDs: map[string]string{ - "name": eIP.Name, - }, - Match: "ip.src == 10.128.3.8", - Priority: types.EgressIPReroutePriority, - Action: nbdb.LogicalRouterPolicyActionReroute, - }, - &nbdb.LogicalRouter{ - Name: ovntypes.OVNClusterRouter, - UUID: ovntypes.OVNClusterRouter + "-UUID", - Policies: []string{"remove-me-UUID", "keep-me-UUID"}, - }, &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node1.Name, - UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", - Nat: []string{"egressip-nat-UUID"}, - }, - &nbdb.NAT{ - UUID: "egressip-nat-UUID", - LogicalIP: podV4IP, - ExternalIP: egressIP1, - ExternalIDs: map[string]string{ - "name": egressIPName, - }, - Type: nbdb.NATTypeSNAT, - LogicalPort: &expectedNatLogicalPort, - Options: map[string]string{ - "stateless": "false", - }, - }, - &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, - }, - }, - 
&nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, - }, - }, - }, - }, - &egressipv1.EgressIPList{ - Items: []egressipv1.EgressIP{eIP}, - }, - &v1.NodeList{ - Items: []v1.Node{node1}, - }, - &v1.NamespaceList{ - Items: []v1.Namespace{*egressNamespace}, - }, - ) - - err := fakeOvn.controller.WatchEgressIPNamespaces() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = fakeOvn.controller.WatchEgressIPPods() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = fakeOvn.controller.WatchEgressNodes() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = fakeOvn.controller.WatchEgressIP() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) - gomega.Eventually(getEgressIPReassignmentCount).Should(gomega.Equal(0)) - expectedDatabaseState := []libovsdbtest.TestData{ - &nbdb.LogicalRouterPolicy{ - UUID: "keep-me-UUID", - Match: "ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14", - Priority: types.DefaultNoRereoutePriority, - Action: nbdb.LogicalRouterPolicyActionAllow, - }, - &nbdb.LogicalRouterPolicy{ - Priority: types.DefaultNoRereoutePriority, - Match: fmt.Sprintf("ip4.src == 10.128.0.0/14 && ip4.dst == %s", config.Gateway.V4JoinSubnet), - Action: nbdb.LogicalRouterPolicyActionAllow, - UUID: "no-reroute-service-UUID", - }, - &nbdb.LogicalRouter{ - Name: ovntypes.OVNClusterRouter, - UUID: ovntypes.OVNClusterRouter + "-UUID", - Policies: []string{"keep-me-UUID", "no-reroute-service-UUID"}, - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node1.Name, - UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", - Nat: []string{"egressip-nat-UUID"}, - }, - &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + 
types.GWRouterPrefix + node1Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, - "nat-addresses": "router", - "exclude-lb-vips-from-garp": "true", - }, - }, - &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, - }, - }, - } - gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) - return nil - } - - err := app.Run([]string{app.Name}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - }) - - ginkgo.It("ensure egress ip entries are not created when pod is already moved into completed state", func() { - app.Action = func(ctx *cli.Context) error { - config.Gateway.DisableSNATMultipleGWs = true - egressIP := "192.168.126.25" - node1IPv4 := "192.168.126.12/24" - - egressPod := *newPodWithLabels(namespace, podName, node1Name, podV4IP, egressPodLabel) - egressPod.Status.Phase = kapi.PodSucceeded - egressNamespace := newNamespace(namespace) - - node1 := v1.Node{ - ObjectMeta: metav1.ObjectMeta{ - Name: node1Name, - Annotations: map[string]string{ - "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\"}", node1IPv4), - "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), - "k8s.ovn.org/l3-gateway-config": `{"default":{"mode":"local","mac-address":"7e:57:f8:f0:3c:49", "ip-address":"192.168.126.12/24", "next-hop":"192.168.126.1"}}`, - "k8s.ovn.org/node-chassis-id": "79fdcfc4-6fe6-4cd3-8242-c0f85a4668ec", - }, - Labels: map[string]string{ - "k8s.ovn.org/egress-assignable": "", - }, - }, - Status: v1.NodeStatus{ - Conditions: []v1.NodeCondition{ - { - Type: v1.NodeReady, - Status: v1.ConditionTrue, - }, 
- }, - }, - } - - eIP := egressipv1.EgressIP{ - ObjectMeta: newEgressIPMeta(egressIPName), - Spec: egressipv1.EgressIPSpec{ - EgressIPs: []string{egressIP}, - PodSelector: metav1.LabelSelector{ - MatchLabels: egressPodLabel, - }, - NamespaceSelector: metav1.LabelSelector{ - MatchLabels: map[string]string{ - "name": egressNamespace.Name, - }, - }, - }, - Status: egressipv1.EgressIPStatus{ - Items: []egressipv1.EgressIPStatusItem{}, - }, - } - - node1Switch := &nbdb.LogicalSwitch{ - UUID: node1.Name + "-UUID", - Name: node1.Name, - } - node1GR := &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node1.Name, - UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", - } - node1LSP := &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, - }, - } - fakeOvn.startWithDBSetup( - libovsdbtest.TestSetup{ - NBData: []libovsdbtest.TestData{ - &nbdb.LogicalRouter{ - Name: ovntypes.OVNClusterRouter, - UUID: ovntypes.OVNClusterRouter + "-UUID", + Name: ovntypes.OVNClusterRouter, + UUID: ovntypes.OVNClusterRouter + "-UUID", }, node1GR, node1LSP, @@ -6108,6 +6403,7 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { Items: []v1.Pod{egressPod}, }, ) + i, n, _ := net.ParseCIDR(podV4IP + "/23") n.IP = i fakeOvn.controller.logicalPortCache.add(&egressPod, "", types.DefaultNetworkName, "", nil, []*net.IPNet{n}) @@ -6123,6 +6419,8 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { err = fakeOvn.controller.WatchEgressIP() gomega.Expect(err).NotTo(gomega.HaveOccurred()) + fakeOvn.patchEgressIPObj(node1Name, egressIP) + egressPodPortInfo, err := fakeOvn.controller.logicalPortCache.get(&egressPod, types.DefaultNetworkName) gomega.Expect(err).NotTo(gomega.HaveOccurred()) ePod, err := 
fakeOvn.fakeClient.KubeClient.CoreV1().Pods(egressPod.Namespace).Get(context.TODO(), egressPod.Name, metav1.GetOptions{}) @@ -6132,21 +6430,78 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { egressNetPodIP, _, err := net.ParseCIDR(egressPodPortInfo.ips[0].String()) gomega.Expect(err).NotTo(gomega.HaveOccurred()) gomega.Expect(egressNetPodIP.String()).To(gomega.Equal(egressPodIP[0].String())) + gomega.Expect(egressPodPortInfo.expires.IsZero()).To(gomega.BeTrue()) - gomega.Eventually(getEgressIPAllocatorSizeSafely).Should(gomega.Equal(1)) - gomega.Eventually(isEgressAssignableNode(node1.Name)).Should(gomega.BeTrue()) gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) gomega.Eventually(getEgressIPReassignmentCount).Should(gomega.Equal(0)) egressIPs, nodes := getEgressIPStatus(egressIPName) gomega.Expect(nodes[0]).To(gomega.Equal(node1.Name)) gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP)) + podEIPSNAT := &nbdb.NAT{ + UUID: "egressip-nat-UUID1", + LogicalIP: podV4IP, + ExternalIP: egressIP, + ExternalIDs: map[string]string{ + "name": egressIPName, + }, + Type: nbdb.NATTypeSNAT, + LogicalPort: utilpointer.StringPtr("k8s-node1"), + Options: map[string]string{ + "stateless": "false", + }, + } + podReRoutePolicy := &nbdb.LogicalRouterPolicy{ + Priority: types.EgressIPReroutePriority, + Match: fmt.Sprintf("ip4.src == %s", egressPodIP[0].String()), + Action: nbdb.LogicalRouterPolicyActionReroute, + Nexthops: nodeLogicalRouterIPv4, + ExternalIDs: map[string]string{ + "name": egressIPName, + }, + UUID: "reroute-UUID1", + } + node1GR.Nat = []string{"egressip-nat-UUID1"} node1LSP.Options = map[string]string{ "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, "nat-addresses": "router", "exclude-lb-vips-from-garp": "true", } expectedDatabaseStatewithPod := []libovsdbtest.TestData{ + podEIPSNAT, &nbdb.LogicalRouterPolicy{ + Priority: types.DefaultNoRereoutePriority, + Match: "ip4.src == 10.128.0.0/14 && 
ip4.dst == 10.128.0.0/14", + Action: nbdb.LogicalRouterPolicyActionAllow, + UUID: "no-reroute-UUID", + }, &nbdb.LogicalRouterPolicy{ + Priority: types.DefaultNoRereoutePriority, + Match: fmt.Sprintf("ip4.src == 10.128.0.0/14 && ip4.dst == %s", config.Gateway.V4JoinSubnet), + Action: nbdb.LogicalRouterPolicyActionAllow, + UUID: "no-reroute-service-UUID", + }, podReRoutePolicy, &nbdb.LogicalRouter{ + Name: ovntypes.OVNClusterRouter, + UUID: ovntypes.OVNClusterRouter + "-UUID", + Policies: []string{"no-reroute-UUID", "no-reroute-service-UUID", "reroute-UUID1"}, + }, node1GR, node1LSP, + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name, + Networks: []string{"100.64.0.2/29"}, + }, node1Switch} + + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseStatewithPod)) + + egressPod.Status.Phase = v1.PodSucceeded + _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Pods(egressPod.Namespace).Update(context.TODO(), &egressPod, metav1.UpdateOptions{}) + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + // Wait for pod to get moved into succeeded state. 
+ gomega.Eventually(func() v1.PodPhase { + egressPod1, _ := fakeOvn.watcher.GetPod(egressPod.Namespace, egressPod.Name) + return egressPod1.Status.Phase + }, 5).Should(gomega.Equal(v1.PodSucceeded)) + + node1GR.Nat = []string{} + expectedDatabaseStatewitCompletedPod := []libovsdbtest.TestData{ &nbdb.LogicalRouterPolicy{ Priority: types.DefaultNoRereoutePriority, Match: "ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14", @@ -6168,7 +6523,8 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { Networks: []string{"100.64.0.2/29"}, }, node1Switch} - gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseStatewithPod)) + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseStatewitCompletedPod)) + return nil } @@ -6176,42 +6532,37 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { gomega.Expect(err).NotTo(gomega.HaveOccurred()) }) - ginkgo.It("ensure external gw pod snat entry is not created back when pod is moved into completed state", func() { + ginkgo.It("should ensure SNATs towards egressIP and nodeIP are correctly configured during egressIP re-assignment", func() { app.Action = func(ctx *cli.Context) error { config.Gateway.DisableSNATMultipleGWs = true - egressIP := "192.168.126.25" + + egressIP1 := "192.168.126.101" + egressIP2 := "192.168.126.102" node1IPv4 := "192.168.126.12/24" + node2IPv4 := "192.168.126.51/24" - egressPod := *newPodWithLabels(namespace, podName, node1Name, podV4IP, egressPodLabel) + egressPod1 := *newPodWithLabels(namespace, podName, node1Name, podV4IP, egressPodLabel) + egressPod2 := *newPodWithLabels(namespace, "egress-pod2", node2Name, "10.128.0.16", egressPodLabel) egressNamespace := newNamespace(namespace) - - node1 := v1.Node{ - ObjectMeta: metav1.ObjectMeta{ - Name: node1Name, - Annotations: map[string]string{ - "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\"}", node1IPv4), - "k8s.ovn.org/node-subnets": 
fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), - "k8s.ovn.org/l3-gateway-config": `{"default":{"mode":"local","mac-address":"7e:57:f8:f0:3c:49", "ip-address":"192.168.126.12/24", "next-hop":"192.168.126.1"}}`, - "k8s.ovn.org/node-chassis-id": "79fdcfc4-6fe6-4cd3-8242-c0f85a4668ec", - }, - Labels: map[string]string{ - "k8s.ovn.org/egress-assignable": "", - }, - }, - Status: v1.NodeStatus{ - Conditions: []v1.NodeCondition{ - { - Type: v1.NodeReady, - Status: v1.ConditionTrue, - }, - }, - }, - } + annotations := map[string]string{ + "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\"}", node1IPv4), + "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), + "k8s.ovn.org/l3-gateway-config": `{"default":{"mode":"local","mac-address":"7e:57:f8:f0:3c:49", "ip-address":"192.168.126.12/24", "next-hop":"192.168.126.1"}}`, + "k8s.ovn.org/node-chassis-id": "79fdcfc4-6fe6-4cd3-8242-c0f85a4668ec", + } + node1 := getNodeObj(node1Name, annotations, map[string]string{}) + annotations = map[string]string{ + "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\"}", node2IPv4), + "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), + "k8s.ovn.org/l3-gateway-config": `{"default":{"mode":"local","mac-address":"7e:57:f8:f0:3c:49", "ip-address":"192.168.126.51/24", "next-hop":"192.168.126.1"}}`, + "k8s.ovn.org/node-chassis-id": "89fdcfc4-6fe6-4cd3-8242-c0f85a4668ec", + } + node2 := getNodeObj(node2Name, annotations, map[string]string{}) eIP := egressipv1.EgressIP{ ObjectMeta: newEgressIPMeta(egressIPName), Spec: egressipv1.EgressIPSpec{ - EgressIPs: []string{egressIP}, + EgressIPs: []string{egressIP1, egressIP2}, PodSelector: metav1.LabelSelector{ MatchLabels: egressPodLabel, }, @@ -6226,22 +6577,6 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { }, } - node1Switch := &nbdb.LogicalSwitch{ - UUID: node1.Name + "-UUID", - Name: node1.Name, - } - node1GR := &nbdb.LogicalRouter{ - Name: 
ovntypes.GWRouterPrefix + node1.Name, - UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", - } - node1LSP := &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, - }, - } fakeOvn.startWithDBSetup( libovsdbtest.TestSetup{ NBData: []libovsdbtest.TestData{ @@ -6249,37 +6584,64 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { Name: ovntypes.OVNClusterRouter, UUID: ovntypes.OVNClusterRouter + "-UUID", }, - node1GR, - node1LSP, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node1.Name, + UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", + }, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node2.Name, + UUID: ovntypes.GWRouterPrefix + node2.Name + "-UUID", + }, &nbdb.LogicalRouterPort{ UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID", Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name, Networks: []string{"100.64.0.2/29"}, }, - node1Switch, + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name, + Networks: []string{"100.64.0.3/29"}, + }, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, + }, + }, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, + Type: "router", + Options: 
map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, + }, + }, }, }, &egressipv1.EgressIPList{ Items: []egressipv1.EgressIP{eIP}, }, &v1.NodeList{ - Items: []v1.Node{node1}, + Items: []v1.Node{node1, node2}, }, &v1.NamespaceList{ Items: []v1.Namespace{*egressNamespace}, }, &v1.PodList{ - Items: []v1.Pod{egressPod}, + Items: []v1.Pod{egressPod1, egressPod2}, }, ) i, n, _ := net.ParseCIDR(podV4IP + "/23") n.IP = i - fakeOvn.controller.logicalPortCache.add(&egressPod, "", types.DefaultNetworkName, "", nil, []*net.IPNet{n}) + fakeOvn.controller.logicalPortCache.add(&egressPod1, "", types.DefaultNetworkName, "", nil, []*net.IPNet{n}) + i, n, _ = net.ParseCIDR("10.128.0.16" + "/23") + n.IP = i + fakeOvn.controller.logicalPortCache.add(&egressPod2, "", types.DefaultNetworkName, "", nil, []*net.IPNet{n}) - err := fakeOvn.controller.WatchPods() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = fakeOvn.controller.WatchEgressIPNamespaces() + err := fakeOvn.controller.WatchEgressIPNamespaces() gomega.Expect(err).NotTo(gomega.HaveOccurred()) err = fakeOvn.controller.WatchEgressIPPods() gomega.Expect(err).NotTo(gomega.HaveOccurred()) @@ -6288,462 +6650,334 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { err = fakeOvn.controller.WatchEgressIP() gomega.Expect(err).NotTo(gomega.HaveOccurred()) - egressPodPortInfo, err := fakeOvn.controller.logicalPortCache.get(&egressPod, types.DefaultNetworkName) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - ePod, err := fakeOvn.fakeClient.KubeClient.CoreV1().Pods(egressPod.Namespace).Get(context.TODO(), egressPod.Name, metav1.GetOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - egressPodIP, err := util.GetPodIPsOfNetwork(ePod, &util.DefaultNetInfo{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - egressNetPodIP, _, err := net.ParseCIDR(egressPodPortInfo.ips[0].String()) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - 
gomega.Expect(egressNetPodIP.String()).To(gomega.Equal(egressPodIP[0].String())) - gomega.Expect(egressPodPortInfo.expires.IsZero()).To(gomega.BeTrue()) - - gomega.Eventually(getEgressIPAllocatorSizeSafely).Should(gomega.Equal(1)) - gomega.Eventually(isEgressAssignableNode(node1.Name)).Should(gomega.BeTrue()) - gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) - gomega.Eventually(getEgressIPReassignmentCount).Should(gomega.Equal(0)) - egressIPs, nodes := getEgressIPStatus(egressIPName) - gomega.Expect(nodes[0]).To(gomega.Equal(node1.Name)) - gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP)) - - podEIPSNAT := &nbdb.NAT{ - UUID: "egressip-nat-UUID1", - LogicalIP: podV4IP, - ExternalIP: egressIP, - ExternalIDs: map[string]string{ - "name": egressIPName, - }, - Type: nbdb.NATTypeSNAT, - LogicalPort: utilpointer.StringPtr("k8s-node1"), - Options: map[string]string{ - "stateless": "false", - }, - } - podReRoutePolicy := &nbdb.LogicalRouterPolicy{ - Priority: types.EgressIPReroutePriority, - Match: fmt.Sprintf("ip4.src == %s", egressPodIP[0].String()), - Action: nbdb.LogicalRouterPolicyActionReroute, - Nexthops: nodeLogicalRouterIPv4, - ExternalIDs: map[string]string{ - "name": egressIPName, - }, - UUID: "reroute-UUID1", - } - node1GR.Nat = []string{"egressip-nat-UUID1"} - node1LSP.Options = map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, - "nat-addresses": "router", - "exclude-lb-vips-from-garp": "true", - } - expectedDatabaseStatewithPod := []libovsdbtest.TestData{ - podEIPSNAT, &nbdb.LogicalRouterPolicy{ + expectedDatabaseState := []libovsdbtest.TestData{ + &nbdb.LogicalRouterPolicy{ Priority: types.DefaultNoRereoutePriority, Match: "ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14", Action: nbdb.LogicalRouterPolicyActionAllow, UUID: "no-reroute-UUID", - }, &nbdb.LogicalRouterPolicy{ + }, + &nbdb.LogicalRouterPolicy{ Priority: types.DefaultNoRereoutePriority, Match: fmt.Sprintf("ip4.src == 
10.128.0.0/14 && ip4.dst == %s", config.Gateway.V4JoinSubnet), Action: nbdb.LogicalRouterPolicyActionAllow, UUID: "no-reroute-service-UUID", - }, podReRoutePolicy, &nbdb.LogicalRouter{ + }, + &nbdb.LogicalRouter{ Name: ovntypes.OVNClusterRouter, UUID: ovntypes.OVNClusterRouter + "-UUID", - Policies: []string{"no-reroute-UUID", "no-reroute-service-UUID", "reroute-UUID1"}, - }, node1GR, node1LSP, + Policies: []string{"no-reroute-UUID", "no-reroute-service-UUID"}, + }, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node1.Name, + UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", + }, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node2.Name, + UUID: ovntypes.GWRouterPrefix + node2.Name + "-UUID", + }, + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name, + Networks: []string{"100.64.0.3/29"}, + }, &nbdb.LogicalRouterPort{ UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID", Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name, Networks: []string{"100.64.0.2/29"}, - }, node1Switch} + }, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, + "nat-addresses": "router", + "exclude-lb-vips-from-garp": "true", + }, + }, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, + "nat-addresses": "router", + "exclude-lb-vips-from-garp": "true", + 
}, + }, + } + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) - gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseStatewithPod)) + gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(0)) + node1.Labels = map[string]string{ + "k8s.ovn.org/egress-assignable": "", + } - egressPod.Status.Phase = kapi.PodSucceeded - _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Pods(egressPod.Namespace).Update(context.TODO(), &egressPod, metav1.UpdateOptions{}) - gomega.Expect(err).ToNot(gomega.HaveOccurred()) - // Wait for pod to get moved into succeeded state. - gomega.Eventually(func() v1.PodPhase { - egressPod1, _ := fakeOvn.watcher.GetPod(egressPod.Namespace, egressPod.Name) - return egressPod1.Status.Phase - }, 5).Should(gomega.Equal(kapi.PodSucceeded)) + _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Nodes().Update(context.TODO(), &node1, metav1.UpdateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) - node1GR.Nat = []string{} - expectedDatabaseStatewitCompletedPod := []libovsdbtest.TestData{ + fakeOvn.patchEgressIPObj(node1Name, egressIP1) + gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) + gomega.Eventually(getEgressIPReassignmentCount).Should(gomega.Equal(1)) + eips, nodes := getEgressIPStatus(egressIPName) + gomega.Expect(nodes[0]).To(gomega.Equal(node1.Name)) + + expectedNatLogicalPort1 := "k8s-node1" + expectedDatabaseState = []libovsdbtest.TestData{ &nbdb.LogicalRouterPolicy{ Priority: types.DefaultNoRereoutePriority, Match: "ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14", Action: nbdb.LogicalRouterPolicyActionAllow, UUID: "no-reroute-UUID", - }, &nbdb.LogicalRouterPolicy{ + }, + &nbdb.LogicalRouterPolicy{ Priority: types.DefaultNoRereoutePriority, Match: fmt.Sprintf("ip4.src == 10.128.0.0/14 && ip4.dst == %s", config.Gateway.V4JoinSubnet), Action: nbdb.LogicalRouterPolicyActionAllow, UUID: "no-reroute-service-UUID", - }, 
&nbdb.LogicalRouter{ - Name: ovntypes.OVNClusterRouter, - UUID: ovntypes.OVNClusterRouter + "-UUID", - Policies: []string{"no-reroute-UUID", "no-reroute-service-UUID"}, - }, node1GR, node1LSP, - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name, - Networks: []string{"100.64.0.2/29"}, - }, node1Switch} - - gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseStatewitCompletedPod)) - - return nil - } - - err := app.Run([]string{app.Name}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - }) - - ginkgo.It("should remove stale pod SNAT referring to wrong logical port after ovnkube-master is started", func() { - app.Action = func(ctx *cli.Context) error { - config.Gateway.DisableSNATMultipleGWs = true - egressIP := "192.168.126.25" - node1IPv4 := "192.168.126.12/24" - - egressPod := *newPodWithLabels(namespace, podName, node1Name, podV4IP, egressPodLabel) - egressNamespace := newNamespace(namespace) - - node1 := v1.Node{ - ObjectMeta: metav1.ObjectMeta{ - Name: node1Name, - Annotations: map[string]string{ - "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\"}", node1IPv4), - "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), - "k8s.ovn.org/l3-gateway-config": `{"default":{"mode":"local","mac-address":"7e:57:f8:f0:3c:49", "ip-address":"192.168.126.12/24", "next-hop":"192.168.126.1"}}`, - "k8s.ovn.org/node-chassis-id": "79fdcfc4-6fe6-4cd3-8242-c0f85a4668ec", - }, - Labels: map[string]string{ - "k8s.ovn.org/egress-assignable": "", - }, }, - Status: v1.NodeStatus{ - Conditions: []v1.NodeCondition{ - { - Type: v1.NodeReady, - Status: v1.ConditionTrue, - }, + &nbdb.LogicalRouterPolicy{ + Priority: types.EgressIPReroutePriority, + Match: fmt.Sprintf("ip4.src == %s", egressPod1.Status.PodIP), + Action: nbdb.LogicalRouterPolicyActionReroute, + Nexthops: 
[]string{"100.64.0.2"}, + ExternalIDs: map[string]string{ + "name": eIP.Name, }, + UUID: "reroute-UUID1", }, - } - - eIP := egressipv1.EgressIP{ - ObjectMeta: newEgressIPMeta(egressIPName), - Spec: egressipv1.EgressIPSpec{ - EgressIPs: []string{egressIP}, - PodSelector: metav1.LabelSelector{ - MatchLabels: egressPodLabel, - }, - NamespaceSelector: metav1.LabelSelector{ - MatchLabels: map[string]string{ - "name": egressNamespace.Name, - }, + &nbdb.LogicalRouterPolicy{ + Priority: types.EgressIPReroutePriority, + Match: fmt.Sprintf("ip4.src == %s", egressPod2.Status.PodIP), + Action: nbdb.LogicalRouterPolicyActionReroute, + Nexthops: []string{"100.64.0.2"}, + ExternalIDs: map[string]string{ + "name": eIP.Name, }, + UUID: "reroute-UUID2", }, - Status: egressipv1.EgressIPStatus{ - Items: []egressipv1.EgressIPStatusItem{}, + &nbdb.LogicalRouter{ + Name: ovntypes.OVNClusterRouter, + UUID: ovntypes.OVNClusterRouter + "-UUID", + Policies: []string{"no-reroute-UUID", "no-reroute-service-UUID", "reroute-UUID1", "reroute-UUID2"}, }, - } - - node1Switch := &nbdb.LogicalSwitch{ - UUID: node1.Name + "-UUID", - Name: node1.Name, - } - node1GR := &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node1.Name, - UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", - } - node1LSP := &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node1.Name, + UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", + Nat: []string{"egressip-nat-UUID1", "egressip-nat-UUID2"}, }, - } - fakeOvn.startWithDBSetup( - libovsdbtest.TestSetup{ - NBData: []libovsdbtest.TestData{ - &nbdb.LogicalRouter{ - Name: ovntypes.OVNClusterRouter, - UUID: ovntypes.OVNClusterRouter + "-UUID", - }, - node1GR, - 
node1LSP, - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name, - Networks: []string{"100.64.0.2/29"}, - }, - node1Switch, - // This is unexpected snat entry where its logical port refers to an unavailable node - // and ensure this entry is removed as soon as ovnk master is up and running. - &nbdb.NAT{ - UUID: "egressip-nat-UUID2", - LogicalIP: podV4IP, - ExternalIP: egressIP, - ExternalIDs: map[string]string{ - "name": egressIPName, - }, - Type: nbdb.NATTypeSNAT, - LogicalPort: utilpointer.StringPtr("k8s-node2"), - Options: map[string]string{ - "stateless": "false", - }, - }, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node2.Name, + UUID: ovntypes.GWRouterPrefix + node2.Name + "-UUID", + }, + &nbdb.NAT{ + UUID: "egressip-nat-UUID1", + LogicalIP: podV4IP, + ExternalIP: eips[0], + ExternalIDs: map[string]string{ + "name": egressIPName, + }, + Type: nbdb.NATTypeSNAT, + LogicalPort: &expectedNatLogicalPort1, + Options: map[string]string{ + "stateless": "false", }, }, - &egressipv1.EgressIPList{ - Items: []egressipv1.EgressIP{eIP}, + &nbdb.NAT{ + UUID: "egressip-nat-UUID2", + LogicalIP: "10.128.0.16", + ExternalIP: eips[0], + ExternalIDs: map[string]string{ + "name": egressIPName, + }, + Type: nbdb.NATTypeSNAT, + LogicalPort: &expectedNatLogicalPort1, + Options: map[string]string{ + "stateless": "false", + }, }, - &v1.NodeList{ - Items: []v1.Node{node1}, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, + "nat-addresses": "router", + "exclude-lb-vips-from-garp": "true", + }, }, - &v1.NamespaceList{ - Items: []v1.Namespace{*egressNamespace}, + 
&nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, + "nat-addresses": "router", + "exclude-lb-vips-from-garp": "true", + }, }, - &v1.PodList{ - Items: []v1.Pod{egressPod}, + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name, + Networks: []string{"100.64.0.3/29"}, }, - ) + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name, + Networks: []string{"100.64.0.2/29"}, + }, + } + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) - i, n, _ := net.ParseCIDR(podV4IP + "/23") - n.IP = i - fakeOvn.controller.logicalPortCache.add(&egressPod, "", types.DefaultNetworkName, "", nil, []*net.IPNet{n}) + node2.Labels = map[string]string{ + "k8s.ovn.org/egress-assignable": "", + } - err := fakeOvn.controller.WatchPods() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = fakeOvn.controller.WatchEgressIPNamespaces() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = fakeOvn.controller.WatchEgressIPPods() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = fakeOvn.controller.WatchEgressNodes() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = fakeOvn.controller.WatchEgressIP() + _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Nodes().Update(context.TODO(), &node2, metav1.UpdateOptions{}) gomega.Expect(err).NotTo(gomega.HaveOccurred()) - egressPodPortInfo, err := fakeOvn.controller.logicalPortCache.get(&egressPod, types.DefaultNetworkName) - 
gomega.Expect(err).NotTo(gomega.HaveOccurred()) - ePod, err := fakeOvn.fakeClient.KubeClient.CoreV1().Pods(egressPod.Namespace).Get(context.TODO(), egressPod.Name, metav1.GetOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - egressPodIP, err := util.GetPodIPsOfNetwork(ePod, &util.DefaultNetInfo{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - egressNetPodIP, _, err := net.ParseCIDR(egressPodPortInfo.ips[0].String()) + // NOTE: Cluster manager is the one who patches the egressIP object. + // For the sake of unit testing egressip zone controller we need to patch egressIP object manually + // There are tests in cluster-manager package covering the patch logic. + status := []egressipv1.EgressIPStatusItem{ + { + Node: node1Name, + EgressIP: egressIP1, + }, + { + Node: node2Name, + EgressIP: egressIP2, + }, + } + err = fakeOvn.controller.patchReplaceEgressIPStatus(egressIPName, status) gomega.Expect(err).NotTo(gomega.HaveOccurred()) - gomega.Expect(egressNetPodIP.String()).To(gomega.Equal(egressPodIP[0].String())) - gomega.Expect(egressPodPortInfo.expires.IsZero()).To(gomega.BeTrue()) - - gomega.Eventually(getEgressIPAllocatorSizeSafely).Should(gomega.Equal(1)) - gomega.Eventually(isEgressAssignableNode(node1.Name)).Should(gomega.BeTrue()) - gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) + gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(2)) gomega.Eventually(getEgressIPReassignmentCount).Should(gomega.Equal(0)) - egressIPs, nodes := getEgressIPStatus(egressIPName) + + eips, nodes = getEgressIPStatus(egressIPName) gomega.Expect(nodes[0]).To(gomega.Equal(node1.Name)) - gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP)) + gomega.Expect(nodes[1]).To(gomega.Equal(node2.Name)) - podEIPSNAT := &nbdb.NAT{ - UUID: "egressip-nat-UUID1", - LogicalIP: podV4IP, - ExternalIP: egressIP, - ExternalIDs: map[string]string{ - "name": egressIPName, - }, - Type: nbdb.NATTypeSNAT, - LogicalPort: 
utilpointer.StringPtr("k8s-node1"), - Options: map[string]string{ - "stateless": "false", - }, - } - podReRoutePolicy := &nbdb.LogicalRouterPolicy{ - Priority: types.EgressIPReroutePriority, - Match: fmt.Sprintf("ip4.src == %s", egressPodIP[0].String()), - Action: nbdb.LogicalRouterPolicyActionReroute, - Nexthops: nodeLogicalRouterIPv4, - ExternalIDs: map[string]string{ - "name": egressIPName, - }, - UUID: "reroute-UUID1", - } - node1GR.Nat = []string{"egressip-nat-UUID1"} - node1LSP.Options = map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, - "nat-addresses": "router", - "exclude-lb-vips-from-garp": "true", - } - expectedDatabaseStatewithPod := []libovsdbtest.TestData{ - podEIPSNAT, &nbdb.LogicalRouterPolicy{ + expectedNatLogicalPort2 := "k8s-node2" + expectedDatabaseState = []libovsdbtest.TestData{ + &nbdb.LogicalRouterPolicy{ Priority: types.DefaultNoRereoutePriority, Match: "ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14", Action: nbdb.LogicalRouterPolicyActionAllow, UUID: "no-reroute-UUID", - }, &nbdb.LogicalRouterPolicy{ + }, + &nbdb.LogicalRouterPolicy{ Priority: types.DefaultNoRereoutePriority, Match: fmt.Sprintf("ip4.src == 10.128.0.0/14 && ip4.dst == %s", config.Gateway.V4JoinSubnet), Action: nbdb.LogicalRouterPolicyActionAllow, UUID: "no-reroute-service-UUID", - }, podReRoutePolicy, &nbdb.LogicalRouter{ - Name: ovntypes.OVNClusterRouter, - UUID: ovntypes.OVNClusterRouter + "-UUID", - Policies: []string{"no-reroute-UUID", "no-reroute-service-UUID", "reroute-UUID1"}, - }, node1GR, node1LSP, - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name, - Networks: []string{"100.64.0.2/29"}, - }, node1Switch} - - gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseStatewithPod)) - return nil - } - - err := app.Run([]string{app.Name}) - 
gomega.Expect(err).NotTo(gomega.HaveOccurred()) - }) - - ginkgo.It("should only get assigned EgressIPs which matches their subnet when the node is tagged", func() { - app.Action = func(ctx *cli.Context) error { - - egressIP := "192.168.126.101" - node1IPv4 := "192.168.128.202/24" - node1IPv6 := "0:0:0:0:0:feff:c0a8:8e0c/64" - node2IPv4 := "192.168.126.51/24" - - node1 := v1.Node{ - ObjectMeta: metav1.ObjectMeta{ - Name: node1Name, - Annotations: map[string]string{ - "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", node1IPv4, node1IPv6), - "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), + }, + &nbdb.LogicalRouterPolicy{ + Priority: types.EgressIPReroutePriority, + Match: fmt.Sprintf("ip4.src == %s", egressPod1.Status.PodIP), + Action: nbdb.LogicalRouterPolicyActionReroute, + Nexthops: []string{"100.64.0.2", "100.64.0.3"}, + ExternalIDs: map[string]string{ + "name": eIP.Name, }, + UUID: "reroute-UUID1", }, - Status: v1.NodeStatus{ - Conditions: []v1.NodeCondition{ - { - Type: v1.NodeReady, - Status: v1.ConditionTrue, - }, + &nbdb.LogicalRouterPolicy{ + Priority: types.EgressIPReroutePriority, + Match: fmt.Sprintf("ip4.src == %s", egressPod2.Status.PodIP), + Action: nbdb.LogicalRouterPolicyActionReroute, + Nexthops: []string{"100.64.0.2", "100.64.0.3"}, + ExternalIDs: map[string]string{ + "name": eIP.Name, }, + UUID: "reroute-UUID2", }, - } - node2 := v1.Node{ - ObjectMeta: metav1.ObjectMeta{ - Name: node2Name, - Annotations: map[string]string{ - "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", node2IPv4, ""), - "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), + &nbdb.NAT{ + UUID: "egressip-nat-UUID1", + LogicalIP: podV4IP, + ExternalIP: eips[0], + ExternalIDs: map[string]string{ + "name": egressIPName, }, - }, - Status: v1.NodeStatus{ - Conditions: []v1.NodeCondition{ - { - Type: v1.NodeReady, - Status: v1.ConditionTrue, - }, + 
Type: nbdb.NATTypeSNAT, + LogicalPort: &expectedNatLogicalPort1, + Options: map[string]string{ + "stateless": "false", }, }, - } - - eIP := egressipv1.EgressIP{ - ObjectMeta: newEgressIPMeta(egressIPName), - Spec: egressipv1.EgressIPSpec{ - EgressIPs: []string{egressIP}, - }, - Status: egressipv1.EgressIPStatus{ - Items: []egressipv1.EgressIPStatusItem{}, - }, - } - - fakeOvn.startWithDBSetup( - libovsdbtest.TestSetup{ - NBData: []libovsdbtest.TestData{ - &nbdb.LogicalRouter{ - Name: ovntypes.OVNClusterRouter, - UUID: ovntypes.OVNClusterRouter + "-UUID", - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node1.Name, - UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node2.Name, - UUID: ovntypes.GWRouterPrefix + node2.Name + "-UUID", - }, - &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, - }, - }, - &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, - }, - }, + &nbdb.NAT{ + UUID: "egressip-nat-UUID2", + LogicalIP: "10.128.0.16", + ExternalIP: eips[0], + ExternalIDs: map[string]string{ + "name": egressIPName, + }, + Type: nbdb.NATTypeSNAT, + LogicalPort: &expectedNatLogicalPort1, + Options: map[string]string{ + "stateless": "false", }, }, - &egressipv1.EgressIPList{ - Items: []egressipv1.EgressIP{eIP}, - }, - &v1.NodeList{ - Items: []v1.Node{node1, node2}, - }) - - err := fakeOvn.controller.WatchEgressIPNamespaces() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = 
fakeOvn.controller.WatchEgressIPPods() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = fakeOvn.controller.WatchEgressNodes() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = fakeOvn.controller.WatchEgressIP() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - - _, ip1V4Sub, err := net.ParseCIDR(node1IPv4) - _, ip1V6Sub, err := net.ParseCIDR(node1IPv6) - _, ip2V4Sub, err := net.ParseCIDR(node2IPv4) - - expectedDatabaseState := []libovsdbtest.TestData{ - &nbdb.LogicalRouterPolicy{ - Priority: types.DefaultNoRereoutePriority, - Match: "ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14", - Action: nbdb.LogicalRouterPolicyActionAllow, - UUID: "no-reroute-UUID", + &nbdb.NAT{ + UUID: "egressip-nat-UUID3", + LogicalIP: podV4IP, + ExternalIP: eips[1], + ExternalIDs: map[string]string{ + "name": egressIPName, + }, + Type: nbdb.NATTypeSNAT, + LogicalPort: &expectedNatLogicalPort2, + Options: map[string]string{ + "stateless": "false", + }, }, - &nbdb.LogicalRouterPolicy{ - Priority: types.DefaultNoRereoutePriority, - Match: fmt.Sprintf("ip4.src == 10.128.0.0/14 && ip4.dst == %s", config.Gateway.V4JoinSubnet), - Action: nbdb.LogicalRouterPolicyActionAllow, - UUID: "no-reroute-service-UUID", + &nbdb.NAT{ + UUID: "egressip-nat-UUID4", + LogicalIP: "10.128.0.16", + ExternalIP: eips[1], + ExternalIDs: map[string]string{ + "name": egressIPName, + }, + Type: nbdb.NATTypeSNAT, + LogicalPort: &expectedNatLogicalPort2, + Options: map[string]string{ + "stateless": "false", + }, }, &nbdb.LogicalRouter{ Name: ovntypes.OVNClusterRouter, UUID: ovntypes.OVNClusterRouter + "-UUID", - Policies: []string{"no-reroute-UUID", "no-reroute-service-UUID"}, + Policies: []string{"no-reroute-UUID", "no-reroute-service-UUID", "reroute-UUID1", "reroute-UUID2"}, }, &nbdb.LogicalRouter{ Name: ovntypes.GWRouterPrefix + node1.Name, UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", + Nat: []string{"egressip-nat-UUID1", "egressip-nat-UUID2"}, }, &nbdb.LogicalRouter{ Name: 
ovntypes.GWRouterPrefix + node2.Name, UUID: ovntypes.GWRouterPrefix + node2.Name + "-UUID", + Nat: []string{"egressip-nat-UUID3", "egressip-nat-UUID4"}, + }, + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name, + Networks: []string{"100.64.0.3/29"}, + }, + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name, + Networks: []string{"100.64.0.2/29"}, }, &nbdb.LogicalSwitchPort{ UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, Type: "router", Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, + "nat-addresses": "router", + "exclude-lb-vips-from-garp": "true", }, }, &nbdb.LogicalSwitchPort{ @@ -6751,28 +6985,25 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, Type: "router", Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, + "nat-addresses": "router", + "exclude-lb-vips-from-garp": "true", }, }, } gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) - gomega.Eventually(getEgressIPAllocatorSizeSafely).Should(gomega.Equal(2)) - gomega.Expect(fakeOvn.controller.eIPC.allocator.cache).To(gomega.HaveKey(node1.Name)) - gomega.Expect(fakeOvn.controller.eIPC.allocator.cache).To(gomega.HaveKey(node2.Name)) - gomega.Eventually(isEgressAssignableNode(node1.Name)).Should(gomega.BeFalse()) - 
gomega.Eventually(isEgressAssignableNode(node2.Name)).Should(gomega.BeFalse()) - gomega.Expect(fakeOvn.controller.eIPC.allocator.cache[node1.Name].egressIPConfig.V4.Net).To(gomega.Equal(ip1V4Sub)) - gomega.Expect(fakeOvn.controller.eIPC.allocator.cache[node1.Name].egressIPConfig.V6.Net).To(gomega.Equal(ip1V6Sub)) - gomega.Expect(fakeOvn.controller.eIPC.allocator.cache[node2.Name].egressIPConfig.V4.Net).To(gomega.Equal(ip2V4Sub)) - gomega.Eventually(eIP.Status.Items).Should(gomega.HaveLen(0)) - - node1.Labels = map[string]string{ - "k8s.ovn.org/egress-assignable": "", - } + // remove label from node2 + node2.Labels = map[string]string{} - _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Nodes().Update(context.TODO(), &node1, metav1.UpdateOptions{}) + _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Nodes().Update(context.TODO(), &node2, metav1.UpdateOptions{}) gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + fakeOvn.patchEgressIPObj(node1Name, egressIP1) + + gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) + gomega.Eventually(getEgressIPReassignmentCount).Should(gomega.Equal(1)) + expectedDatabaseState = []libovsdbtest.TestData{ &nbdb.LogicalRouterPolicy{ Priority: types.DefaultNoRereoutePriority, @@ -6786,2378 +7017,203 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { Action: nbdb.LogicalRouterPolicyActionAllow, UUID: "no-reroute-service-UUID", }, + &nbdb.LogicalRouterPolicy{ + Priority: types.EgressIPReroutePriority, + Match: fmt.Sprintf("ip4.src == %s", egressPod1.Status.PodIP), + Action: nbdb.LogicalRouterPolicyActionReroute, + Nexthops: nodeLogicalRouterIPv4, + ExternalIDs: map[string]string{ + "name": eIP.Name, + }, + UUID: "reroute-UUID1", + }, + &nbdb.LogicalRouterPolicy{ + Priority: types.EgressIPReroutePriority, + Match: fmt.Sprintf("ip4.src == %s", egressPod2.Status.PodIP), + Action: nbdb.LogicalRouterPolicyActionReroute, + Nexthops: nodeLogicalRouterIPv4, + ExternalIDs: map[string]string{ + "name": 
eIP.Name, + }, + UUID: "reroute-UUID2", + }, + &nbdb.NAT{ + UUID: "egressip-nat-UUID1", + LogicalIP: podV4IP, + ExternalIP: eips[0], + ExternalIDs: map[string]string{ + "name": egressIPName, + }, + Type: nbdb.NATTypeSNAT, + LogicalPort: &expectedNatLogicalPort1, + Options: map[string]string{ + "stateless": "false", + }, + }, + &nbdb.NAT{ + UUID: "egressip-nat-UUID2", + LogicalIP: "10.128.0.16", + ExternalIP: eips[0], + ExternalIDs: map[string]string{ + "name": egressIPName, + }, + Type: nbdb.NATTypeSNAT, + LogicalPort: &expectedNatLogicalPort1, + Options: map[string]string{ + "stateless": "false", + }, + }, + &nbdb.NAT{ + UUID: "egressip-nat-UUID3", + LogicalIP: "10.128.0.16", + ExternalIP: "192.168.126.51", // adds back SNAT towards nodeIP + Type: nbdb.NATTypeSNAT, + Options: map[string]string{ + "stateless": "false", + }, + }, &nbdb.LogicalRouter{ Name: ovntypes.OVNClusterRouter, UUID: ovntypes.OVNClusterRouter + "-UUID", - Policies: []string{"no-reroute-UUID", "no-reroute-service-UUID"}, + Policies: []string{"no-reroute-UUID", "no-reroute-service-UUID", "reroute-UUID1", "reroute-UUID2"}, }, &nbdb.LogicalRouter{ Name: ovntypes.GWRouterPrefix + node1.Name, UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", + Nat: []string{"egressip-nat-UUID1", "egressip-nat-UUID2"}, }, &nbdb.LogicalRouter{ Name: ovntypes.GWRouterPrefix + node2.Name, UUID: ovntypes.GWRouterPrefix + node2.Name + "-UUID", + Nat: []string{"egressip-nat-UUID3"}, + }, + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name, + Networks: []string{"100.64.0.3/29"}, + }, + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name, + Networks: []string{"100.64.0.2/29"}, }, &nbdb.LogicalSwitchPort{ UUID: 
types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, - "nat-addresses": "router", - "exclude-lb-vips-from-garp": "true", - }, - }, - &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, - }, - }, - } - gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) - - gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(0)) - gomega.Eventually(isEgressAssignableNode(node1.Name)).Should(gomega.BeTrue()) - - gomega.Eventually(getEgressIPReassignmentCount).Should(gomega.Equal(1)) - - node2.Labels = map[string]string{ - "k8s.ovn.org/egress-assignable": "", - } - - _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Nodes().Update(context.TODO(), &node2, metav1.UpdateOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) - - egressIPs, nodes := getEgressIPStatus(egressIPName) - gomega.Expect(nodes[0]).To(gomega.Equal(node2.Name)) - gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP)) - gomega.Eventually(getEgressIPReassignmentCount).Should(gomega.Equal(0)) - expectedDatabaseState = []libovsdbtest.TestData{ - &nbdb.LogicalRouterPolicy{ - Priority: types.DefaultNoRereoutePriority, - Match: "ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14", - Action: nbdb.LogicalRouterPolicyActionAllow, - UUID: "no-reroute-UUID", - }, - &nbdb.LogicalRouterPolicy{ - Priority: types.DefaultNoRereoutePriority, - Match: fmt.Sprintf("ip4.src == 10.128.0.0/14 && ip4.dst == %s", 
config.Gateway.V4JoinSubnet), - Action: nbdb.LogicalRouterPolicyActionAllow, - UUID: "no-reroute-service-UUID", - }, - &nbdb.LogicalRouter{ - Name: ovntypes.OVNClusterRouter, - UUID: ovntypes.OVNClusterRouter + "-UUID", - Policies: []string{"no-reroute-UUID", "no-reroute-service-UUID"}, - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node1.Name, - UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node2.Name, - UUID: ovntypes.GWRouterPrefix + node2.Name + "-UUID", - }, - &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, - "nat-addresses": "router", - "exclude-lb-vips-from-garp": "true", - }, - }, - &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, - "nat-addresses": "router", - "exclude-lb-vips-from-garp": "true", - }, - }, - } - gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) - - return nil - } - - err := app.Run([]string{app.Name}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - }) - - ginkgo.It("should try re-assigning EgressIP until all defined egress IPs are assigned", func() { - app.Action = func(ctx *cli.Context) error { - - egressIP1 := "192.168.126.101" - egressIP2 := "192.168.126.102" - node1IPv4 := "192.168.126.12/24" - node2IPv4 := "192.168.126.51/24" - - node1 := v1.Node{ - ObjectMeta: metav1.ObjectMeta{ - Name: node1Name, - Annotations: map[string]string{ - "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\"}", 
node1IPv4), - "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), - }, - }, - Status: v1.NodeStatus{ - Conditions: []v1.NodeCondition{ - { - Type: v1.NodeReady, - Status: v1.ConditionTrue, - }, - }, - }, - } - node2 := v1.Node{ - ObjectMeta: metav1.ObjectMeta{ - Name: node2Name, - Annotations: map[string]string{ - "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\"}", node2IPv4), - "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), - }, - }, - Status: v1.NodeStatus{ - Conditions: []v1.NodeCondition{ - { - Type: v1.NodeReady, - Status: v1.ConditionTrue, - }, - }, - }, - } - - eIP := egressipv1.EgressIP{ - ObjectMeta: newEgressIPMeta(egressIPName), - Spec: egressipv1.EgressIPSpec{ - EgressIPs: []string{egressIP1, egressIP2}, - }, - Status: egressipv1.EgressIPStatus{ - Items: []egressipv1.EgressIPStatusItem{}, - }, - } - - fakeOvn.startWithDBSetup( - libovsdbtest.TestSetup{ - NBData: []libovsdbtest.TestData{ - &nbdb.LogicalRouter{ - Name: ovntypes.OVNClusterRouter, - UUID: ovntypes.OVNClusterRouter + "-UUID", - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node1.Name, - UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node2.Name, - UUID: ovntypes.GWRouterPrefix + node2.Name + "-UUID", - }, - &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, - }, - }, - &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, - }, - }, - }, - }, - 
&egressipv1.EgressIPList{ - Items: []egressipv1.EgressIP{eIP}, - }, - &v1.NodeList{ - Items: []v1.Node{node1, node2}, - }) - - err := fakeOvn.controller.WatchEgressIPNamespaces() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = fakeOvn.controller.WatchEgressIPPods() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = fakeOvn.controller.WatchEgressNodes() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = fakeOvn.controller.WatchEgressIP() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - - expectedDatabaseState := []libovsdbtest.TestData{ - &nbdb.LogicalRouterPolicy{ - Priority: types.DefaultNoRereoutePriority, - Match: "ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14", - Action: nbdb.LogicalRouterPolicyActionAllow, - UUID: "no-reroute-UUID", - }, - &nbdb.LogicalRouterPolicy{ - Priority: types.DefaultNoRereoutePriority, - Match: fmt.Sprintf("ip4.src == 10.128.0.0/14 && ip4.dst == %s", config.Gateway.V4JoinSubnet), - Action: nbdb.LogicalRouterPolicyActionAllow, - UUID: "no-reroute-service-UUID", - }, - &nbdb.LogicalRouter{ - Name: ovntypes.OVNClusterRouter, - UUID: ovntypes.OVNClusterRouter + "-UUID", - Policies: []string{"no-reroute-UUID", "no-reroute-service-UUID"}, - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node1.Name, - UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node2.Name, - UUID: ovntypes.GWRouterPrefix + node2.Name + "-UUID", - }, - &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, - }, - }, - &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, - Type: 
"router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, - }, - }, - } - gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) - - gomega.Eventually(getEgressIPAllocatorSizeSafely).Should(gomega.Equal(2)) - gomega.Expect(fakeOvn.controller.eIPC.allocator.cache).To(gomega.HaveKey(node1.Name)) - gomega.Expect(fakeOvn.controller.eIPC.allocator.cache).To(gomega.HaveKey(node2.Name)) - gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(0)) - - node1.Labels = map[string]string{ - "k8s.ovn.org/egress-assignable": "", - } - - _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Nodes().Update(context.TODO(), &node1, metav1.UpdateOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - expectedDatabaseState = []libovsdbtest.TestData{ - &nbdb.LogicalRouterPolicy{ - Priority: types.DefaultNoRereoutePriority, - Match: "ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14", - Action: nbdb.LogicalRouterPolicyActionAllow, - UUID: "no-reroute-UUID", - }, - &nbdb.LogicalRouterPolicy{ - Priority: types.DefaultNoRereoutePriority, - Match: fmt.Sprintf("ip4.src == 10.128.0.0/14 && ip4.dst == %s", config.Gateway.V4JoinSubnet), - Action: nbdb.LogicalRouterPolicyActionAllow, - UUID: "no-reroute-service-UUID", - }, - &nbdb.LogicalRouter{ - Name: ovntypes.OVNClusterRouter, - UUID: ovntypes.OVNClusterRouter + "-UUID", - Policies: []string{"no-reroute-UUID", "no-reroute-service-UUID"}, - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node1.Name, - UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node2.Name, - UUID: ovntypes.GWRouterPrefix + node2.Name + "-UUID", - }, - &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, - Type: "router", - Options: map[string]string{ - 
"router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, - "nat-addresses": "router", - "exclude-lb-vips-from-garp": "true", - }, - }, - &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, - }, - }, - } - gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) - - gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) - _, nodes := getEgressIPStatus(egressIPName) - gomega.Expect(nodes[0]).To(gomega.Equal(node1.Name)) - - gomega.Eventually(getEgressIPReassignmentCount).Should(gomega.Equal(1)) - - node2.Labels = map[string]string{ - "k8s.ovn.org/egress-assignable": "", - } - - _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Nodes().Update(context.TODO(), &node2, metav1.UpdateOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - - gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(2)) - gomega.Eventually(getEgressIPReassignmentCount).Should(gomega.Equal(0)) - - expectedDatabaseState = []libovsdbtest.TestData{ - &nbdb.LogicalRouterPolicy{ - Priority: types.DefaultNoRereoutePriority, - Match: "ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14", - Action: nbdb.LogicalRouterPolicyActionAllow, - UUID: "no-reroute-UUID", - }, - &nbdb.LogicalRouterPolicy{ - Priority: types.DefaultNoRereoutePriority, - Match: fmt.Sprintf("ip4.src == 10.128.0.0/14 && ip4.dst == %s", config.Gateway.V4JoinSubnet), - Action: nbdb.LogicalRouterPolicyActionAllow, - UUID: "no-reroute-service-UUID", - }, - &nbdb.LogicalRouter{ - Name: ovntypes.OVNClusterRouter, - UUID: ovntypes.OVNClusterRouter + "-UUID", - Policies: []string{"no-reroute-UUID", "no-reroute-service-UUID"}, - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node1.Name, - UUID: 
ovntypes.GWRouterPrefix + node1.Name + "-UUID", - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node2.Name, - UUID: ovntypes.GWRouterPrefix + node2.Name + "-UUID", - }, - &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, - "nat-addresses": "router", - "exclude-lb-vips-from-garp": "true", - }, - }, - &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, - "nat-addresses": "router", - "exclude-lb-vips-from-garp": "true", - }, - }, - } - gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) - return nil - } - - err := app.Run([]string{app.Name}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - }) - - ginkgo.It("should ensure SNATs towards egressIP and nodeIP are correctly configured during egressIP re-assignment", func() { - app.Action = func(ctx *cli.Context) error { - config.Gateway.DisableSNATMultipleGWs = true - - egressIP1 := "192.168.126.101" - egressIP2 := "192.168.126.102" - node1IPv4 := "192.168.126.12/24" - node2IPv4 := "192.168.126.51/24" - - egressPod1 := *newPodWithLabels(namespace, podName, node1Name, podV4IP, egressPodLabel) - egressPod2 := *newPodWithLabels(namespace, "egress-pod2", node2Name, "10.128.0.16", egressPodLabel) - egressNamespace := newNamespace(namespace) - - node1 := v1.Node{ - ObjectMeta: metav1.ObjectMeta{ - Name: node1Name, - Annotations: map[string]string{ - "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\"}", node1IPv4), - "k8s.ovn.org/node-subnets": 
fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), - "k8s.ovn.org/l3-gateway-config": `{"default":{"mode":"local","mac-address":"7e:57:f8:f0:3c:49", "ip-address":"192.168.126.12/24", "next-hop":"192.168.126.1"}}`, - "k8s.ovn.org/node-chassis-id": "79fdcfc4-6fe6-4cd3-8242-c0f85a4668ec", - }, - }, - Status: v1.NodeStatus{ - Conditions: []v1.NodeCondition{ - { - Type: v1.NodeReady, - Status: v1.ConditionTrue, - }, - }, - }, - } - node2 := v1.Node{ - ObjectMeta: metav1.ObjectMeta{ - Name: node2Name, - Annotations: map[string]string{ - "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\"}", node2IPv4), - "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), - "k8s.ovn.org/l3-gateway-config": `{"default":{"mode":"local","mac-address":"7e:57:f8:f0:3c:49", "ip-address":"192.168.126.51/24", "next-hop":"192.168.126.1"}}`, - "k8s.ovn.org/node-chassis-id": "89fdcfc4-6fe6-4cd3-8242-c0f85a4668ec", - }, - }, - Status: v1.NodeStatus{ - Conditions: []v1.NodeCondition{ - { - Type: v1.NodeReady, - Status: v1.ConditionTrue, - }, - }, - }, - } - - eIP := egressipv1.EgressIP{ - ObjectMeta: newEgressIPMeta(egressIPName), - Spec: egressipv1.EgressIPSpec{ - EgressIPs: []string{egressIP1, egressIP2}, - PodSelector: metav1.LabelSelector{ - MatchLabels: egressPodLabel, - }, - NamespaceSelector: metav1.LabelSelector{ - MatchLabels: map[string]string{ - "name": egressNamespace.Name, - }, - }, - }, - Status: egressipv1.EgressIPStatus{ - Items: []egressipv1.EgressIPStatusItem{}, - }, - } - - fakeOvn.startWithDBSetup( - libovsdbtest.TestSetup{ - NBData: []libovsdbtest.TestData{ - &nbdb.LogicalRouter{ - Name: ovntypes.OVNClusterRouter, - UUID: ovntypes.OVNClusterRouter + "-UUID", - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node1.Name, - UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node2.Name, - UUID: ovntypes.GWRouterPrefix + node2.Name + "-UUID", - }, - 
&nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name, - Networks: []string{"100.64.0.2/29"}, - }, - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name, - Networks: []string{"100.64.0.3/29"}, - }, - &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, - }, - }, - &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, - }, - }, - }, - }, - &egressipv1.EgressIPList{ - Items: []egressipv1.EgressIP{eIP}, - }, - &v1.NodeList{ - Items: []v1.Node{node1, node2}, - }, - &v1.NamespaceList{ - Items: []v1.Namespace{*egressNamespace}, - }, - &v1.PodList{ - Items: []v1.Pod{egressPod1, egressPod2}, - }, - ) - - i, n, _ := net.ParseCIDR(podV4IP + "/23") - n.IP = i - fakeOvn.controller.logicalPortCache.add(&egressPod1, "", types.DefaultNetworkName, "", nil, []*net.IPNet{n}) - i, n, _ = net.ParseCIDR("10.128.0.16" + "/23") - n.IP = i - fakeOvn.controller.logicalPortCache.add(&egressPod2, "", types.DefaultNetworkName, "", nil, []*net.IPNet{n}) - - err := fakeOvn.controller.WatchEgressIPNamespaces() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = fakeOvn.controller.WatchEgressIPPods() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = fakeOvn.controller.WatchEgressNodes() - 
gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = fakeOvn.controller.WatchEgressIP() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - - expectedDatabaseState := []libovsdbtest.TestData{ - &nbdb.LogicalRouterPolicy{ - Priority: types.DefaultNoRereoutePriority, - Match: "ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14", - Action: nbdb.LogicalRouterPolicyActionAllow, - UUID: "no-reroute-UUID", - }, - &nbdb.LogicalRouterPolicy{ - Priority: types.DefaultNoRereoutePriority, - Match: fmt.Sprintf("ip4.src == 10.128.0.0/14 && ip4.dst == %s", config.Gateway.V4JoinSubnet), - Action: nbdb.LogicalRouterPolicyActionAllow, - UUID: "no-reroute-service-UUID", - }, - &nbdb.LogicalRouter{ - Name: ovntypes.OVNClusterRouter, - UUID: ovntypes.OVNClusterRouter + "-UUID", - Policies: []string{"no-reroute-UUID", "no-reroute-service-UUID"}, - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node1.Name, - UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node2.Name, - UUID: ovntypes.GWRouterPrefix + node2.Name + "-UUID", - }, - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name, - Networks: []string{"100.64.0.3/29"}, - }, - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name, - Networks: []string{"100.64.0.2/29"}, - }, - &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, - }, - }, - &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + 
types.GWRouterPrefix + node2Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, - }, - }, - } - gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) - - gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(0)) - gomega.Eventually(isEgressAssignableNode(node1.Name)).Should(gomega.BeFalse()) - gomega.Eventually(isEgressAssignableNode(node2.Name)).Should(gomega.BeFalse()) - - node1.Labels = map[string]string{ - "k8s.ovn.org/egress-assignable": "", - } - - _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Nodes().Update(context.TODO(), &node1, metav1.UpdateOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - - gomega.Eventually(isEgressAssignableNode(node1.Name)).Should(gomega.BeTrue()) - gomega.Eventually(isEgressAssignableNode(node2.Name)).Should(gomega.BeFalse()) - gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) - gomega.Eventually(getEgressIPReassignmentCount).Should(gomega.Equal(1)) - eips, nodes := getEgressIPStatus(egressIPName) - gomega.Expect(nodes[0]).To(gomega.Equal(node1.Name)) - - expectedNatLogicalPort1 := "k8s-node1" - expectedDatabaseState = []libovsdbtest.TestData{ - &nbdb.LogicalRouterPolicy{ - Priority: types.DefaultNoRereoutePriority, - Match: "ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14", - Action: nbdb.LogicalRouterPolicyActionAllow, - UUID: "no-reroute-UUID", - }, - &nbdb.LogicalRouterPolicy{ - Priority: types.DefaultNoRereoutePriority, - Match: fmt.Sprintf("ip4.src == 10.128.0.0/14 && ip4.dst == %s", config.Gateway.V4JoinSubnet), - Action: nbdb.LogicalRouterPolicyActionAllow, - UUID: "no-reroute-service-UUID", - }, - &nbdb.LogicalRouterPolicy{ - Priority: types.EgressIPReroutePriority, - Match: fmt.Sprintf("ip4.src == %s", egressPod1.Status.PodIP), - Action: 
nbdb.LogicalRouterPolicyActionReroute, - Nexthops: []string{"100.64.0.2"}, - ExternalIDs: map[string]string{ - "name": eIP.Name, - }, - UUID: "reroute-UUID1", - }, - &nbdb.LogicalRouterPolicy{ - Priority: types.EgressIPReroutePriority, - Match: fmt.Sprintf("ip4.src == %s", egressPod2.Status.PodIP), - Action: nbdb.LogicalRouterPolicyActionReroute, - Nexthops: []string{"100.64.0.2"}, - ExternalIDs: map[string]string{ - "name": eIP.Name, - }, - UUID: "reroute-UUID2", - }, - &nbdb.LogicalRouter{ - Name: ovntypes.OVNClusterRouter, - UUID: ovntypes.OVNClusterRouter + "-UUID", - Policies: []string{"no-reroute-UUID", "no-reroute-service-UUID", "reroute-UUID1", "reroute-UUID2"}, - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node1.Name, - UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", - Nat: []string{"egressip-nat-UUID1", "egressip-nat-UUID2"}, - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node2.Name, - UUID: ovntypes.GWRouterPrefix + node2.Name + "-UUID", - }, - &nbdb.NAT{ - UUID: "egressip-nat-UUID1", - LogicalIP: podV4IP, - ExternalIP: eips[0], - ExternalIDs: map[string]string{ - "name": egressIPName, - }, - Type: nbdb.NATTypeSNAT, - LogicalPort: &expectedNatLogicalPort1, - Options: map[string]string{ - "stateless": "false", - }, - }, - &nbdb.NAT{ - UUID: "egressip-nat-UUID2", - LogicalIP: "10.128.0.16", - ExternalIP: eips[0], - ExternalIDs: map[string]string{ - "name": egressIPName, - }, - Type: nbdb.NATTypeSNAT, - LogicalPort: &expectedNatLogicalPort1, - Options: map[string]string{ - "stateless": "false", - }, - }, - &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, - "nat-addresses": "router", - "exclude-lb-vips-from-garp": "true", - }, - }, - &nbdb.LogicalSwitchPort{ - UUID: 
types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, - }, - }, - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name, - Networks: []string{"100.64.0.3/29"}, - }, - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name, - Networks: []string{"100.64.0.2/29"}, - }, - } - gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) - - node2.Labels = map[string]string{ - "k8s.ovn.org/egress-assignable": "", - } - - _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Nodes().Update(context.TODO(), &node2, metav1.UpdateOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - - gomega.Eventually(getEgressIPAllocatorSizeSafely).Should(gomega.Equal(2)) - gomega.Expect(fakeOvn.controller.eIPC.allocator.cache).To(gomega.HaveKey(node1.Name)) - gomega.Expect(fakeOvn.controller.eIPC.allocator.cache).To(gomega.HaveKey(node2.Name)) - gomega.Eventually(isEgressAssignableNode(node1.Name)).Should(gomega.BeTrue()) - gomega.Eventually(isEgressAssignableNode(node2.Name)).Should(gomega.BeTrue()) - gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(2)) - gomega.Eventually(getEgressIPReassignmentCount).Should(gomega.Equal(0)) - - eips, nodes = getEgressIPStatus(egressIPName) - gomega.Expect(nodes[0]).To(gomega.Equal(node1.Name)) - gomega.Expect(nodes[1]).To(gomega.Equal(node2.Name)) - - expectedNatLogicalPort2 := "k8s-node2" - expectedDatabaseState = []libovsdbtest.TestData{ - &nbdb.LogicalRouterPolicy{ - Priority: 
types.DefaultNoRereoutePriority, - Match: "ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14", - Action: nbdb.LogicalRouterPolicyActionAllow, - UUID: "no-reroute-UUID", - }, - &nbdb.LogicalRouterPolicy{ - Priority: types.DefaultNoRereoutePriority, - Match: fmt.Sprintf("ip4.src == 10.128.0.0/14 && ip4.dst == %s", config.Gateway.V4JoinSubnet), - Action: nbdb.LogicalRouterPolicyActionAllow, - UUID: "no-reroute-service-UUID", - }, - &nbdb.LogicalRouterPolicy{ - Priority: types.EgressIPReroutePriority, - Match: fmt.Sprintf("ip4.src == %s", egressPod1.Status.PodIP), - Action: nbdb.LogicalRouterPolicyActionReroute, - Nexthops: []string{"100.64.0.2", "100.64.0.3"}, - ExternalIDs: map[string]string{ - "name": eIP.Name, - }, - UUID: "reroute-UUID1", - }, - &nbdb.LogicalRouterPolicy{ - Priority: types.EgressIPReroutePriority, - Match: fmt.Sprintf("ip4.src == %s", egressPod2.Status.PodIP), - Action: nbdb.LogicalRouterPolicyActionReroute, - Nexthops: []string{"100.64.0.2", "100.64.0.3"}, - ExternalIDs: map[string]string{ - "name": eIP.Name, - }, - UUID: "reroute-UUID2", - }, - &nbdb.NAT{ - UUID: "egressip-nat-UUID1", - LogicalIP: podV4IP, - ExternalIP: eips[0], - ExternalIDs: map[string]string{ - "name": egressIPName, - }, - Type: nbdb.NATTypeSNAT, - LogicalPort: &expectedNatLogicalPort1, - Options: map[string]string{ - "stateless": "false", - }, - }, - &nbdb.NAT{ - UUID: "egressip-nat-UUID2", - LogicalIP: "10.128.0.16", - ExternalIP: eips[0], - ExternalIDs: map[string]string{ - "name": egressIPName, - }, - Type: nbdb.NATTypeSNAT, - LogicalPort: &expectedNatLogicalPort1, - Options: map[string]string{ - "stateless": "false", - }, - }, - &nbdb.NAT{ - UUID: "egressip-nat-UUID3", - LogicalIP: podV4IP, - ExternalIP: eips[1], - ExternalIDs: map[string]string{ - "name": egressIPName, - }, - Type: nbdb.NATTypeSNAT, - LogicalPort: &expectedNatLogicalPort2, - Options: map[string]string{ - "stateless": "false", - }, - }, - &nbdb.NAT{ - UUID: "egressip-nat-UUID4", - LogicalIP: 
"10.128.0.16", - ExternalIP: eips[1], - ExternalIDs: map[string]string{ - "name": egressIPName, - }, - Type: nbdb.NATTypeSNAT, - LogicalPort: &expectedNatLogicalPort2, - Options: map[string]string{ - "stateless": "false", - }, - }, - &nbdb.LogicalRouter{ - Name: ovntypes.OVNClusterRouter, - UUID: ovntypes.OVNClusterRouter + "-UUID", - Policies: []string{"no-reroute-UUID", "no-reroute-service-UUID", "reroute-UUID1", "reroute-UUID2"}, - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node1.Name, - UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", - Nat: []string{"egressip-nat-UUID1", "egressip-nat-UUID2"}, - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node2.Name, - UUID: ovntypes.GWRouterPrefix + node2.Name + "-UUID", - Nat: []string{"egressip-nat-UUID3", "egressip-nat-UUID4"}, - }, - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name, - Networks: []string{"100.64.0.3/29"}, - }, - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name, - Networks: []string{"100.64.0.2/29"}, - }, - &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, - "nat-addresses": "router", - "exclude-lb-vips-from-garp": "true", - }, - }, - &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" 
+ node2Name, - "nat-addresses": "router", - "exclude-lb-vips-from-garp": "true", - }, - }, - } - gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) - - // remove label from node2 - node2.Labels = map[string]string{} - - _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Nodes().Update(context.TODO(), &node2, metav1.UpdateOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - - gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) - gomega.Eventually(getEgressIPReassignmentCount).Should(gomega.Equal(1)) - - expectedDatabaseState = []libovsdbtest.TestData{ - &nbdb.LogicalRouterPolicy{ - Priority: types.DefaultNoRereoutePriority, - Match: "ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14", - Action: nbdb.LogicalRouterPolicyActionAllow, - UUID: "no-reroute-UUID", - }, - &nbdb.LogicalRouterPolicy{ - Priority: types.DefaultNoRereoutePriority, - Match: fmt.Sprintf("ip4.src == 10.128.0.0/14 && ip4.dst == %s", config.Gateway.V4JoinSubnet), - Action: nbdb.LogicalRouterPolicyActionAllow, - UUID: "no-reroute-service-UUID", - }, - &nbdb.LogicalRouterPolicy{ - Priority: types.EgressIPReroutePriority, - Match: fmt.Sprintf("ip4.src == %s", egressPod1.Status.PodIP), - Action: nbdb.LogicalRouterPolicyActionReroute, - Nexthops: nodeLogicalRouterIPv4, - ExternalIDs: map[string]string{ - "name": eIP.Name, - }, - UUID: "reroute-UUID1", - }, - &nbdb.LogicalRouterPolicy{ - Priority: types.EgressIPReroutePriority, - Match: fmt.Sprintf("ip4.src == %s", egressPod2.Status.PodIP), - Action: nbdb.LogicalRouterPolicyActionReroute, - Nexthops: nodeLogicalRouterIPv4, - ExternalIDs: map[string]string{ - "name": eIP.Name, - }, - UUID: "reroute-UUID2", - }, - &nbdb.NAT{ - UUID: "egressip-nat-UUID1", - LogicalIP: podV4IP, - ExternalIP: eips[0], - ExternalIDs: map[string]string{ - "name": egressIPName, - }, - Type: nbdb.NATTypeSNAT, - LogicalPort: &expectedNatLogicalPort1, - Options: map[string]string{ - "stateless": "false", - }, - 
}, - &nbdb.NAT{ - UUID: "egressip-nat-UUID2", - LogicalIP: "10.128.0.16", - ExternalIP: eips[0], - ExternalIDs: map[string]string{ - "name": egressIPName, - }, - Type: nbdb.NATTypeSNAT, - LogicalPort: &expectedNatLogicalPort1, - Options: map[string]string{ - "stateless": "false", - }, - }, - &nbdb.NAT{ - UUID: "egressip-nat-UUID3", - LogicalIP: "10.128.0.16", - ExternalIP: "192.168.126.51", // adds back SNAT towards nodeIP - Type: nbdb.NATTypeSNAT, - Options: map[string]string{ - "stateless": "false", - }, - }, - &nbdb.LogicalRouter{ - Name: ovntypes.OVNClusterRouter, - UUID: ovntypes.OVNClusterRouter + "-UUID", - Policies: []string{"no-reroute-UUID", "no-reroute-service-UUID", "reroute-UUID1", "reroute-UUID2"}, - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node1.Name, - UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", - Nat: []string{"egressip-nat-UUID1", "egressip-nat-UUID2"}, - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node2.Name, - UUID: ovntypes.GWRouterPrefix + node2.Name + "-UUID", - Nat: []string{"egressip-nat-UUID3"}, - }, - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name, - Networks: []string{"100.64.0.3/29"}, - }, - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name, - Networks: []string{"100.64.0.2/29"}, - }, - &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, - "nat-addresses": "router", - "exclude-lb-vips-from-garp": "true", - }, - }, - &nbdb.LogicalSwitchPort{ - 
UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, - }, - }, - } - gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) - - // remove label from node1 - node1.Labels = map[string]string{} - - _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Nodes().Update(context.TODO(), &node1, metav1.UpdateOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - - gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(0)) - gomega.Eventually(getEgressIPReassignmentCount).Should(gomega.Equal(1)) // though 2 egressIPs to be re-assigned its only 1 egressIP object - - expectedDatabaseState = []libovsdbtest.TestData{ - &nbdb.LogicalRouterPolicy{ - Priority: types.DefaultNoRereoutePriority, - Match: "ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14", - Action: nbdb.LogicalRouterPolicyActionAllow, - UUID: "no-reroute-UUID", - }, - &nbdb.LogicalRouterPolicy{ - Priority: types.DefaultNoRereoutePriority, - Match: fmt.Sprintf("ip4.src == 10.128.0.0/14 && ip4.dst == %s", config.Gateway.V4JoinSubnet), - Action: nbdb.LogicalRouterPolicyActionAllow, - UUID: "no-reroute-service-UUID", - }, - &nbdb.NAT{ - UUID: "egressip-nat-UUID1", - LogicalIP: podV4IP, - ExternalIP: "192.168.126.12", // adds back SNAT towards nodeIP - Type: nbdb.NATTypeSNAT, - Options: map[string]string{ - "stateless": "false", - }, - }, - &nbdb.NAT{ - UUID: "egressip-nat-UUID3", - LogicalIP: "10.128.0.16", - ExternalIP: "192.168.126.51", - Type: nbdb.NATTypeSNAT, - Options: map[string]string{ - "stateless": "false", - }, - }, - &nbdb.LogicalRouter{ - Name: ovntypes.OVNClusterRouter, - UUID: ovntypes.OVNClusterRouter + "-UUID", - Policies: []string{"no-reroute-UUID", "no-reroute-service-UUID"}, - }, - &nbdb.LogicalRouter{ - Name: 
ovntypes.GWRouterPrefix + node1.Name, - UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", - Nat: []string{"egressip-nat-UUID1"}, - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node2.Name, - UUID: ovntypes.GWRouterPrefix + node2.Name + "-UUID", - Nat: []string{"egressip-nat-UUID3"}, - }, - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name, - Networks: []string{"100.64.0.3/29"}, - }, - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name, - Networks: []string{"100.64.0.2/29"}, - }, - &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, - }, - }, - &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, - }, - }, - } - gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) - - return nil - } - - err := app.Run([]string{app.Name}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - }) - - ginkgo.It("should re-balance EgressIPs when their node is removed", func() { - app.Action = func(ctx *cli.Context) error { - - egressIP := "192.168.126.101" - node1IPv4 := "192.168.126.12/24" - node1IPv6 := "0:0:0:0:0:feff:c0a8:8e0c/64" - node2IPv4 := "192.168.126.51/24" - - node1 := v1.Node{ - ObjectMeta: metav1.ObjectMeta{ - Name: node1Name, - 
Annotations: map[string]string{ - "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", node1IPv4, node1IPv6), - "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":[\"%s\", \"%s\"]}", v4NodeSubnet, v6NodeSubnet), - }, - Labels: map[string]string{ - "k8s.ovn.org/egress-assignable": "", - }, - }, - Status: v1.NodeStatus{ - Conditions: []v1.NodeCondition{ - { - Type: v1.NodeReady, - Status: v1.ConditionTrue, - }, - }, - }, - } - node2 := v1.Node{ - ObjectMeta: metav1.ObjectMeta{ - Name: node2Name, - Annotations: map[string]string{ - "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", node2IPv4, ""), - "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), - }, - Labels: map[string]string{ - "k8s.ovn.org/egress-assignable": "", - }, - }, - Status: v1.NodeStatus{ - Conditions: []v1.NodeCondition{ - { - Type: v1.NodeReady, - Status: v1.ConditionTrue, - }, - }, - }, - } - - eIP := egressipv1.EgressIP{ - ObjectMeta: newEgressIPMeta(egressIPName), - Spec: egressipv1.EgressIPSpec{ - EgressIPs: []string{egressIP}, - }, - Status: egressipv1.EgressIPStatus{ - Items: []egressipv1.EgressIPStatusItem{}, - }, - } - - fakeOvn.startWithDBSetup( - libovsdbtest.TestSetup{ - NBData: []libovsdbtest.TestData{ - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name, - Networks: []string{nodeLogicalRouterIfAddrV4, nodeLogicalRouterIfAddrV6}, - }, - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name, - Networks: []string{nodeLogicalRouterIfAddrV4}, - }, - &nbdb.LogicalRouter{ - Name: ovntypes.OVNClusterRouter, - UUID: ovntypes.OVNClusterRouter + "-UUID", - }, - &nbdb.LogicalRouter{ - Name: 
ovntypes.GWRouterPrefix + node1.Name, - UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node2.Name, - UUID: ovntypes.GWRouterPrefix + node2.Name + "-UUID", - }, - &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, - }, - }, - &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, - }, - }, - }, - }, - &egressipv1.EgressIPList{ - Items: []egressipv1.EgressIP{eIP}, - }, - &v1.NodeList{ - Items: []v1.Node{node1}, - }) - - err := fakeOvn.controller.WatchEgressIPNamespaces() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = fakeOvn.controller.WatchEgressIPPods() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = fakeOvn.controller.WatchEgressNodes() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - err = fakeOvn.controller.WatchEgressIP() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - - expectedDatabaseState := []libovsdbtest.TestData{ - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name, - Networks: []string{nodeLogicalRouterIfAddrV4, nodeLogicalRouterIfAddrV6}, - }, - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name, - Networks: []string{nodeLogicalRouterIfAddrV4}, - }, - 
&nbdb.LogicalRouterPolicy{ - Priority: types.DefaultNoRereoutePriority, - Match: "ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14", - Action: nbdb.LogicalRouterPolicyActionAllow, - UUID: "no-reroute-UUID", - }, - &nbdb.LogicalRouterPolicy{ - Priority: types.DefaultNoRereoutePriority, - Match: fmt.Sprintf("ip4.src == 10.128.0.0/14 && ip4.dst == %s", config.Gateway.V4JoinSubnet), - Action: nbdb.LogicalRouterPolicyActionAllow, - UUID: "no-reroute-service-UUID", - }, - &nbdb.LogicalRouter{ - Name: ovntypes.OVNClusterRouter, - UUID: ovntypes.OVNClusterRouter + "-UUID", - Policies: []string{"no-reroute-UUID", "no-reroute-service-UUID"}, - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node1.Name, - UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node2.Name, - UUID: ovntypes.GWRouterPrefix + node2.Name + "-UUID", - }, - &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, - "nat-addresses": "router", - "exclude-lb-vips-from-garp": "true", - }, - }, - &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, - }, - }, - } - gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) - - gomega.Eventually(getEgressIPAllocatorSizeSafely).Should(gomega.Equal(1)) - gomega.Expect(fakeOvn.controller.eIPC.allocator.cache).To(gomega.HaveKey(node1.Name)) - gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) - egressIPs, nodes := 
getEgressIPStatus(egressIPName) - gomega.Expect(nodes[0]).To(gomega.Equal(node1.Name)) - gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP)) - - _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Nodes().Create(context.TODO(), &node2, metav1.CreateOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - expectedDatabaseState = []libovsdbtest.TestData{ - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name, - Networks: []string{nodeLogicalRouterIfAddrV4, nodeLogicalRouterIfAddrV6}, - }, - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name, - Networks: []string{nodeLogicalRouterIfAddrV4}, - }, - &nbdb.LogicalRouterPolicy{ - Priority: types.DefaultNoRereoutePriority, - Match: "ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14", - Action: nbdb.LogicalRouterPolicyActionAllow, - UUID: "no-reroute-UUID", - }, - &nbdb.LogicalRouterPolicy{ - Priority: types.DefaultNoRereoutePriority, - Match: fmt.Sprintf("ip4.src == 10.128.0.0/14 && ip4.dst == %s", config.Gateway.V4JoinSubnet), - Action: nbdb.LogicalRouterPolicyActionAllow, - UUID: "no-reroute-service-UUID", - }, - &nbdb.LogicalRouter{ - Name: ovntypes.OVNClusterRouter, - UUID: ovntypes.OVNClusterRouter + "-UUID", - Policies: []string{"no-reroute-UUID", "no-reroute-service-UUID"}, - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node1.Name, - UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node2.Name, - UUID: ovntypes.GWRouterPrefix + node2.Name + "-UUID", - }, - &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + 
node1Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, - "nat-addresses": "router", - "exclude-lb-vips-from-garp": "true", - }, - }, - &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, - "nat-addresses": "router", - "exclude-lb-vips-from-garp": "true", - }, - }, - } - gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) - - gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) - egressIPs, nodes = getEgressIPStatus(egressIPName) - gomega.Expect(nodes[0]).To(gomega.Equal(node1.Name)) - gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP)) - gomega.Eventually(getEgressIPAllocatorSizeSafely).Should(gomega.Equal(2)) - gomega.Expect(fakeOvn.controller.eIPC.allocator.cache).To(gomega.HaveKey(node1.Name)) - gomega.Expect(fakeOvn.controller.eIPC.allocator.cache).To(gomega.HaveKey(node2.Name)) - - err = fakeOvn.fakeClient.KubeClient.CoreV1().Nodes().Delete(context.TODO(), node1.Name, *metav1.NewDeleteOptions(0)) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - gomega.Eventually(getEgressIPAllocatorSizeSafely).Should(gomega.Equal(1)) - gomega.Expect(fakeOvn.controller.eIPC.allocator.cache).ToNot(gomega.HaveKey(node1.Name)) - gomega.Expect(fakeOvn.controller.eIPC.allocator.cache).To(gomega.HaveKey(node2.Name)) - gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) - - getNewNode := func() string { - _, nodes = getEgressIPStatus(egressIPName) - if len(nodes) > 0 { - return nodes[0] - } - return "" - } - - gomega.Eventually(getNewNode).Should(gomega.Equal(node2.Name)) - egressIPs, _ = getEgressIPStatus(egressIPName) - 
gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP)) - - expectedDatabaseState = []libovsdbtest.TestData{ - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name, - Networks: []string{nodeLogicalRouterIfAddrV4, nodeLogicalRouterIfAddrV6}, - }, - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name, - Networks: []string{nodeLogicalRouterIfAddrV4}, - }, - &nbdb.LogicalRouterPolicy{ - Priority: types.DefaultNoRereoutePriority, - Match: "ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14", - Action: nbdb.LogicalRouterPolicyActionAllow, - UUID: "no-reroute-UUID", - }, - &nbdb.LogicalRouterPolicy{ - Priority: types.DefaultNoRereoutePriority, - Match: fmt.Sprintf("ip4.src == 10.128.0.0/14 && ip4.dst == %s", config.Gateway.V4JoinSubnet), - Action: nbdb.LogicalRouterPolicyActionAllow, - UUID: "no-reroute-service-UUID", - }, - &nbdb.LogicalRouter{ - Name: ovntypes.OVNClusterRouter, - UUID: ovntypes.OVNClusterRouter + "-UUID", - Policies: []string{"no-reroute-UUID", "no-reroute-service-UUID"}, - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node1.Name, - UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node2.Name, - UUID: ovntypes.GWRouterPrefix + node2.Name + "-UUID", - }, - &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, - }, - }, - &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID", - Name: 
types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, - "nat-addresses": "router", - "exclude-lb-vips-from-garp": "true", - }, - }, - } - gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) - return nil - } - - err := app.Run([]string{app.Name}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - }) - - ginkgo.It("egress node update should not mark the node as reachable if there was no label/readiness change", func() { - // When an egress node becomes reachable during a node update event and there is no changes to node labels/readiness - // unassigned egress IP should be eventually added by the periodic reachability check. - // Test steps: - // - disable periodic check from running in background, so it can be called directly from the test - // - assign egress IP to an available node - // - make the node unreachable and verify that the egress IP was unassigned - // - make the node reachable and update a node - // - verify that the egress IP was assigned by calling the periodic reachability check - app.Action = func(ctx *cli.Context) error { - egressIP := "192.168.126.101" - nodeIPv4 := "192.168.126.51/24" - node := v1.Node{ - ObjectMeta: metav1.ObjectMeta{ - Name: node1Name, - Annotations: map[string]string{ - "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\"}", nodeIPv4), - "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":[\"%s\"]}", v4NodeSubnet), - }, - Labels: map[string]string{ - "k8s.ovn.org/egress-assignable": "", - }, - }, - Status: v1.NodeStatus{ - Conditions: []v1.NodeCondition{ - { - Type: v1.NodeReady, - Status: v1.ConditionTrue, - }, - }, - }, - } - eIP1 := egressipv1.EgressIP{ - ObjectMeta: newEgressIPMeta(egressIPName), - Spec: egressipv1.EgressIPSpec{ - EgressIPs: []string{egressIP}, - }, - } - fakeOvn.startWithDBSetup( - libovsdbtest.TestSetup{ - NBData: 
[]libovsdbtest.TestData{ - &nbdb.LogicalRouter{ - Name: ovntypes.OVNClusterRouter, - UUID: ovntypes.OVNClusterRouter + "-UUID", - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node.Name, - UUID: ovntypes.GWRouterPrefix + node.Name + "-UUID", - }, - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node.Name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node.Name, - Networks: []string{nodeLogicalRouterIfAddrV4}, - }, - &nbdb.LogicalSwitchPort{ - UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node.Name + "UUID", - Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node.Name, - Type: "router", - Options: map[string]string{ - "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node.Name, - }, - }, - }, - }, - &egressipv1.EgressIPList{ - Items: []egressipv1.EgressIP{eIP1}, - }, - &v1.NodeList{ - Items: []v1.Node{node}, - }, - ) - - // Virtually disable background reachability check by using a huge interval - fakeOvn.controller.eIPC.reachabilityCheckInterval = time.Hour - - err := fakeOvn.controller.WatchEgressNodes() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - - gomega.Eventually(getEgressIPStatusLen(eIP1.Name)).Should(gomega.Equal(1)) - egressIPs, _ := getEgressIPStatus(eIP1.Name) - gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP)) - - hcClient := fakeOvn.controller.eIPC.allocator.cache[node.Name].healthClient.(*fakeEgressIPHealthClient) - hcClient.FakeProbeFailure = true - // explicitly call check reachability, periodic checker is not active - checkEgressNodesReachabilityIterate(fakeOvn.controller) - gomega.Eventually(getEgressIPStatusLen(eIP1.Name)).Should(gomega.Equal(0)) - - hcClient.FakeProbeFailure = false - node.Annotations["test"] = "dummy" - _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Nodes().Update(context.TODO(), &node, metav1.UpdateOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - 
gomega.Eventually(hcClient.IsConnected()).Should(gomega.Equal(true)) - // the node should not be marked as reachable in the update handler as it is not getting added - gomega.Consistently(func() bool { return fakeOvn.controller.eIPC.allocator.cache[node.Name].isReachable }).Should(gomega.Equal(false)) - - // egress IP should get assigned on the next checkEgressNodesReachabilityIterate call - // explicitly call check reachability, periodic checker is not active - checkEgressNodesReachabilityIterate(fakeOvn.controller) - gomega.Eventually(getEgressIPStatusLen(eIP1.Name)).Should(gomega.Equal(1)) - - return nil - } - err := app.Run([]string{app.Name}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - }) - }) - - ginkgo.Context("Dual-stack assignment", func() { - - ginkgo.It("should be able to allocate non-conflicting IPv4 on node which can host it, even if it happens to be the node with more assignments", func() { - app.Action = func(ctx *cli.Context) error { - - fakeOvn.start() - egressIP := "192.168.126.99" - - node1 := setupNode(node1Name, []string{"0:0:0:0:0:feff:c0a8:8e0c/64"}, map[string]string{"0:0:0:0:0:feff:c0a8:8e23": "bogus1"}) - node2 := setupNode(node2Name, []string{"192.168.126.51/24"}, map[string]string{"192.168.126.68": "bogus1", "192.168.126.102": "bogus2"}) - - fakeOvn.controller.eIPC.allocator.cache[node1.name] = &node1 - fakeOvn.controller.eIPC.allocator.cache[node2.name] = &node2 - - eIP := egressipv1.EgressIP{ - ObjectMeta: newEgressIPMeta(egressIPName), - Spec: egressipv1.EgressIPSpec{ - EgressIPs: []string{egressIP}, - }, - } - assignedStatuses := fakeOvn.controller.assignEgressIPs(eIP.Name, eIP.Spec.EgressIPs) - gomega.Expect(assignedStatuses).To(gomega.HaveLen(1)) - gomega.Expect(assignedStatuses[0].Node).To(gomega.Equal(node2.name)) - gomega.Expect(assignedStatuses[0].EgressIP).To(gomega.Equal(net.ParseIP(egressIP).String())) - - return nil - } - - err := app.Run([]string{app.Name}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - }) - - 
}) - - ginkgo.Context("IPv4 assignment", func() { - - ginkgo.It("Should not be able to assign egress IP defined in CIDR notation", func() { - app.Action = func(ctx *cli.Context) error { - - fakeOvn.start() - - egressIPs := []string{"192.168.126.99/32"} - - node1 := setupNode(node1Name, []string{"192.168.126.12/24"}, map[string]string{"192.168.126.102": "bogus1", "192.168.126.111": "bogus2"}) - node2 := setupNode(node2Name, []string{"192.168.126.51/24"}, map[string]string{"192.168.126.68": "bogus3"}) - - fakeOvn.controller.eIPC.allocator.cache[node1.name] = &node1 - fakeOvn.controller.eIPC.allocator.cache[node2.name] = &node2 - - eIP := egressipv1.EgressIP{ - ObjectMeta: newEgressIPMeta(egressIPName), - Spec: egressipv1.EgressIPSpec{ - EgressIPs: egressIPs, - }, - } - - validatedIPs, err := fakeOvn.controller.validateEgressIPSpec(eIP.Name, eIP.Spec.EgressIPs) - gomega.Expect(err).To(gomega.HaveOccurred()) - gomega.Expect(err.Error()).To(gomega.Equal(fmt.Sprintf("unable to parse provided EgressIP: %s, invalid", egressIPs[0]))) - gomega.Expect(validatedIPs).To(gomega.HaveLen(0)) - return nil - } - - err := app.Run([]string{app.Name}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - }) - - }) - - ginkgo.Context("IPv6 assignment", func() { - - ginkgo.It("should be able to allocate non-conflicting IP on node with lowest amount of allocations", func() { - app.Action = func(ctx *cli.Context) error { - - fakeOvn.start() - - egressIP := "0:0:0:0:0:feff:c0a8:8e0f" - node1 := setupNode(node1Name, []string{"0:0:0:0:0:feff:c0a8:8e0c/64"}, map[string]string{"0:0:0:0:0:feff:c0a8:8e32": "bogus1", "0:0:0:0:0:feff:c0a8:8e1e": "bogus2"}) - node2 := setupNode(node2Name, []string{"0:0:0:0:0:fedf:c0a8:8e0c/64"}, map[string]string{"0:0:0:0:0:feff:c0a8:8e23": "bogus3"}) - - fakeOvn.controller.eIPC.allocator.cache[node1.name] = &node1 - fakeOvn.controller.eIPC.allocator.cache[node2.name] = &node2 - - eIP := egressipv1.EgressIP{ - ObjectMeta: newEgressIPMeta(egressIPName), - Spec: 
egressipv1.EgressIPSpec{ - EgressIPs: []string{egressIP}, - }, - } - assignedStatuses := fakeOvn.controller.assignEgressIPs(eIP.Name, eIP.Spec.EgressIPs) - gomega.Expect(assignedStatuses).To(gomega.HaveLen(1)) - gomega.Expect(assignedStatuses[0].Node).To(gomega.Equal(node2.name)) - gomega.Expect(assignedStatuses[0].EgressIP).To(gomega.Equal(net.ParseIP(egressIP).String())) - - return nil - } - - err := app.Run([]string{app.Name}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - }) - - ginkgo.It("should be able to allocate several EgressIPs and avoid the same node", func() { - app.Action = func(ctx *cli.Context) error { - - fakeOvn.start() - - egressIP1 := "0:0:0:0:0:feff:c0a8:8e0d" - egressIP2 := "0:0:0:0:0:feff:c0a8:8e0f" - node1 := setupNode(node1Name, []string{"0:0:0:0:0:feff:c0a8:8e0c/64"}, map[string]string{"0:0:0:0:0:feff:c0a8:8e32": "bogus1", "0:0:0:0:0:feff:c0a8:8e1e": "bogus2"}) - node2 := setupNode(node2Name, []string{"0:0:0:0:0:fedf:c0a8:8e0c/64"}, map[string]string{"0:0:0:0:0:feff:c0a8:8e23": "bogus3"}) - - fakeOvn.controller.eIPC.allocator.cache[node1.name] = &node1 - fakeOvn.controller.eIPC.allocator.cache[node2.name] = &node2 - - eIP := egressipv1.EgressIP{ - ObjectMeta: newEgressIPMeta(egressIPName), - Spec: egressipv1.EgressIPSpec{ - EgressIPs: []string{egressIP1, egressIP2}, - }, - } - assignedStatuses := fakeOvn.controller.assignEgressIPs(eIP.Name, eIP.Spec.EgressIPs) - gomega.Expect(assignedStatuses).To(gomega.HaveLen(2)) - gomega.Expect(assignedStatuses[0].Node).To(gomega.Equal(node2.name)) - gomega.Expect(assignedStatuses[0].EgressIP).To(gomega.Equal(net.ParseIP(egressIP1).String())) - gomega.Expect(assignedStatuses[1].Node).To(gomega.Equal(node1.name)) - gomega.Expect(assignedStatuses[1].EgressIP).To(gomega.Equal(net.ParseIP(egressIP2).String())) - return nil - } - - err := app.Run([]string{app.Name}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - }) - - ginkgo.It("should be able to allocate several EgressIPs and avoid the same node 
and leave one un-assigned without error", func() { - app.Action = func(ctx *cli.Context) error { - - fakeOvn.start() - - egressIP1 := "0:0:0:0:0:feff:c0a8:8e0d" - egressIP2 := "0:0:0:0:0:feff:c0a8:8e0e" - egressIP3 := "0:0:0:0:0:feff:c0a8:8e0f" - - node1 := setupNode(node1Name, []string{"0:0:0:0:0:feff:c0a8:8e0c/64"}, map[string]string{"0:0:0:0:0:feff:c0a8:8e32": "bogus1", "0:0:0:0:0:feff:c0a8:8e1e": "bogus2"}) - node2 := setupNode(node2Name, []string{"0:0:0:0:0:fedf:c0a8:8e0c/64"}, map[string]string{"0:0:0:0:0:feff:c0a8:8e23": "bogus3"}) - - fakeOvn.controller.eIPC.allocator.cache[node1.name] = &node1 - fakeOvn.controller.eIPC.allocator.cache[node2.name] = &node2 - - eIP := egressipv1.EgressIP{ - ObjectMeta: newEgressIPMeta(egressIPName), - Spec: egressipv1.EgressIPSpec{ - EgressIPs: []string{egressIP1, egressIP2, egressIP3}, - }, - } - assignedStatuses := fakeOvn.controller.assignEgressIPs(eIP.Name, eIP.Spec.EgressIPs) - gomega.Expect(assignedStatuses).To(gomega.HaveLen(2)) - gomega.Expect(assignedStatuses[0].Node).To(gomega.Equal(node2.name)) - gomega.Expect(assignedStatuses[0].EgressIP).To(gomega.Equal(net.ParseIP(egressIP1).String())) - gomega.Expect(assignedStatuses[1].Node).To(gomega.Equal(node1.name)) - gomega.Expect(assignedStatuses[1].EgressIP).To(gomega.Equal(net.ParseIP(egressIP2).String())) - - return nil - } - - err := app.Run([]string{app.Name}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - }) - - ginkgo.It("should return the already allocated IP with the same node if it is allocated again", func() { - app.Action = func(ctx *cli.Context) error { - - fakeOvn.start() - - egressIP := "0:0:0:0:0:feff:c0a8:8e32" - - node1 := setupNode(node1Name, []string{"0:0:0:0:0:feff:c0a8:8e0c/64"}, map[string]string{egressIP: egressIPName, "0:0:0:0:0:feff:c0a8:8e1e": "bogus1"}) - node2 := setupNode(node2Name, []string{"0:0:0:0:0:fedf:c0a8:8e0c/64"}, map[string]string{"0:0:0:0:0:feff:c0a8:8e23": "bogus2"}) - - fakeOvn.controller.eIPC.allocator.cache[node1.name] = 
&node1 - fakeOvn.controller.eIPC.allocator.cache[node2.name] = &node2 - - egressIPs := []string{egressIP} - eIP := egressipv1.EgressIP{ - ObjectMeta: newEgressIPMeta(egressIPName), - Spec: egressipv1.EgressIPSpec{ - EgressIPs: egressIPs, - }, - } - - assignedStatuses := fakeOvn.controller.assignEgressIPs(eIP.Name, eIP.Spec.EgressIPs) - gomega.Expect(assignedStatuses).To(gomega.HaveLen(1)) - gomega.Expect(assignedStatuses[0].Node).To(gomega.Equal(node1Name)) - return nil - } - - err := app.Run([]string{app.Name}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - }) - - ginkgo.It("should not be able to allocate node IP", func() { - app.Action = func(ctx *cli.Context) error { - - fakeOvn.start() - - egressIP := "0:0:0:0:0:feff:c0a8:8e0c" - - node1 := setupNode(node1Name, []string{egressIP + "/64"}, map[string]string{"0:0:0:0:0:feff:c0a8:8e32": "bogus1", "0:0:0:0:0:feff:c0a8:8e1e": "bogus2"}) - node2 := setupNode(node2Name, []string{"0:0:0:0:0:fedf:c0a8:8e0c/64"}, map[string]string{"0:0:0:0:0:feff:c0a8:8e23": "bogus3"}) - - fakeOvn.controller.eIPC.allocator.cache[node1.name] = &node1 - fakeOvn.controller.eIPC.allocator.cache[node2.name] = &node2 - - eIP := egressipv1.EgressIP{ - ObjectMeta: newEgressIPMeta(egressIPName), - Spec: egressipv1.EgressIPSpec{ - EgressIPs: []string{egressIP}, - }, - } - assignedStatuses := fakeOvn.controller.assignEgressIPs(eIP.Name, eIP.Spec.EgressIPs) - gomega.Expect(assignedStatuses).To(gomega.HaveLen(0)) - - return nil - } - - err := app.Run([]string{app.Name}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - }) - - ginkgo.It("should not be able to allocate conflicting compressed IP", func() { - app.Action = func(ctx *cli.Context) error { - - fakeOvn.start() - - egressIP := "::feff:c0a8:8e32" - - node1 := setupNode(node1Name, []string{"0:0:0:0:0:feff:c0a8:8e0c/64"}, map[string]string{"0:0:0:0:0:feff:c0a8:8e32": "bogus1", "0:0:0:0:0:feff:c0a8:8e1e": "bogus2"}) - node2 := setupNode(node2Name, []string{"0:0:0:0:0:fedf:c0a8:8e0c/64"}, 
map[string]string{"0:0:0:0:0:feff:c0a8:8e23": "bogus3"}) - - fakeOvn.controller.eIPC.allocator.cache[node1.name] = &node1 - fakeOvn.controller.eIPC.allocator.cache[node2.name] = &node2 - - egressIPs := []string{egressIP} - - eIP := egressipv1.EgressIP{ - ObjectMeta: newEgressIPMeta(egressIPName), - Spec: egressipv1.EgressIPSpec{ - EgressIPs: egressIPs, - }, - } - - assignedStatuses := fakeOvn.controller.assignEgressIPs(eIP.Name, eIP.Spec.EgressIPs) - gomega.Expect(assignedStatuses).To(gomega.HaveLen(0)) - return nil - } - - err := app.Run([]string{app.Name}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - }) - - ginkgo.It("should not be able to allocate IPv4 IP on nodes which can only host IPv6", func() { - app.Action = func(ctx *cli.Context) error { - - fakeOvn.start() - - egressIP := "192.168.126.16" - - node1 := setupNode(node1Name, []string{"0:0:0:0:0:feff:c0a8:8e0c/64"}, map[string]string{"0:0:0:0:0:feff:c0a8:8e32": "bogus1", "0:0:0:0:0:feff:c0a8:8e1e": "bogus2"}) - node2 := setupNode(node2Name, []string{"0:0:0:0:0:fedf:c0a8:8e0c/64"}, map[string]string{"0:0:0:0:0:feff:c0a8:8e23": "bogus3"}) - - fakeOvn.controller.eIPC.allocator.cache[node1.name] = &node1 - fakeOvn.controller.eIPC.allocator.cache[node2.name] = &node2 - - eIPs := []string{egressIP} - eIP := egressipv1.EgressIP{ - ObjectMeta: newEgressIPMeta(egressIPName), - Spec: egressipv1.EgressIPSpec{ - EgressIPs: eIPs, - }, - } - - assignedStatuses := fakeOvn.controller.assignEgressIPs(eIP.Name, eIP.Spec.EgressIPs) - gomega.Expect(assignedStatuses).To(gomega.HaveLen(0)) - return nil - } - - err := app.Run([]string{app.Name}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - }) - - ginkgo.It("should be able to allocate non-conflicting compressed uppercase IP", func() { - app.Action = func(ctx *cli.Context) error { - - fakeOvn.start() - - egressIP := "::FEFF:C0A8:8D32" - - node1 := setupNode(node1Name, []string{"0:0:0:0:0:feff:c0a8:8e0c/64"}, map[string]string{"0:0:0:0:0:feff:c0a8:8e32": "bogus1", 
"0:0:0:0:0:feff:c0a8:8e1e": "bogus2"}) - node2 := setupNode(node2Name, []string{"0:0:0:0:0:fedf:c0a8:8e0c/64"}, map[string]string{"0:0:0:0:0:feff:c0a8:8e23": "bogus3"}) - - fakeOvn.controller.eIPC.allocator.cache[node1.name] = &node1 - fakeOvn.controller.eIPC.allocator.cache[node2.name] = &node2 - - eIP := egressipv1.EgressIP{ - ObjectMeta: newEgressIPMeta(egressIPName), - Spec: egressipv1.EgressIPSpec{ - EgressIPs: []string{egressIP}, - }, - } - assignedStatuses := fakeOvn.controller.assignEgressIPs(eIP.Name, eIP.Spec.EgressIPs) - gomega.Expect(assignedStatuses).To(gomega.HaveLen(1)) - gomega.Expect(assignedStatuses[0].Node).To(gomega.Equal(node2.name)) - gomega.Expect(assignedStatuses[0].EgressIP).To(gomega.Equal(net.ParseIP(egressIP).String())) - return nil - } - - err := app.Run([]string{app.Name}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - }) - - ginkgo.It("should not be able to allocate conflicting compressed uppercase IP", func() { - app.Action = func(ctx *cli.Context) error { - - fakeOvn.start() - - egressIP := "::FEFF:C0A8:8E32" - - node1 := setupNode(node1Name, []string{"0:0:0:0:0:feff:c0a8:8e0c/64"}, map[string]string{"0:0:0:0:0:feff:c0a8:8e32": "bogus1", "0:0:0:0:0:feff:c0a8:8e1e": "bogus2"}) - node2 := setupNode(node2Name, []string{"0:0:0:0:0:fedf:c0a8:8e0c/64"}, map[string]string{"0:0:0:0:0:feff:c0a8:8e23": "bogus3"}) - - fakeOvn.controller.eIPC.allocator.cache[node1.name] = &node1 - fakeOvn.controller.eIPC.allocator.cache[node2.name] = &node2 - egressIPs := []string{egressIP} - - eIP := egressipv1.EgressIP{ - ObjectMeta: newEgressIPMeta(egressIPName), - Spec: egressipv1.EgressIPSpec{ - EgressIPs: egressIPs, - }, - } - - assignedStatuses := fakeOvn.controller.assignEgressIPs(eIP.Name, eIP.Spec.EgressIPs) - gomega.Expect(assignedStatuses).To(gomega.HaveLen(0)) - return nil - } - - err := app.Run([]string{app.Name}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - }) - - ginkgo.It("should not be able to allocate invalid IP", func() { - 
app.Action = func(ctx *cli.Context) error { - - fakeOvn.start() - - egressIPs := []string{"0:0:0:0:0:feff:c0a8:8e32:5"} - - eIP := egressipv1.EgressIP{ - ObjectMeta: newEgressIPMeta(egressIPName), - Spec: egressipv1.EgressIPSpec{ - EgressIPs: egressIPs, - }, - } - - assignedStatuses, err := fakeOvn.controller.validateEgressIPSpec(eIP.Name, eIP.Spec.EgressIPs) - gomega.Expect(err).To(gomega.HaveOccurred()) - gomega.Expect(err.Error()).To(gomega.Equal(fmt.Sprintf("unable to parse provided EgressIP: %s, invalid", egressIPs[0]))) - gomega.Expect(assignedStatuses).To(gomega.HaveLen(0)) - return nil - } - - err := app.Run([]string{app.Name}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - }) - }) - - ginkgo.Context("WatchEgressIP", func() { - - ginkgo.It("should update status correctly for single-stack IPv4", func() { - app.Action = func(ctx *cli.Context) error { - fakeOvn.startWithDBSetup(clusterRouterDbSetup) - - egressIP := "192.168.126.10" - node1 := setupNode(node1Name, []string{"192.168.126.12/24"}, map[string]string{"192.168.126.102": "bogus1", "192.168.126.111": "bogus2"}) - node2 := setupNode(node2Name, []string{"192.168.126.51/24"}, map[string]string{"192.168.126.68": "bogus3"}) - - fakeOvn.controller.eIPC.allocator.cache[node1.name] = &node1 - fakeOvn.controller.eIPC.allocator.cache[node2.name] = &node2 - - eIP := egressipv1.EgressIP{ - ObjectMeta: newEgressIPMeta(egressIPName), - Spec: egressipv1.EgressIPSpec{ - EgressIPs: []string{egressIP}, - NamespaceSelector: metav1.LabelSelector{ - MatchLabels: map[string]string{ - "name": "does-not-exist", - }, - }, - }, - } - - err := fakeOvn.controller.WatchEgressIP() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - - _, err = fakeOvn.fakeClient.EgressIPClient.K8sV1().EgressIPs().Create(context.TODO(), &eIP, metav1.CreateOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - - gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) - egressIPs, nodes := 
getEgressIPStatus(egressIPName) - gomega.Expect(nodes[0]).To(gomega.Equal(node2.name)) - gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP)) - - return nil - } - - err := app.Run([]string{app.Name}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - }) - - ginkgo.It("should update status correctly for single-stack IPv6", func() { - app.Action = func(ctx *cli.Context) error { - fakeOvn.startWithDBSetup(clusterRouterDbSetup) - - egressIP := "0:0:0:0:0:feff:c0a8:8e0d" - - node1 := setupNode(node1Name, []string{"0:0:0:0:0:feff:c0a8:8e0c/64"}, map[string]string{"0:0:0:0:0:feff:c0a8:8e32": "bogus1", "0:0:0:0:0:feff:c0a8:8e1e": "bogus2"}) - node2 := setupNode(node2Name, []string{"0:0:0:0:0:fedf:c0a8:8e0c/64"}, map[string]string{"0:0:0:0:0:feff:c0a8:8e23": "bogus3"}) - - fakeOvn.controller.eIPC.allocator.cache[node1.name] = &node1 - fakeOvn.controller.eIPC.allocator.cache[node2.name] = &node2 - - eIP := egressipv1.EgressIP{ - ObjectMeta: newEgressIPMeta(egressIPName), - Spec: egressipv1.EgressIPSpec{ - EgressIPs: []string{egressIP}, - }, - } - - err := fakeOvn.controller.WatchEgressIP() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - - _, err = fakeOvn.fakeClient.EgressIPClient.K8sV1().EgressIPs().Create(context.TODO(), &eIP, metav1.CreateOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - - gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) - egressIPs, nodes := getEgressIPStatus(egressIPName) - gomega.Expect(nodes[0]).To(gomega.Equal(node2.name)) - gomega.Expect(egressIPs[0]).To(gomega.Equal(net.ParseIP(egressIP).String())) - - return nil - } - - err := app.Run([]string{app.Name}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - }) - - ginkgo.It("should update status correctly for dual-stack", func() { - app.Action = func(ctx *cli.Context) error { - fakeOvn.startWithDBSetup(clusterRouterDbSetup) - - egressIPv4 := "192.168.126.101" - egressIPv6 := "0:0:0:0:0:feff:c0a8:8e0d" - - node1 := setupNode(node1Name, 
[]string{"0:0:0:0:0:feff:c0a8:8e0c/64"}, map[string]string{"0:0:0:0:0:feff:c0a8:8e23": "bogus1"}) - node2 := setupNode(node2Name, []string{"192.168.126.51/24"}, map[string]string{"192.168.126.68": "bogus2", "192.168.126.102": "bogus3"}) - - fakeOvn.controller.eIPC.allocator.cache[node1.name] = &node1 - fakeOvn.controller.eIPC.allocator.cache[node2.name] = &node2 - - eIP := egressipv1.EgressIP{ - ObjectMeta: newEgressIPMeta(egressIPName), - Spec: egressipv1.EgressIPSpec{ - EgressIPs: []string{egressIPv4, egressIPv6}, - }, - } - - err := fakeOvn.controller.WatchEgressIP() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - - _, err = fakeOvn.fakeClient.EgressIPClient.K8sV1().EgressIPs().Create(context.TODO(), &eIP, metav1.CreateOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - - gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(2)) - egressIPs, nodes := getEgressIPStatus(egressIPName) - gomega.Expect(nodes).To(gomega.ConsistOf(node2.name, node1.name)) - gomega.Expect(egressIPs).To(gomega.ConsistOf(net.ParseIP(egressIPv6).String(), net.ParseIP(egressIPv4).String())) - return nil - } - - err := app.Run([]string{app.Name}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - }) - }) - - ginkgo.Context("syncEgressIP for dual-stack", func() { - - ginkgo.It("should not update valid assignments", func() { - app.Action = func(ctx *cli.Context) error { - - egressIPv4 := "192.168.126.101" - egressIPv6 := "0:0:0:0:0:feff:c0a8:8e0d" - - node1 := setupNode(node1Name, []string{"0:0:0:0:0:feff:c0a8:8e0c/64"}, map[string]string{}) - node2 := setupNode(node2Name, []string{"192.168.126.51/24"}, map[string]string{"192.168.126.102": "bogus3"}) - - eIP := egressipv1.EgressIP{ - ObjectMeta: newEgressIPMeta(egressIPName), - Spec: egressipv1.EgressIPSpec{ - EgressIPs: []string{egressIPv4, egressIPv6}, - }, - Status: egressipv1.EgressIPStatus{ - Items: []egressipv1.EgressIPStatusItem{ - { - EgressIP: egressIPv4, - Node: node2.name, - }, - { - EgressIP: 
net.ParseIP(egressIPv6).String(), - Node: node1.name, - }, - }, - }, - } - - fakeOvn.startWithDBSetup(clusterRouterDbSetup, - &egressipv1.EgressIPList{ - Items: []egressipv1.EgressIP{eIP}, - }, - ) - - fakeOvn.controller.eIPC.allocator.cache[node1.name] = &node1 - fakeOvn.controller.eIPC.allocator.cache[node2.name] = &node2 - - err := fakeOvn.controller.WatchEgressIP() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - - gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(2)) - egressIPs, nodes := getEgressIPStatus(egressIPName) - gomega.Expect(nodes).To(gomega.ConsistOf(eIP.Status.Items[0].Node, eIP.Status.Items[1].Node)) - gomega.Expect(egressIPs).To(gomega.ConsistOf(eIP.Status.Items[0].EgressIP, eIP.Status.Items[1].EgressIP)) - - return nil - } - - err := app.Run([]string{app.Name}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - }) - }) - - ginkgo.Context("syncEgressIP for IPv4", func() { - - ginkgo.It("should update invalid assignments on duplicated node", func() { - app.Action = func(ctx *cli.Context) error { - - egressIP1 := "192.168.126.101" - egressIP2 := "192.168.126.100" - - node1 := setupNode(node1Name, []string{"192.168.126.12/24"}, map[string]string{egressIP1: egressIPName, egressIP2: egressIPName}) - node2 := setupNode(node2Name, []string{"192.168.126.51/24"}, map[string]string{"192.168.126.68": "bogus3"}) - - eIP := egressipv1.EgressIP{ - ObjectMeta: newEgressIPMeta(egressIPName), - Spec: egressipv1.EgressIPSpec{ - EgressIPs: []string{egressIP1, egressIP2}, - }, - Status: egressipv1.EgressIPStatus{ - Items: []egressipv1.EgressIPStatusItem{ - { - EgressIP: egressIP1, - Node: node1.name, - }, - { - EgressIP: egressIP2, - Node: node1.name, - }, - }, - }, - } - fakeOvn.startWithDBSetup( - libovsdbtest.TestSetup{ - NBData: []libovsdbtest.TestData{ - &nbdb.LogicalRouter{ - Name: ovntypes.OVNClusterRouter, - UUID: ovntypes.OVNClusterRouter + "-UUID", - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node1Name, - UUID: 
ovntypes.GWRouterPrefix + node1Name + "-UUID", - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node2Name, - UUID: ovntypes.GWRouterPrefix + node2Name + "-UUID", - }, - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1Name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1Name, - Networks: []string{nodeLogicalRouterIfAddrV4}, - }, - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2Name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2Name, - Networks: []string{nodeLogicalRouterIfAddrV4}, - }, - }, - }, - &egressipv1.EgressIPList{ - Items: []egressipv1.EgressIP{eIP}, - }, - ) - - fakeOvn.controller.eIPC.allocator.cache[node1.name] = &node1 - fakeOvn.controller.eIPC.allocator.cache[node2.name] = &node2 - - err := fakeOvn.controller.WatchEgressIP() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(2)) - egressIPs, nodes := getEgressIPStatus(egressIPName) - gomega.Expect(nodes).To(gomega.ConsistOf(node1.name, node2.name)) - gomega.Expect(egressIPs).To(gomega.ConsistOf(eIP.Status.Items[0].EgressIP, eIP.Status.Items[1].EgressIP)) - return nil - } - - err := app.Run([]string{app.Name}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - }) - - ginkgo.It("should update invalid assignments with incorrectly parsed IP", func() { - app.Action = func(ctx *cli.Context) error { - - egressIP1 := "192.168.126.101" - egressIPIncorrect := "192.168.126.1000" - - node1 := setupNode(node1Name, []string{"192.168.126.12/24"}, map[string]string{"192.168.126.102": "bogus1", "192.168.126.111": "bogus2"}) - node2 := setupNode(node2Name, []string{"192.168.126.51/24"}, map[string]string{"192.168.126.68": "bogus3"}) - - eIP := egressipv1.EgressIP{ - ObjectMeta: newEgressIPMeta(egressIPName), - Spec: egressipv1.EgressIPSpec{ - 
EgressIPs: []string{egressIP1}, - }, - Status: egressipv1.EgressIPStatus{ - Items: []egressipv1.EgressIPStatusItem{ - { - EgressIP: egressIPIncorrect, - Node: node1.name, - }, + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, + "nat-addresses": "router", + "exclude-lb-vips-from-garp": "true", }, }, - } - - fakeOvn.startWithDBSetup( - libovsdbtest.TestSetup{ - NBData: []libovsdbtest.TestData{ - &nbdb.LogicalRouter{ - Name: ovntypes.OVNClusterRouter, - UUID: ovntypes.OVNClusterRouter + "-UUID", - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node1Name, - UUID: ovntypes.GWRouterPrefix + node1Name + "-UUID", - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node2Name, - UUID: ovntypes.GWRouterPrefix + node2Name + "-UUID", - }, - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1Name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1Name, - Networks: []string{nodeLogicalRouterIfAddrV4}, - }, - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2Name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2Name, - Networks: []string{nodeLogicalRouterIfAddrV4}, - }, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, + "nat-addresses": "router", + "exclude-lb-vips-from-garp": "true", }, }, - &egressipv1.EgressIPList{ - Items: []egressipv1.EgressIP{eIP}, - }, - ) + } + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) - fakeOvn.controller.eIPC.allocator.cache[node1.name] = &node1 - fakeOvn.controller.eIPC.allocator.cache[node2.name] = &node2 + 
// remove label from node1 + node1.Labels = map[string]string{} - err := fakeOvn.controller.WatchEgressIP() + _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Nodes().Update(context.TODO(), &node1, metav1.UpdateOptions{}) gomega.Expect(err).NotTo(gomega.HaveOccurred()) - gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) - egressIPs, nodes := getEgressIPStatus(egressIPName) - gomega.Expect(nodes[0]).To(gomega.Equal(node2.name)) - gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP1)) - return nil - } - - err := app.Run([]string{app.Name}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - }) - - ginkgo.It("should update invalid assignments with unhostable IP on a node", func() { - app.Action = func(ctx *cli.Context) error { - - egressIP1 := "192.168.126.101" - egressIPIncorrect := "192.168.128.100" + // NOTE: Cluster manager is the one who patches the egressIP object. + // For the sake of unit testing egressip zone controller we need to patch egressIP object manually + // There are tests in cluster-manager package covering the patch logic. 
+ status = []egressipv1.EgressIPStatusItem{} + err = fakeOvn.controller.patchReplaceEgressIPStatus(egressIPName, status) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) - node1 := setupNode(node1Name, []string{"192.168.126.12/24"}, map[string]string{"192.168.126.102": "bogus1", "192.168.126.111": "bogus2"}) - node2 := setupNode(node2Name, []string{"192.168.126.51/24"}, map[string]string{"192.168.126.68": "bogus3"}) + gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(0)) + gomega.Eventually(getEgressIPReassignmentCount).Should(gomega.Equal(1)) // though 2 egressIPs to be re-assigned its only 1 egressIP object - eIP := egressipv1.EgressIP{ - ObjectMeta: newEgressIPMeta(egressIPName), - Spec: egressipv1.EgressIPSpec{ - EgressIPs: []string{egressIP1}, + expectedDatabaseState = []libovsdbtest.TestData{ + &nbdb.LogicalRouterPolicy{ + Priority: types.DefaultNoRereoutePriority, + Match: "ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14", + Action: nbdb.LogicalRouterPolicyActionAllow, + UUID: "no-reroute-UUID", }, - Status: egressipv1.EgressIPStatus{ - Items: []egressipv1.EgressIPStatusItem{ - { - EgressIP: egressIPIncorrect, - Node: node1.name, - }, + &nbdb.LogicalRouterPolicy{ + Priority: types.DefaultNoRereoutePriority, + Match: fmt.Sprintf("ip4.src == 10.128.0.0/14 && ip4.dst == %s", config.Gateway.V4JoinSubnet), + Action: nbdb.LogicalRouterPolicyActionAllow, + UUID: "no-reroute-service-UUID", + }, + &nbdb.NAT{ + UUID: "egressip-nat-UUID1", + LogicalIP: podV4IP, + ExternalIP: "192.168.126.12", // adds back SNAT towards nodeIP + Type: nbdb.NATTypeSNAT, + Options: map[string]string{ + "stateless": "false", }, }, - } - - fakeOvn.startWithDBSetup( - libovsdbtest.TestSetup{ - NBData: []libovsdbtest.TestData{ - &nbdb.LogicalRouter{ - Name: ovntypes.OVNClusterRouter, - UUID: ovntypes.OVNClusterRouter + "-UUID", - }, - &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node1Name, - UUID: ovntypes.GWRouterPrefix + node1Name + "-UUID", - }, - 
&nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node2Name, - UUID: ovntypes.GWRouterPrefix + node2Name + "-UUID", - }, - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1Name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1Name, - Networks: []string{nodeLogicalRouterIfAddrV4}, - }, - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2Name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2Name, - Networks: []string{nodeLogicalRouterIfAddrV4}, - }, + &nbdb.NAT{ + UUID: "egressip-nat-UUID3", + LogicalIP: "10.128.0.16", + ExternalIP: "192.168.126.51", + Type: nbdb.NATTypeSNAT, + Options: map[string]string{ + "stateless": "false", }, }, - &egressipv1.EgressIPList{ - Items: []egressipv1.EgressIP{eIP}, + &nbdb.LogicalRouter{ + Name: ovntypes.OVNClusterRouter, + UUID: ovntypes.OVNClusterRouter + "-UUID", + Policies: []string{"no-reroute-UUID", "no-reroute-service-UUID"}, }, - ) - - fakeOvn.controller.eIPC.allocator.cache[node1.name] = &node1 - fakeOvn.controller.eIPC.allocator.cache[node2.name] = &node2 - - err := fakeOvn.controller.WatchEgressIP() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) - egressIPs, nodes := getEgressIPStatus(egressIPName) - gomega.Expect(nodes[0]).To(gomega.Equal(node2.name)) - gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP1)) - - return nil - } - - err := app.Run([]string{app.Name}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - }) - - ginkgo.It("should not update valid assignment", func() { - app.Action = func(ctx *cli.Context) error { - - egressIP1 := "192.168.126.101" - - node1 := setupNode(node1Name, []string{"192.168.126.12/24"}, map[string]string{"192.168.126.111": "bogus2"}) - node2 := setupNode(node2Name, []string{"192.168.126.51/24"}, 
map[string]string{"192.168.126.68": "bogus3"}) - - eIP := egressipv1.EgressIP{ - ObjectMeta: newEgressIPMeta(egressIPName), - Spec: egressipv1.EgressIPSpec{ - EgressIPs: []string{egressIP1}, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node1.Name, + UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", + Nat: []string{"egressip-nat-UUID1"}, }, - Status: egressipv1.EgressIPStatus{ - Items: []egressipv1.EgressIPStatusItem{ - { - EgressIP: egressIP1, - Node: node1.name, - }, - }, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node2.Name, + UUID: ovntypes.GWRouterPrefix + node2.Name + "-UUID", + Nat: []string{"egressip-nat-UUID3"}, }, - } - - fakeOvn.startWithDBSetup(clusterRouterDbSetup, - &egressipv1.EgressIPList{ - Items: []egressipv1.EgressIP{eIP}, + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name, + Networks: []string{"100.64.0.3/29"}, }, - ) - - fakeOvn.controller.eIPC.allocator.cache[node1.name] = &node1 - fakeOvn.controller.eIPC.allocator.cache[node2.name] = &node2 - - err := fakeOvn.controller.WatchEgressIP() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) - egressIPs, nodes := getEgressIPStatus(egressIPName) - gomega.Expect(nodes[0]).To(gomega.Equal(node1.name)) - gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP1)) - - return nil - } - - err := app.Run([]string{app.Name}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - }) - }) - - ginkgo.Context("AddEgressIP for IPv4", func() { - - ginkgo.It("should not create two EgressIPs with same egress IP value", func() { - app.Action = func(ctx *cli.Context) error { - egressIP1 := "192.168.126.101" - - node1 := setupNode(node1Name, []string{"192.168.126.12/24"}, map[string]string{"192.168.126.102": "bogus1", "192.168.126.111": "bogus2"}) - node2 := 
setupNode(node2Name, []string{"192.168.126.51/24"}, map[string]string{"192.168.126.68": "bogus3"}) - - eIP1 := egressipv1.EgressIP{ - ObjectMeta: newEgressIPMeta("egressip"), - Spec: egressipv1.EgressIPSpec{ - EgressIPs: []string{egressIP1}, + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name, + Networks: []string{"100.64.0.2/29"}, }, - } - eIP2 := egressipv1.EgressIP{ - ObjectMeta: newEgressIPMeta("egressip2"), - Spec: egressipv1.EgressIPSpec{ - EgressIPs: []string{egressIP1}, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, + "nat-addresses": "router", + "exclude-lb-vips-from-garp": "true", + }, + }, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, + "nat-addresses": "router", + "exclude-lb-vips-from-garp": "true", + }, }, } - - fakeOvn.startWithDBSetup(clusterRouterDbSetup) - - fakeOvn.controller.eIPC.allocator.cache[node1.name] = &node1 - fakeOvn.controller.eIPC.allocator.cache[node2.name] = &node2 - - err := fakeOvn.controller.WatchEgressIP() - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - - _, err = fakeOvn.fakeClient.EgressIPClient.K8sV1().EgressIPs().Create(context.TODO(), &eIP1, metav1.CreateOptions{}) - gomega.Expect(err).ToNot(gomega.HaveOccurred()) - - gomega.Eventually(getEgressIPStatusLen(eIP1.Name)).Should(gomega.Equal(1)) - egressIPs, nodes := getEgressIPStatus(eIP1.Name) - 
gomega.Expect(nodes[0]).To(gomega.Equal(node2.name)) - gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP1)) - - _, err = fakeOvn.fakeClient.EgressIPClient.K8sV1().EgressIPs().Create(context.TODO(), &eIP2, metav1.CreateOptions{}) - gomega.Expect(err).ToNot(gomega.HaveOccurred()) - - gomega.Eventually(getEgressIPStatusLen(eIP2.Name)).Should(gomega.Equal(0)) + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) return nil } @@ -9166,85 +7222,298 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { gomega.Expect(err).NotTo(gomega.HaveOccurred()) }) - }) - - ginkgo.Context("UpdateEgressIP for IPv4", func() { - - ginkgo.It("should perform re-assingment of EgressIPs", func() { + ginkgo.It("should re-balance EgressIPs when their node is removed", func() { app.Action = func(ctx *cli.Context) error { egressIP := "192.168.126.101" - updateEgressIP := "192.168.126.10" + node1IPv4 := "192.168.126.12/24" + node1IPv6 := "0:0:0:0:0:feff:c0a8:8e0c/64" + node2IPv4 := "192.168.126.51/24" - node1 := setupNode(node1Name, []string{"192.168.126.41/24"}, map[string]string{"192.168.126.102": "bogus1", "192.168.126.111": "bogus2"}) - node2 := setupNode(node2Name, []string{"192.168.126.51/24"}, map[string]string{"192.168.126.68": "bogus3"}) + annotations := map[string]string{ + "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", node1IPv4, node1IPv6), + "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":[\"%s\", \"%s\"]}", v4NodeSubnet, v6NodeSubnet), + } + labels := map[string]string{ + "k8s.ovn.org/egress-assignable": "", + } + node1 := getNodeObj(node1Name, annotations, labels) + annotations = map[string]string{ + "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", node2IPv4, ""), + "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\"}", v4NodeSubnet), + } + node2 := getNodeObj(node2Name, annotations, labels) - eIP1 := egressipv1.EgressIP{ + eIP := 
egressipv1.EgressIP{ ObjectMeta: newEgressIPMeta(egressIPName), Spec: egressipv1.EgressIPSpec{ EgressIPs: []string{egressIP}, }, + Status: egressipv1.EgressIPStatus{ + Items: []egressipv1.EgressIPStatusItem{}, + }, } + fakeOvn.startWithDBSetup( libovsdbtest.TestSetup{ NBData: []libovsdbtest.TestData{ + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name, + Networks: []string{nodeLogicalRouterIfAddrV4, nodeLogicalRouterIfAddrV6}, + }, + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name, + Networks: []string{nodeLogicalRouterIfAddrV4}, + }, &nbdb.LogicalRouter{ Name: ovntypes.OVNClusterRouter, UUID: ovntypes.OVNClusterRouter + "-UUID", }, &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node1Name, - UUID: ovntypes.GWRouterPrefix + node1Name + "-UUID", + Name: ovntypes.GWRouterPrefix + node1.Name, + UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", }, &nbdb.LogicalRouter{ - Name: ovntypes.GWRouterPrefix + node2Name, - UUID: ovntypes.GWRouterPrefix + node2Name + "-UUID", + Name: ovntypes.GWRouterPrefix + node2.Name, + UUID: ovntypes.GWRouterPrefix + node2.Name + "-UUID", }, - &nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1Name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1Name, - Networks: []string{nodeLogicalRouterIfAddrV4, nodeLogicalRouterIfAddrV6}, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, + }, }, - 
&nbdb.LogicalRouterPort{ - UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2Name + "-UUID", - Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2Name, - Networks: []string{nodeLogicalRouterIfAddrV4}, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, + }, }, }, }, - ) + &egressipv1.EgressIPList{ + Items: []egressipv1.EgressIP{eIP}, + }, + &v1.NodeList{ + Items: []v1.Node{node1}, + }) - fakeOvn.controller.eIPC.allocator.cache[node1.name] = &node1 - fakeOvn.controller.eIPC.allocator.cache[node2.name] = &node2 - err := fakeOvn.controller.WatchEgressIP() + err := fakeOvn.controller.WatchEgressIPNamespaces() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchEgressIPPods() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchEgressNodes() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchEgressIP() gomega.Expect(err).NotTo(gomega.HaveOccurred()) - _, err = fakeOvn.fakeClient.EgressIPClient.K8sV1().EgressIPs().Create(context.TODO(), &eIP1, metav1.CreateOptions{}) - gomega.Expect(err).ToNot(gomega.HaveOccurred()) - + expectedDatabaseState := []libovsdbtest.TestData{ + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name, + Networks: []string{nodeLogicalRouterIfAddrV4, nodeLogicalRouterIfAddrV6}, + }, + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name, + Networks: 
[]string{nodeLogicalRouterIfAddrV4}, + }, + &nbdb.LogicalRouterPolicy{ + Priority: types.DefaultNoRereoutePriority, + Match: "ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14", + Action: nbdb.LogicalRouterPolicyActionAllow, + UUID: "no-reroute-UUID", + }, + &nbdb.LogicalRouterPolicy{ + Priority: types.DefaultNoRereoutePriority, + Match: fmt.Sprintf("ip4.src == 10.128.0.0/14 && ip4.dst == %s", config.Gateway.V4JoinSubnet), + Action: nbdb.LogicalRouterPolicyActionAllow, + UUID: "no-reroute-service-UUID", + }, + &nbdb.LogicalRouter{ + Name: ovntypes.OVNClusterRouter, + UUID: ovntypes.OVNClusterRouter + "-UUID", + Policies: []string{"no-reroute-UUID", "no-reroute-service-UUID"}, + }, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node1.Name, + UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", + }, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node2.Name, + UUID: ovntypes.GWRouterPrefix + node2.Name + "-UUID", + }, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, + "nat-addresses": "router", + "exclude-lb-vips-from-garp": "true", + }, + }, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, + }, + }, + } + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) + fakeOvn.patchEgressIPObj(node1Name, egressIP) gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) egressIPs, nodes := getEgressIPStatus(egressIPName) - gomega.Expect(nodes[0]).To(gomega.Equal(node2.name)) + 
gomega.Expect(nodes[0]).To(gomega.Equal(node1.Name)) gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP)) - eIPToUpdate, err := fakeOvn.fakeClient.EgressIPClient.K8sV1().EgressIPs().Get(context.TODO(), eIP1.Name, metav1.GetOptions{}) + _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Nodes().Create(context.TODO(), &node2, metav1.CreateOptions{}) gomega.Expect(err).NotTo(gomega.HaveOccurred()) - eIPToUpdate.Spec.EgressIPs = []string{updateEgressIP} + expectedDatabaseState = []libovsdbtest.TestData{ + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name, + Networks: []string{nodeLogicalRouterIfAddrV4, nodeLogicalRouterIfAddrV6}, + }, + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name, + Networks: []string{nodeLogicalRouterIfAddrV4}, + }, + &nbdb.LogicalRouterPolicy{ + Priority: types.DefaultNoRereoutePriority, + Match: "ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14", + Action: nbdb.LogicalRouterPolicyActionAllow, + UUID: "no-reroute-UUID", + }, + &nbdb.LogicalRouterPolicy{ + Priority: types.DefaultNoRereoutePriority, + Match: fmt.Sprintf("ip4.src == 10.128.0.0/14 && ip4.dst == %s", config.Gateway.V4JoinSubnet), + Action: nbdb.LogicalRouterPolicyActionAllow, + UUID: "no-reroute-service-UUID", + }, + &nbdb.LogicalRouter{ + Name: ovntypes.OVNClusterRouter, + UUID: ovntypes.OVNClusterRouter + "-UUID", + Policies: []string{"no-reroute-UUID", "no-reroute-service-UUID"}, + }, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node1.Name, + UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", + }, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node2.Name, + UUID: ovntypes.GWRouterPrefix + node2.Name + "-UUID", + }, + &nbdb.LogicalSwitchPort{ + UUID: 
types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, + "nat-addresses": "router", + "exclude-lb-vips-from-garp": "true", + }, + }, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, + "nat-addresses": "router", + "exclude-lb-vips-from-garp": "true", + }, + }, + } + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) - _, err = fakeOvn.fakeClient.EgressIPClient.K8sV1().EgressIPs().Update(context.TODO(), eIPToUpdate, metav1.UpdateOptions{}) - gomega.Expect(err).ToNot(gomega.HaveOccurred()) + gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) + egressIPs, nodes = getEgressIPStatus(egressIPName) + gomega.Expect(nodes[0]).To(gomega.Equal(node1.Name)) + gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP)) - getEgressIP := func() string { - egressIPs, _ = getEgressIPStatus(egressIPName) - if len(egressIPs) == 0 { - return "try again" + err = fakeOvn.fakeClient.KubeClient.CoreV1().Nodes().Delete(context.TODO(), node1.Name, *metav1.NewDeleteOptions(0)) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + fakeOvn.patchEgressIPObj(node2Name, egressIP) + gomega.Eventually(getEgressIPStatusLen(egressIPName)).Should(gomega.Equal(1)) + + getNewNode := func() string { + _, nodes = getEgressIPStatus(egressIPName) + if len(nodes) > 0 { + return nodes[0] } - return egressIPs[0] + return "" } - gomega.Eventually(getEgressIP).Should(gomega.Equal(updateEgressIP)) - _, nodes = getEgressIPStatus(egressIPName) - 
gomega.Expect(nodes[0]).To(gomega.Equal(node2.name)) + gomega.Eventually(getNewNode).Should(gomega.Equal(node2.Name)) + egressIPs, _ = getEgressIPStatus(egressIPName) + gomega.Expect(egressIPs[0]).To(gomega.Equal(egressIP)) + + expectedDatabaseState = []libovsdbtest.TestData{ + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node1.Name, + Networks: []string{nodeLogicalRouterIfAddrV4, nodeLogicalRouterIfAddrV6}, + }, + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + node2.Name, + Networks: []string{nodeLogicalRouterIfAddrV4}, + }, + &nbdb.LogicalRouterPolicy{ + Priority: types.DefaultNoRereoutePriority, + Match: "ip4.src == 10.128.0.0/14 && ip4.dst == 10.128.0.0/14", + Action: nbdb.LogicalRouterPolicyActionAllow, + UUID: "no-reroute-UUID", + }, + &nbdb.LogicalRouterPolicy{ + Priority: types.DefaultNoRereoutePriority, + Match: fmt.Sprintf("ip4.src == 10.128.0.0/14 && ip4.dst == %s", config.Gateway.V4JoinSubnet), + Action: nbdb.LogicalRouterPolicyActionAllow, + UUID: "no-reroute-service-UUID", + }, + &nbdb.LogicalRouter{ + Name: ovntypes.OVNClusterRouter, + UUID: ovntypes.OVNClusterRouter + "-UUID", + Policies: []string{"no-reroute-UUID", "no-reroute-service-UUID"}, + }, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node1.Name, + UUID: ovntypes.GWRouterPrefix + node1.Name + "-UUID", + }, + &nbdb.LogicalRouter{ + Name: ovntypes.GWRouterPrefix + node2.Name, + UUID: ovntypes.GWRouterPrefix + node2.Name + "-UUID", + }, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node1Name, + Type: "router", + Options: map[string]string{ + "router-port": 
types.GWRouterToExtSwitchPrefix + "GR_" + node1Name, + }, + }, + &nbdb.LogicalSwitchPort{ + UUID: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name + "UUID", + Name: types.EXTSwitchToGWRouterPrefix + types.GWRouterPrefix + node2Name, + Type: "router", + Options: map[string]string{ + "router-port": types.GWRouterToExtSwitchPrefix + "GR_" + node2Name, + "nat-addresses": "router", + "exclude-lb-vips-from-garp": "true", + }, + }, + } + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) return nil } @@ -9253,3 +7522,63 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations", func() { }) }) }) + +// TEST UTILITY FUNCTIONS; +// reduces redundant code + +func getEIPSNAT(podIP, egressIP, expectedNatLogicalPort string) *nbdb.NAT { + return &nbdb.NAT{ + UUID: "egressip-nat-UUID", + LogicalIP: podIP, + ExternalIP: egressIP, + ExternalIDs: map[string]string{ + "name": egressIPName, + }, + Type: nbdb.NATTypeSNAT, + LogicalPort: &expectedNatLogicalPort, + Options: map[string]string{ + "stateless": "false", + }, + } +} + +func getReRoutePolicy(podIP, ipFamily string, nodeLogicalRouterIPv4 []string) *nbdb.LogicalRouterPolicy { + return &nbdb.LogicalRouterPolicy{ + Priority: types.EgressIPReroutePriority, + Match: fmt.Sprintf("ip%s.src == %s", ipFamily, podIP), + Action: nbdb.LogicalRouterPolicyActionReroute, + Nexthops: nodeLogicalRouterIPv4, + ExternalIDs: map[string]string{ + "name": egressIPName, + }, + UUID: "reroute-UUID", + } +} + +func getReRouteStaticRoute(podIP, nextHop string) *nbdb.LogicalRouterStaticRoute { + return &nbdb.LogicalRouterStaticRoute{ + ExternalIDs: map[string]string{"name": egressIPName}, + Nexthop: nextHop, + Policy: &nbdb.LogicalRouterStaticRoutePolicySrcIP, + IPPrefix: podIP, + UUID: "reroute-static-route-UUID", + } +} + +func getNodeObj(nodeName string, annotations, labels map[string]string) v1.Node { + return v1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: nodeName, + Annotations: 
annotations, + Labels: labels, + }, + Status: v1.NodeStatus{ + Conditions: []v1.NodeCondition{ + { + Type: v1.NodeReady, + Status: v1.ConditionTrue, + }, + }, + }, + } +} diff --git a/go-controller/pkg/ovn/ovn.go b/go-controller/pkg/ovn/ovn.go index d10ca81cc2..bd3ec1410a 100644 --- a/go-controller/pkg/ovn/ovn.go +++ b/go-controller/pkg/ovn/ovn.go @@ -34,10 +34,7 @@ import ( "k8s.io/klog/v2" ) -const ( - egressFirewallDNSDefaultDuration = 30 * time.Minute - egressIPReachabilityCheckInterval = 5 * time.Second -) +const egressFirewallDNSDefaultDuration = 30 * time.Minute // ACL logging severity levels type ACLLoggingLevels struct { @@ -301,13 +298,6 @@ func (oc *DefaultNetworkController) WatchEgressFwNodes() error { return err } -// WatchCloudPrivateIPConfig starts the watching of cloudprivateipconfigs -// resource and calls back the appropriate handler logic. -func (oc *DefaultNetworkController) WatchCloudPrivateIPConfig() error { - _, err := oc.retryCloudPrivateIPConfig.WatchResource() - return err -} - // WatchEgressIP starts the watching of egressip resource and calls back the // appropriate handler logic. It also initiates the other dedicated resource // handlers for egress IP setup: namespaces, pods. 
@@ -494,10 +484,10 @@ func (oc *DefaultNetworkController) InitEgressServiceController() (*egresssvc.Co } if hcPort == 0 { - return isReachableLegacy(nodeName, mgmtIPs, timeout) + return egresssvc.IsReachableLegacy(nodeName, mgmtIPs, timeout) } - return isReachableViaGRPC(mgmtIPs, healthClient, hcPort, timeout) + return egresssvc.IsReachableViaGRPC(mgmtIPs, healthClient, hcPort, timeout) } return egresssvc.NewController(DefaultNetworkControllerName, oc.client, oc.nbClient, oc.addressSetFactory, diff --git a/go-controller/pkg/ovn/ovn_test.go b/go-controller/pkg/ovn/ovn_test.go index 53f5caacea..d6089023a3 100644 --- a/go-controller/pkg/ovn/ovn_test.go +++ b/go-controller/pkg/ovn/ovn_test.go @@ -18,6 +18,7 @@ import ( egressfirewall "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressfirewall/v1" egressfirewallfake "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressfirewall/v1/apis/clientset/versioned/fake" egressip "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressip/v1" + egressipv1 "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressip/v1" egressipfake "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressip/v1/apis/clientset/versioned/fake" egressqos "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressqos/v1" egressqosfake "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressqos/v1/apis/clientset/versioned/fake" @@ -225,7 +226,6 @@ func NewOvnController(ovnClient *util.OVNMasterClientset, wf *factory.WatchFacto Kube: kube.Kube{KClient: ovnClient.KubeClient}, EIPClient: ovnClient.EgressIPClient, EgressFirewallClient: ovnClient.EgressFirewallClient, - CloudNetworkClient: ovnClient.CloudNetworkClient, EgressServiceClient: ovnClient.EgressServiceClient, }, wf, @@ -331,7 +331,6 @@ func (o *FakeOVN) NewSecondaryNetworkController(netattachdef *nettypes.NetworkAt Kube: kube.Kube{KClient: o.fakeClient.KubeClient}, EIPClient: o.fakeClient.EgressIPClient, EgressFirewallClient: 
o.fakeClient.EgressFirewallClient, - CloudNetworkClient: o.fakeClient.CloudNetworkClient, }, o.watcher, o.fakeRecorder, @@ -380,3 +379,17 @@ func (o *FakeOVN) NewSecondaryNetworkController(netattachdef *nettypes.NetworkAt secondaryController.AddNAD(nadName) return nil } + +func (o *FakeOVN) patchEgressIPObj(nodeName, egressIP string) { + // NOTE: Cluster manager is the one who patches the egressIP object. + // For the sake of unit testing egressip zone controller we need to patch egressIP object manually + // There are tests in cluster-manager package covering the patch logic. + status := []egressipv1.EgressIPStatusItem{ + { + Node: nodeName, + EgressIP: egressIP, + }, + } + err := o.controller.patchReplaceEgressIPStatus(egressIPName, status) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) +} diff --git a/go-controller/pkg/syncmap/syncmap.go b/go-controller/pkg/syncmap/syncmap.go index 97f4c45845..bc59ef6cc0 100644 --- a/go-controller/pkg/syncmap/syncmap.go +++ b/go-controller/pkg/syncmap/syncmap.go @@ -140,6 +140,14 @@ func (c *SyncMap[T]) LoadOrStore(lockedKey string, newEntry T) (value T, loaded } } +// Store sets the value for a key. 
+// If key-value was already present, it will be over-written +func (c *SyncMap[T]) Store(lockedKey string, newEntry T) { + c.entriesMutex.Lock() + defer c.entriesMutex.Unlock() + c.entries[lockedKey] = newEntry +} + // Delete deletes object from the entries map func (c *SyncMap[T]) Delete(lockedKey string) { c.entriesMutex.Lock() diff --git a/go-controller/pkg/util/kube.go b/go-controller/pkg/util/kube.go index 4423c799d1..0aead0e5dc 100644 --- a/go-controller/pkg/util/kube.go +++ b/go-controller/pkg/util/kube.go @@ -55,7 +55,6 @@ type OVNMasterClientset struct { KubeClient kubernetes.Interface EgressIPClient egressipclientset.Interface EgressFirewallClient egressfirewallclientset.Interface - CloudNetworkClient ocpcloudnetworkclientset.Interface EgressQoSClient egressqosclientset.Interface MultiNetworkPolicyClient multinetworkpolicyclientset.Interface EgressServiceClient egressserviceclientset.Interface @@ -78,7 +77,6 @@ func (cs *OVNClientset) GetMasterClientset() *OVNMasterClientset { KubeClient: cs.KubeClient, EgressIPClient: cs.EgressIPClient, EgressFirewallClient: cs.EgressFirewallClient, - CloudNetworkClient: cs.CloudNetworkClient, EgressQoSClient: cs.EgressQoSClient, MultiNetworkPolicyClient: cs.MultiNetworkPolicyClient, EgressServiceClient: cs.EgressServiceClient, diff --git a/test/e2e/egressip.go b/test/e2e/egressip.go index ea6c426622..d1400ca5ae 100644 --- a/test/e2e/egressip.go +++ b/test/e2e/egressip.go @@ -1076,19 +1076,26 @@ spec: ginkgo.By("7. 
Check the OVN DB to ensure no SNATs are added for the standby egressIP") dbPods, err := framework.RunKubectl("ovn-kubernetes", "get", "pods", "-l", "name=ovnkube-db", "-o=jsonpath='{.items..metadata.name}'") + dbContainerName := "nb-ovsdb" + if isInterconnectEnabled() { + dbPods, err = framework.RunKubectl("ovn-kubernetes", "get", "pods", "-l", "name=ovnkube-node", "--field-selector", fmt.Sprintf("spec.nodeName=%s", egress1Node.name), "-o=jsonpath='{.items..metadata.name}'") + } if err != nil || len(dbPods) == 0 { framework.Failf("Error: Check the OVN DB to ensure no SNATs are added for the standby egressIP, err: %v", err) } dbPod := strings.Split(dbPods, " ")[0] dbPod = strings.TrimPrefix(dbPod, "'") dbPod = strings.TrimSuffix(dbPod, "'") + if len(dbPod) == 0 { + framework.Failf("Error: Check the OVN DB to ensure no SNATs are added for the standby egressIP, err: %v", err) + } logicalIP := fmt.Sprintf("logical_ip=%s", srcPodIP.String()) - snats, err := framework.RunKubectl("ovn-kubernetes", "exec", dbPod, "-c", "nb-ovsdb", "--", "ovn-nbctl", "--no-leader-only", "--columns=external_ip", "find", "nat", logicalIP) + snats, err := framework.RunKubectl("ovn-kubernetes", "exec", dbPod, "-c", dbContainerName, "--", "ovn-nbctl", "--no-leader-only", "--columns=external_ip", "find", "nat", logicalIP) if err != nil { framework.Failf("Error: Check the OVN DB to ensure no SNATs are added for the standby egressIP, err: %v", err) } if !strings.Contains(snats, statuses[0].EgressIP) || strings.Contains(snats, egressIP3.String()) { - framework.Failf("Step 7. Check that the second egressIP object is assigned to node2 (pod2Node/egress1Node), failed") + framework.Failf("Step 7. Check the OVN DB to ensure no SNATs are added for the standby egressIP, failed") } ginkgo.By("8. Check connectivity from pod to an external container and verify that the srcIP is the expected egressIP from object1") @@ -1147,7 +1154,7 @@ spec: framework.ExpectNoError(err, "Step 11. 
Check connectivity from pod to an external container and verify that the srcIP is the expected standby egressIP3 from object2, failed: %v", err) ginkgo.By("12. Check the OVN DB to ensure SNATs are added for only the standby egressIP") - snats, err = framework.RunKubectl("ovn-kubernetes", "exec", dbPod, "-c", "nb-ovsdb", "--", "ovn-nbctl", "--no-leader-only", "--columns=external_ip", "find", "nat", logicalIP) + snats, err = framework.RunKubectl("ovn-kubernetes", "exec", dbPod, "-c", dbContainerName, "--", "ovn-nbctl", "--no-leader-only", "--columns=external_ip", "find", "nat", logicalIP) if err != nil { framework.Failf("Error: Check the OVN DB to ensure SNATs are added for only the standby egressIP, err: %v", err) } @@ -1181,8 +1188,21 @@ spec: }) framework.ExpectNoError(err, "Step 14. Ensure egressIP1 from egressIP object1 and egressIP3 from object2 is correctly transferred to egress2Node, failed: %v", err) + if isInterconnectEnabled() { + dbPods, err = framework.RunKubectl("ovn-kubernetes", "get", "pods", "-l", "name=ovnkube-node", "--field-selector", fmt.Sprintf("spec.nodeName=%s", egress2Node.name), "-o=jsonpath='{.items..metadata.name}'") + } + if err != nil || len(dbPods) == 0 { + framework.Failf("Error: Check the OVN DB to ensure no SNATs are added for the standby egressIP, err: %v", err) + } + dbPod = strings.Split(dbPods, " ")[0] + dbPod = strings.TrimPrefix(dbPod, "'") + dbPod = strings.TrimSuffix(dbPod, "'") + if len(dbPod) == 0 { + framework.Failf("Error: Check the OVN DB to ensure no SNATs are added for the standby egressIP, err: %v", err) + } + ginkgo.By("15. 
Check the OVN DB to ensure SNATs are added for either egressIP1 or egressIP3") - snats, err = framework.RunKubectl("ovn-kubernetes", "exec", dbPod, "-c", "nb-ovsdb", "--", "ovn-nbctl", "--no-leader-only", "--columns=external_ip", "find", "nat", logicalIP) + snats, err = framework.RunKubectl("ovn-kubernetes", "exec", dbPod, "-c", dbContainerName, "--", "ovn-nbctl", "--no-leader-only", "--columns=external_ip", "find", "nat", logicalIP) if err != nil { framework.Failf("Error: Check the OVN DB to ensure SNATs are added for either egressIP1 or egressIP3, err: %v", err) } diff --git a/test/e2e/util.go b/test/e2e/util.go index ccb635aac4..14e0a1e4f2 100644 --- a/test/e2e/util.go +++ b/test/e2e/util.go @@ -1061,3 +1061,8 @@ func randStr(n int) string { } return string(b) } + +func isInterconnectEnabled() bool { + val, present := os.LookupEnv("OVN_INTERCONNECT_ENABLE") + return present && val == "true" +} From b74b3aa5b0340082a53bd4a424efd70a9afbc7d6 Mon Sep 17 00:00:00 2001 From: Surya Seetharaman Date: Wed, 24 May 2023 11:26:46 +0200 Subject: [PATCH 05/73] EIP: Move relevant metrics to CM This is the metrics refactor for interconnect following the work done in https://github.com/ovn-org/ovn-kubernetes/pull/3386. 
Signed-off-by: Surya Seetharaman --- go-controller/pkg/metrics/cluster_manager.go | 50 ++++++++++++++++++++ go-controller/pkg/metrics/master.go | 40 ---------------- 2 files changed, 50 insertions(+), 40 deletions(-) diff --git a/go-controller/pkg/metrics/cluster_manager.go b/go-controller/pkg/metrics/cluster_manager.go index c00c9b63fc..dc69a50a17 100644 --- a/go-controller/pkg/metrics/cluster_manager.go +++ b/go-controller/pkg/metrics/cluster_manager.go @@ -54,6 +54,30 @@ var metricV6AllocatedHostSubnetCount = prometheus.NewGauge(prometheus.GaugeOpts{ Help: "The total number of v6 host subnets currently allocated", }) +/** EgressIP metrics recorded from cluster-manager begins**/ +var metricEgressIPCount = prometheus.NewGauge(prometheus.GaugeOpts{ + Namespace: MetricOvnkubeNamespace, + Subsystem: MetricOvnkubeSubsystemClusterManager, + Name: "num_egress_ips", + Help: "The number of defined egress IP addresses", +}) + +var metricEgressIPNodeUnreacheableCount = prometheus.NewCounter(prometheus.CounterOpts{ + Namespace: MetricOvnkubeNamespace, + Subsystem: MetricOvnkubeSubsystemClusterManager, + Name: "egress_ips_node_unreachable_total", + Help: "The total number of times assigned egress IP(s) were unreachable"}, +) + +var metricEgressIPRebalanceCount = prometheus.NewCounter(prometheus.CounterOpts{ + Namespace: MetricOvnkubeNamespace, + Subsystem: MetricOvnkubeSubsystemClusterManager, + Name: "egress_ips_rebalance_total", + Help: "The total number of times assigned egress IP(s) needed to be moved to a different node"}, +) + +/** EgressIP metrics recorded from cluster-manager ends**/ + // RegisterClusterManagerBase registers ovnkube cluster manager base metrics with the Prometheus registry. // This function should only be called once. 
func RegisterClusterManagerBase() { @@ -88,6 +112,11 @@ func RegisterClusterManagerFunctional() { prometheus.MustRegister(metricV6HostSubnetCount) prometheus.MustRegister(metricV4AllocatedHostSubnetCount) prometheus.MustRegister(metricV6AllocatedHostSubnetCount) + if config.OVNKubernetesFeature.EnableEgressIP { + prometheus.MustRegister(metricEgressIPNodeUnreacheableCount) + prometheus.MustRegister(metricEgressIPRebalanceCount) + prometheus.MustRegister(metricEgressIPCount) + } } func UnregisterClusterManagerFunctional() { @@ -95,6 +124,11 @@ func UnregisterClusterManagerFunctional() { prometheus.Unregister(metricV6HostSubnetCount) prometheus.Unregister(metricV4AllocatedHostSubnetCount) prometheus.Unregister(metricV6AllocatedHostSubnetCount) + if config.OVNKubernetesFeature.EnableEgressIP { + prometheus.Unregister(metricEgressIPNodeUnreacheableCount) + prometheus.Unregister(metricEgressIPRebalanceCount) + prometheus.Unregister(metricEgressIPCount) + } } // RecordSubnetUsage records the number of subnets allocated for nodes @@ -109,3 +143,19 @@ func RecordSubnetCount(v4SubnetCount, v6SubnetCount float64) { metricV4HostSubnetCount.Set(v4SubnetCount) metricV6HostSubnetCount.Set(v6SubnetCount) } + +// RecordEgressIPReachableNode records how many times EgressIP detected an unuseable node. +func RecordEgressIPUnreachableNode() { + metricEgressIPNodeUnreacheableCount.Inc() +} + +// RecordEgressIPRebalance records how many EgressIPs had to move to a different egress node. +func RecordEgressIPRebalance(count int) { + metricEgressIPRebalanceCount.Add(float64(count)) +} + +// RecordEgressIPCount records the total number of Egress IPs. +// This total may include multiple Egress IPs per EgressIP CR. 
+func RecordEgressIPCount(count float64) { + metricEgressIPCount.Set(count) +} diff --git a/go-controller/pkg/metrics/master.go b/go-controller/pkg/metrics/master.go index 3c1e3f231b..1e562b8bb1 100644 --- a/go-controller/pkg/metrics/master.go +++ b/go-controller/pkg/metrics/master.go @@ -164,13 +164,6 @@ var MetricMasterLeader = prometheus.NewGauge(prometheus.GaugeOpts{ Help: "Identifies whether the instance of ovnkube-master is a leader(1) or not(0).", }) -var metricEgressIPCount = prometheus.NewGauge(prometheus.GaugeOpts{ - Namespace: MetricOvnkubeNamespace, - Subsystem: MetricOvnkubeSubsystemMaster, - Name: "num_egress_ips", - Help: "The number of defined egress IP addresses", -}) - var metricEgressIPAssignLatency = prometheus.NewHistogram(prometheus.HistogramOpts{ Namespace: MetricOvnkubeNamespace, Subsystem: MetricOvnkubeSubsystemMaster, @@ -187,20 +180,6 @@ var metricEgressIPUnassignLatency = prometheus.NewHistogram(prometheus.Histogram Buckets: prometheus.ExponentialBuckets(.001, 2, 15), }) -var metricEgressIPNodeUnreacheableCount = prometheus.NewCounter(prometheus.CounterOpts{ - Namespace: MetricOvnkubeNamespace, - Subsystem: MetricOvnkubeSubsystemMaster, - Name: "egress_ips_node_unreachable_total", - Help: "The total number of times assigned egress IP(s) were unreachable"}, -) - -var metricEgressIPRebalanceCount = prometheus.NewCounter(prometheus.CounterOpts{ - Namespace: MetricOvnkubeNamespace, - Subsystem: MetricOvnkubeSubsystemMaster, - Name: "egress_ips_rebalance_total", - Help: "The total number of times assigned egress IP(s) needed to be moved to a different node"}, -) - var metricNetpolEventLatency = prometheus.NewHistogramVec(prometheus.HistogramOpts{ Namespace: MetricOvnkubeNamespace, Subsystem: MetricOvnkubeSubsystemMaster, @@ -412,7 +391,6 @@ func RegisterMasterPerformance(nbClient libovsdbclient.Client) { // LE is won. func RegisterMasterFunctional() { // No need to unregister because process exits when leadership is lost. 
- prometheus.MustRegister(metricEgressIPCount) if config.Metrics.EnableScaleMetrics { klog.Infof("Scale metrics are enabled") prometheus.MustRegister(metricEgressIPAssignLatency) @@ -424,8 +402,6 @@ func RegisterMasterFunctional() { prometheus.MustRegister(metricPodSelectorAddrSetNamespaceEventLatency) prometheus.MustRegister(metricPodEventLatency) } - prometheus.MustRegister(metricEgressIPNodeUnreacheableCount) - prometheus.MustRegister(metricEgressIPRebalanceCount) prometheus.MustRegister(metricEgressFirewallRuleCount) prometheus.MustRegister(metricEgressFirewallCount) prometheus.MustRegister(metricEgressRoutingViaHost) @@ -505,12 +481,6 @@ func RecordPodCreated(pod *kapi.Pod, netInfo util.NetInfo) { } } -// RecordEgressIPCount records the total number of Egress IPs. -// This total may include multiple Egress IPs per EgressIP CR. -func RecordEgressIPCount(count float64) { - metricEgressIPCount.Set(count) -} - // RecordEgressIPAssign records how long it took EgressIP to configure OVN. func RecordEgressIPAssign(duration time.Duration) { metricEgressIPAssignLatency.Observe(duration.Seconds()) @@ -521,16 +491,6 @@ func RecordEgressIPUnassign(duration time.Duration) { metricEgressIPUnassignLatency.Observe(duration.Seconds()) } -// RecordEgressIPReachableNode records how many times EgressIP detected an unuseable node. -func RecordEgressIPUnreachableNode() { - metricEgressIPNodeUnreacheableCount.Inc() -} - -// RecordEgressIPRebalance records how many EgressIPs had to move to a different egress node. 
-func RecordEgressIPRebalance(count int) { - metricEgressIPRebalanceCount.Add(float64(count)) -} - func RecordNetpolEvent(eventName string, duration time.Duration) { metricNetpolEventLatency.WithLabelValues(eventName).Observe(duration.Seconds()) } From df58e4f69fe48bc7ffa8cd230d3d6d1af6d0a708 Mon Sep 17 00:00:00 2001 From: Surya Seetharaman Date: Wed, 24 May 2023 17:49:46 +0200 Subject: [PATCH 06/73] Add doc changes for IC metrics move Ensuring the work done here: https://github.com/martinkennelly/ovn-kubernetes-1/commit/c47ed896d6eef1e78844cc258deafd20502c348b sees light. Co-Authored-by: Martin Kennelly Signed-off-by: Surya Seetharaman --- docs/metrics.md | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/docs/metrics.md b/docs/metrics.md index 14d293fb49..797b7d35eb 100644 --- a/docs/metrics.md +++ b/docs/metrics.md @@ -15,8 +15,17 @@ Measurement accuracy can be impacted by other parallel processing that might be |ovnkube_master_network_programming_ovn_duration_seconds| Histogram | The duration for OVN to apply network configuration for a kind (e.g. pod, service, networkpolicy). ## Change log -This list is to help notify if there are additions, changes or removals to metrics. +This list is to help notify if there are additions, changes or removals to metrics. Latest changes are at the top of this list. +- Effect of OVN IC architecture: + - Move the following metrics from subsystem "master" to subsystem "clustermanager". Therefore, the follow metrics are renamed. 
+ - `ovnkube_master_num_v4_host_subnets` -> `ovnkube_clustermanager_num_v4_host_subnets` + - `ovnkube_master_num_v6_host_subnets` -> `ovnkube_clustermanager_num_v6_host_subnets` + - `ovnkube_master_allocated_v4_host_subnets` -> `ovnkube_clustermanager_allocated_v4_host_subnets` + - `ovnkube_master_allocated_v6_host_subnets` -> `ovnkube_clustermanager_allocated_v6_host_subnets` + - `ovnkube_master_num_egress_ips` -> `ovnkube_clustermanager_num_egress_ips` + - `ovnkube_master_egress_ips_node_unreachable_total` -> `ovnkube_clustermanager_egress_ips_node_unreachable_total` + - `ovnkube_master_egress_ips_rebalance_total` -> `ovnkube_clustermanager_egress_ips_rebalance_total` - Update description of ovnkube_master_pod_creation_latency_seconds - Add libovsdb metrics - ovnkube_master_libovsdb_disconnects_total and ovnkube_master_libovsdb_monitors. - Add ovn_controller_southbound_database_connected metric (https://github.com/ovn-org/ovn-kubernetes/pull/3117). From 39bdf00aeef9572b4c723dcd082aae47d539c811 Mon Sep 17 00:00:00 2001 From: jordigilh Date: Sat, 8 Apr 2023 10:50:45 -0400 Subject: [PATCH 07/73] Add Admin Policy Based External Route CRD and related generated code (informer,lister,api) Signed-off-by: jordigilh --- .../v1/apis/clientset/versioned/clientset.go | 96 ++++++++ .../v1/apis/clientset/versioned/doc.go | 19 ++ .../versioned/fake/clientset_generated.go | 84 +++++++ .../v1/apis/clientset/versioned/fake/doc.go | 19 ++ .../apis/clientset/versioned/fake/register.go | 55 +++++ .../v1/apis/clientset/versioned/scheme/doc.go | 19 ++ .../clientset/versioned/scheme/register.go | 55 +++++ .../v1/adminpolicybasedexternalroute.go | 183 +++++++++++++++ .../v1/adminpolicybasedroute_client.go | 88 +++++++ .../typed/adminpolicybasedroute/v1/doc.go | 19 ++ .../adminpolicybasedroute/v1/fake/doc.go | 19 ++ .../fake_adminpolicybasedexternalroute.go | 132 +++++++++++ .../fake/fake_adminpolicybasedroute_client.go | 39 ++++ .../v1/generated_expansion.go | 20 ++ 
.../adminpolicybasedroute/interface.go | 45 ++++ .../v1/adminpolicybasedexternalroute.go | 88 +++++++ .../adminpolicybasedroute/v1/interface.go | 44 ++++ .../informers/externalversions/factory.go | 179 ++++++++++++++ .../informers/externalversions/generic.go | 61 +++++ .../internalinterfaces/factory_interfaces.go | 39 ++++ .../v1/adminpolicybasedexternalroute.go | 67 ++++++ .../v1/expansion_generated.go | 22 ++ .../pkg/crd/adminpolicybasedroute/v1/doc.go | 4 + .../crd/adminpolicybasedroute/v1/register.go | 29 +++ .../pkg/crd/adminpolicybasedroute/v1/types.go | 166 +++++++++++++ .../v1/zz_generated.deepcopy.go | 220 ++++++++++++++++++ 26 files changed, 1811 insertions(+) create mode 100644 go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/clientset.go create mode 100644 go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/doc.go create mode 100644 go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/fake/clientset_generated.go create mode 100644 go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/fake/doc.go create mode 100644 go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/fake/register.go create mode 100644 go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/scheme/doc.go create mode 100644 go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/scheme/register.go create mode 100644 go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/typed/adminpolicybasedroute/v1/adminpolicybasedexternalroute.go create mode 100644 go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/typed/adminpolicybasedroute/v1/adminpolicybasedroute_client.go create mode 100644 go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/typed/adminpolicybasedroute/v1/doc.go create mode 100644 go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/typed/adminpolicybasedroute/v1/fake/doc.go 
create mode 100644 go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/typed/adminpolicybasedroute/v1/fake/fake_adminpolicybasedexternalroute.go create mode 100644 go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/typed/adminpolicybasedroute/v1/fake/fake_adminpolicybasedroute_client.go create mode 100644 go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/typed/adminpolicybasedroute/v1/generated_expansion.go create mode 100644 go-controller/pkg/crd/adminpolicybasedroute/v1/apis/informers/externalversions/adminpolicybasedroute/interface.go create mode 100644 go-controller/pkg/crd/adminpolicybasedroute/v1/apis/informers/externalversions/adminpolicybasedroute/v1/adminpolicybasedexternalroute.go create mode 100644 go-controller/pkg/crd/adminpolicybasedroute/v1/apis/informers/externalversions/adminpolicybasedroute/v1/interface.go create mode 100644 go-controller/pkg/crd/adminpolicybasedroute/v1/apis/informers/externalversions/factory.go create mode 100644 go-controller/pkg/crd/adminpolicybasedroute/v1/apis/informers/externalversions/generic.go create mode 100644 go-controller/pkg/crd/adminpolicybasedroute/v1/apis/informers/externalversions/internalinterfaces/factory_interfaces.go create mode 100644 go-controller/pkg/crd/adminpolicybasedroute/v1/apis/listers/adminpolicybasedroute/v1/adminpolicybasedexternalroute.go create mode 100644 go-controller/pkg/crd/adminpolicybasedroute/v1/apis/listers/adminpolicybasedroute/v1/expansion_generated.go create mode 100644 go-controller/pkg/crd/adminpolicybasedroute/v1/doc.go create mode 100644 go-controller/pkg/crd/adminpolicybasedroute/v1/register.go create mode 100644 go-controller/pkg/crd/adminpolicybasedroute/v1/types.go create mode 100644 go-controller/pkg/crd/adminpolicybasedroute/v1/zz_generated.deepcopy.go diff --git a/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/clientset.go 
b/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/clientset.go new file mode 100644 index 0000000000..c6c8453aed --- /dev/null +++ b/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/clientset.go @@ -0,0 +1,96 @@ +/* + + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +// Code generated by client-gen. DO NOT EDIT. + +package versioned + +import ( + "fmt" + + k8sv1 "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/typed/adminpolicybasedroute/v1" + discovery "k8s.io/client-go/discovery" + rest "k8s.io/client-go/rest" + flowcontrol "k8s.io/client-go/util/flowcontrol" +) + +type Interface interface { + Discovery() discovery.DiscoveryInterface + K8sV1() k8sv1.K8sV1Interface +} + +// Clientset contains the clients for groups. Each group has exactly one +// version included in a Clientset. +type Clientset struct { + *discovery.DiscoveryClient + k8sV1 *k8sv1.K8sV1Client +} + +// K8sV1 retrieves the K8sV1Client +func (c *Clientset) K8sV1() k8sv1.K8sV1Interface { + return c.k8sV1 +} + +// Discovery retrieves the DiscoveryClient +func (c *Clientset) Discovery() discovery.DiscoveryInterface { + if c == nil { + return nil + } + return c.DiscoveryClient +} + +// NewForConfig creates a new Clientset for the given config. +// If config's RateLimiter is not set and QPS and Burst are acceptable, +// NewForConfig will generate a rate-limiter in configShallowCopy. 
+func NewForConfig(c *rest.Config) (*Clientset, error) { + configShallowCopy := *c + if configShallowCopy.RateLimiter == nil && configShallowCopy.QPS > 0 { + if configShallowCopy.Burst <= 0 { + return nil, fmt.Errorf("burst is required to be greater than 0 when RateLimiter is not set and QPS is set to greater than 0") + } + configShallowCopy.RateLimiter = flowcontrol.NewTokenBucketRateLimiter(configShallowCopy.QPS, configShallowCopy.Burst) + } + var cs Clientset + var err error + cs.k8sV1, err = k8sv1.NewForConfig(&configShallowCopy) + if err != nil { + return nil, err + } + + cs.DiscoveryClient, err = discovery.NewDiscoveryClientForConfig(&configShallowCopy) + if err != nil { + return nil, err + } + return &cs, nil +} + +// NewForConfigOrDie creates a new Clientset for the given config and +// panics if there is an error in the config. +func NewForConfigOrDie(c *rest.Config) *Clientset { + var cs Clientset + cs.k8sV1 = k8sv1.NewForConfigOrDie(c) + + cs.DiscoveryClient = discovery.NewDiscoveryClientForConfigOrDie(c) + return &cs +} + +// New creates a new Clientset for the given RESTClient. +func New(c rest.Interface) *Clientset { + var cs Clientset + cs.k8sV1 = k8sv1.New(c) + + cs.DiscoveryClient = discovery.NewDiscoveryClient(c) + return &cs +} diff --git a/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/doc.go b/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/doc.go new file mode 100644 index 0000000000..518bc288b3 --- /dev/null +++ b/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/doc.go @@ -0,0 +1,19 @@ +/* + + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +// Code generated by client-gen. DO NOT EDIT. + +// This package has the automatically generated clientset. +package versioned diff --git a/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/fake/clientset_generated.go b/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/fake/clientset_generated.go new file mode 100644 index 0000000000..b7b1d957c8 --- /dev/null +++ b/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/fake/clientset_generated.go @@ -0,0 +1,84 @@ +/* + + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +// Code generated by client-gen. DO NOT EDIT. 
+ +package fake + +import ( + clientset "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned" + k8sv1 "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/typed/adminpolicybasedroute/v1" + fakek8sv1 "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/typed/adminpolicybasedroute/v1/fake" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/watch" + "k8s.io/client-go/discovery" + fakediscovery "k8s.io/client-go/discovery/fake" + "k8s.io/client-go/testing" +) + +// NewSimpleClientset returns a clientset that will respond with the provided objects. +// It's backed by a very simple object tracker that processes creates, updates and deletions as-is, +// without applying any validations and/or defaults. It shouldn't be considered a replacement +// for a real clientset and is mostly useful in simple unit tests. +func NewSimpleClientset(objects ...runtime.Object) *Clientset { + o := testing.NewObjectTracker(scheme, codecs.UniversalDecoder()) + for _, obj := range objects { + if err := o.Add(obj); err != nil { + panic(err) + } + } + + cs := &Clientset{tracker: o} + cs.discovery = &fakediscovery.FakeDiscovery{Fake: &cs.Fake} + cs.AddReactor("*", "*", testing.ObjectReaction(o)) + cs.AddWatchReactor("*", func(action testing.Action) (handled bool, ret watch.Interface, err error) { + gvr := action.GetResource() + ns := action.GetNamespace() + watch, err := o.Watch(gvr, ns) + if err != nil { + return false, nil, err + } + return true, watch, nil + }) + + return cs +} + +// Clientset implements clientset.Interface. Meant to be embedded into a +// struct to get a default implementation. This makes faking out just the method +// you want to test easier. 
+type Clientset struct { + testing.Fake + discovery *fakediscovery.FakeDiscovery + tracker testing.ObjectTracker +} + +func (c *Clientset) Discovery() discovery.DiscoveryInterface { + return c.discovery +} + +func (c *Clientset) Tracker() testing.ObjectTracker { + return c.tracker +} + +var ( + _ clientset.Interface = &Clientset{} + _ testing.FakeClient = &Clientset{} +) + +// K8sV1 retrieves the K8sV1Client +func (c *Clientset) K8sV1() k8sv1.K8sV1Interface { + return &fakek8sv1.FakeK8sV1{Fake: &c.Fake} +} diff --git a/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/fake/doc.go b/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/fake/doc.go new file mode 100644 index 0000000000..19e0028ffb --- /dev/null +++ b/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/fake/doc.go @@ -0,0 +1,19 @@ +/* + + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +// Code generated by client-gen. DO NOT EDIT. + +// This package has the automatically generated fake clientset. 
+package fake diff --git a/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/fake/register.go b/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/fake/register.go new file mode 100644 index 0000000000..d98971e92b --- /dev/null +++ b/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/fake/register.go @@ -0,0 +1,55 @@ +/* + + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +// Code generated by client-gen. DO NOT EDIT. + +package fake + +import ( + k8sv1 "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/adminpolicybasedroute/v1" + v1 "k8s.io/apimachinery/pkg/apis/meta/v1" + runtime "k8s.io/apimachinery/pkg/runtime" + schema "k8s.io/apimachinery/pkg/runtime/schema" + serializer "k8s.io/apimachinery/pkg/runtime/serializer" + utilruntime "k8s.io/apimachinery/pkg/util/runtime" +) + +var scheme = runtime.NewScheme() +var codecs = serializer.NewCodecFactory(scheme) + +var localSchemeBuilder = runtime.SchemeBuilder{ + k8sv1.AddToScheme, +} + +// AddToScheme adds all types of this clientset into the given scheme. 
This allows composition +// of clientsets, like in: +// +// import ( +// "k8s.io/client-go/kubernetes" +// clientsetscheme "k8s.io/client-go/kubernetes/scheme" +// aggregatorclientsetscheme "k8s.io/kube-aggregator/pkg/client/clientset_generated/clientset/scheme" +// ) +// +// kclientset, _ := kubernetes.NewForConfig(c) +// _ = aggregatorclientsetscheme.AddToScheme(clientsetscheme.Scheme) +// +// After this, RawExtensions in Kubernetes types will serialize kube-aggregator types +// correctly. +var AddToScheme = localSchemeBuilder.AddToScheme + +func init() { + v1.AddToGroupVersion(scheme, schema.GroupVersion{Version: "v1"}) + utilruntime.Must(AddToScheme(scheme)) +} diff --git a/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/scheme/doc.go b/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/scheme/doc.go new file mode 100644 index 0000000000..1aec4021fc --- /dev/null +++ b/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/scheme/doc.go @@ -0,0 +1,19 @@ +/* + + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +// Code generated by client-gen. DO NOT EDIT. + +// This package contains the scheme of the automatically generated clientset. 
+package scheme diff --git a/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/scheme/register.go b/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/scheme/register.go new file mode 100644 index 0000000000..8b6a438be5 --- /dev/null +++ b/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/scheme/register.go @@ -0,0 +1,55 @@ +/* + + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +// Code generated by client-gen. DO NOT EDIT. + +package scheme + +import ( + k8sv1 "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/adminpolicybasedroute/v1" + v1 "k8s.io/apimachinery/pkg/apis/meta/v1" + runtime "k8s.io/apimachinery/pkg/runtime" + schema "k8s.io/apimachinery/pkg/runtime/schema" + serializer "k8s.io/apimachinery/pkg/runtime/serializer" + utilruntime "k8s.io/apimachinery/pkg/util/runtime" +) + +var Scheme = runtime.NewScheme() +var Codecs = serializer.NewCodecFactory(Scheme) +var ParameterCodec = runtime.NewParameterCodec(Scheme) +var localSchemeBuilder = runtime.SchemeBuilder{ + k8sv1.AddToScheme, +} + +// AddToScheme adds all types of this clientset into the given scheme. 
This allows composition +// of clientsets, like in: +// +// import ( +// "k8s.io/client-go/kubernetes" +// clientsetscheme "k8s.io/client-go/kubernetes/scheme" +// aggregatorclientsetscheme "k8s.io/kube-aggregator/pkg/client/clientset_generated/clientset/scheme" +// ) +// +// kclientset, _ := kubernetes.NewForConfig(c) +// _ = aggregatorclientsetscheme.AddToScheme(clientsetscheme.Scheme) +// +// After this, RawExtensions in Kubernetes types will serialize kube-aggregator types +// correctly. +var AddToScheme = localSchemeBuilder.AddToScheme + +func init() { + v1.AddToGroupVersion(Scheme, schema.GroupVersion{Version: "v1"}) + utilruntime.Must(AddToScheme(Scheme)) +} diff --git a/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/typed/adminpolicybasedroute/v1/adminpolicybasedexternalroute.go b/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/typed/adminpolicybasedroute/v1/adminpolicybasedexternalroute.go new file mode 100644 index 0000000000..ecfc25d1b3 --- /dev/null +++ b/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/typed/adminpolicybasedroute/v1/adminpolicybasedexternalroute.go @@ -0,0 +1,183 @@ +/* + + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +// Code generated by client-gen. DO NOT EDIT. 
+ +package v1 + +import ( + "context" + "time" + + v1 "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/adminpolicybasedroute/v1" + scheme "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/scheme" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + types "k8s.io/apimachinery/pkg/types" + watch "k8s.io/apimachinery/pkg/watch" + rest "k8s.io/client-go/rest" +) + +// AdminPolicyBasedExternalRoutesGetter has a method to return a AdminPolicyBasedExternalRouteInterface. +// A group's client should implement this interface. +type AdminPolicyBasedExternalRoutesGetter interface { + AdminPolicyBasedExternalRoutes() AdminPolicyBasedExternalRouteInterface +} + +// AdminPolicyBasedExternalRouteInterface has methods to work with AdminPolicyBasedExternalRoute resources. +type AdminPolicyBasedExternalRouteInterface interface { + Create(ctx context.Context, adminPolicyBasedExternalRoute *v1.AdminPolicyBasedExternalRoute, opts metav1.CreateOptions) (*v1.AdminPolicyBasedExternalRoute, error) + Update(ctx context.Context, adminPolicyBasedExternalRoute *v1.AdminPolicyBasedExternalRoute, opts metav1.UpdateOptions) (*v1.AdminPolicyBasedExternalRoute, error) + UpdateStatus(ctx context.Context, adminPolicyBasedExternalRoute *v1.AdminPolicyBasedExternalRoute, opts metav1.UpdateOptions) (*v1.AdminPolicyBasedExternalRoute, error) + Delete(ctx context.Context, name string, opts metav1.DeleteOptions) error + DeleteCollection(ctx context.Context, opts metav1.DeleteOptions, listOpts metav1.ListOptions) error + Get(ctx context.Context, name string, opts metav1.GetOptions) (*v1.AdminPolicyBasedExternalRoute, error) + List(ctx context.Context, opts metav1.ListOptions) (*v1.AdminPolicyBasedExternalRouteList, error) + Watch(ctx context.Context, opts metav1.ListOptions) (watch.Interface, error) + Patch(ctx context.Context, name string, pt types.PatchType, data []byte, opts metav1.PatchOptions, subresources ...string) (result 
*v1.AdminPolicyBasedExternalRoute, err error) + AdminPolicyBasedExternalRouteExpansion +} + +// adminPolicyBasedExternalRoutes implements AdminPolicyBasedExternalRouteInterface +type adminPolicyBasedExternalRoutes struct { + client rest.Interface +} + +// newAdminPolicyBasedExternalRoutes returns a AdminPolicyBasedExternalRoutes +func newAdminPolicyBasedExternalRoutes(c *K8sV1Client) *adminPolicyBasedExternalRoutes { + return &adminPolicyBasedExternalRoutes{ + client: c.RESTClient(), + } +} + +// Get takes name of the adminPolicyBasedExternalRoute, and returns the corresponding adminPolicyBasedExternalRoute object, and an error if there is any. +func (c *adminPolicyBasedExternalRoutes) Get(ctx context.Context, name string, options metav1.GetOptions) (result *v1.AdminPolicyBasedExternalRoute, err error) { + result = &v1.AdminPolicyBasedExternalRoute{} + err = c.client.Get(). + Resource("adminpolicybasedexternalroutes"). + Name(name). + VersionedParams(&options, scheme.ParameterCodec). + Do(ctx). + Into(result) + return +} + +// List takes label and field selectors, and returns the list of AdminPolicyBasedExternalRoutes that match those selectors. +func (c *adminPolicyBasedExternalRoutes) List(ctx context.Context, opts metav1.ListOptions) (result *v1.AdminPolicyBasedExternalRouteList, err error) { + var timeout time.Duration + if opts.TimeoutSeconds != nil { + timeout = time.Duration(*opts.TimeoutSeconds) * time.Second + } + result = &v1.AdminPolicyBasedExternalRouteList{} + err = c.client.Get(). + Resource("adminpolicybasedexternalroutes"). + VersionedParams(&opts, scheme.ParameterCodec). + Timeout(timeout). + Do(ctx). + Into(result) + return +} + +// Watch returns a watch.Interface that watches the requested adminPolicyBasedExternalRoutes. 
+func (c *adminPolicyBasedExternalRoutes) Watch(ctx context.Context, opts metav1.ListOptions) (watch.Interface, error) { + var timeout time.Duration + if opts.TimeoutSeconds != nil { + timeout = time.Duration(*opts.TimeoutSeconds) * time.Second + } + opts.Watch = true + return c.client.Get(). + Resource("adminpolicybasedexternalroutes"). + VersionedParams(&opts, scheme.ParameterCodec). + Timeout(timeout). + Watch(ctx) +} + +// Create takes the representation of a adminPolicyBasedExternalRoute and creates it. Returns the server's representation of the adminPolicyBasedExternalRoute, and an error, if there is any. +func (c *adminPolicyBasedExternalRoutes) Create(ctx context.Context, adminPolicyBasedExternalRoute *v1.AdminPolicyBasedExternalRoute, opts metav1.CreateOptions) (result *v1.AdminPolicyBasedExternalRoute, err error) { + result = &v1.AdminPolicyBasedExternalRoute{} + err = c.client.Post(). + Resource("adminpolicybasedexternalroutes"). + VersionedParams(&opts, scheme.ParameterCodec). + Body(adminPolicyBasedExternalRoute). + Do(ctx). + Into(result) + return +} + +// Update takes the representation of a adminPolicyBasedExternalRoute and updates it. Returns the server's representation of the adminPolicyBasedExternalRoute, and an error, if there is any. +func (c *adminPolicyBasedExternalRoutes) Update(ctx context.Context, adminPolicyBasedExternalRoute *v1.AdminPolicyBasedExternalRoute, opts metav1.UpdateOptions) (result *v1.AdminPolicyBasedExternalRoute, err error) { + result = &v1.AdminPolicyBasedExternalRoute{} + err = c.client.Put(). + Resource("adminpolicybasedexternalroutes"). + Name(adminPolicyBasedExternalRoute.Name). + VersionedParams(&opts, scheme.ParameterCodec). + Body(adminPolicyBasedExternalRoute). + Do(ctx). + Into(result) + return +} + +// UpdateStatus was generated because the type contains a Status member. +// Add a +genclient:noStatus comment above the type to avoid generating UpdateStatus(). 
+func (c *adminPolicyBasedExternalRoutes) UpdateStatus(ctx context.Context, adminPolicyBasedExternalRoute *v1.AdminPolicyBasedExternalRoute, opts metav1.UpdateOptions) (result *v1.AdminPolicyBasedExternalRoute, err error) { + result = &v1.AdminPolicyBasedExternalRoute{} + err = c.client.Put(). + Resource("adminpolicybasedexternalroutes"). + Name(adminPolicyBasedExternalRoute.Name). + SubResource("status"). + VersionedParams(&opts, scheme.ParameterCodec). + Body(adminPolicyBasedExternalRoute). + Do(ctx). + Into(result) + return +} + +// Delete takes name of the adminPolicyBasedExternalRoute and deletes it. Returns an error if one occurs. +func (c *adminPolicyBasedExternalRoutes) Delete(ctx context.Context, name string, opts metav1.DeleteOptions) error { + return c.client.Delete(). + Resource("adminpolicybasedexternalroutes"). + Name(name). + Body(&opts). + Do(ctx). + Error() +} + +// DeleteCollection deletes a collection of objects. +func (c *adminPolicyBasedExternalRoutes) DeleteCollection(ctx context.Context, opts metav1.DeleteOptions, listOpts metav1.ListOptions) error { + var timeout time.Duration + if listOpts.TimeoutSeconds != nil { + timeout = time.Duration(*listOpts.TimeoutSeconds) * time.Second + } + return c.client.Delete(). + Resource("adminpolicybasedexternalroutes"). + VersionedParams(&listOpts, scheme.ParameterCodec). + Timeout(timeout). + Body(&opts). + Do(ctx). + Error() +} + +// Patch applies the patch and returns the patched adminPolicyBasedExternalRoute. +func (c *adminPolicyBasedExternalRoutes) Patch(ctx context.Context, name string, pt types.PatchType, data []byte, opts metav1.PatchOptions, subresources ...string) (result *v1.AdminPolicyBasedExternalRoute, err error) { + result = &v1.AdminPolicyBasedExternalRoute{} + err = c.client.Patch(pt). + Resource("adminpolicybasedexternalroutes"). + Name(name). + SubResource(subresources...). + VersionedParams(&opts, scheme.ParameterCodec). + Body(data). + Do(ctx). 
+ Into(result) + return +} diff --git a/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/typed/adminpolicybasedroute/v1/adminpolicybasedroute_client.go b/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/typed/adminpolicybasedroute/v1/adminpolicybasedroute_client.go new file mode 100644 index 0000000000..aeb89508e9 --- /dev/null +++ b/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/typed/adminpolicybasedroute/v1/adminpolicybasedroute_client.go @@ -0,0 +1,88 @@ +/* + + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +// Code generated by client-gen. DO NOT EDIT. + +package v1 + +import ( + v1 "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/adminpolicybasedroute/v1" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/scheme" + rest "k8s.io/client-go/rest" +) + +type K8sV1Interface interface { + RESTClient() rest.Interface + AdminPolicyBasedExternalRoutesGetter +} + +// K8sV1Client is used to interact with features provided by the k8s.ovn.org group. +type K8sV1Client struct { + restClient rest.Interface +} + +func (c *K8sV1Client) AdminPolicyBasedExternalRoutes() AdminPolicyBasedExternalRouteInterface { + return newAdminPolicyBasedExternalRoutes(c) +} + +// NewForConfig creates a new K8sV1Client for the given config. 
+func NewForConfig(c *rest.Config) (*K8sV1Client, error) { + config := *c + if err := setConfigDefaults(&config); err != nil { + return nil, err + } + client, err := rest.RESTClientFor(&config) + if err != nil { + return nil, err + } + return &K8sV1Client{client}, nil +} + +// NewForConfigOrDie creates a new K8sV1Client for the given config and +// panics if there is an error in the config. +func NewForConfigOrDie(c *rest.Config) *K8sV1Client { + client, err := NewForConfig(c) + if err != nil { + panic(err) + } + return client +} + +// New creates a new K8sV1Client for the given RESTClient. +func New(c rest.Interface) *K8sV1Client { + return &K8sV1Client{c} +} + +func setConfigDefaults(config *rest.Config) error { + gv := v1.SchemeGroupVersion + config.GroupVersion = &gv + config.APIPath = "/apis" + config.NegotiatedSerializer = scheme.Codecs.WithoutConversion() + + if config.UserAgent == "" { + config.UserAgent = rest.DefaultKubernetesUserAgent() + } + + return nil +} + +// RESTClient returns a RESTClient that is used to communicate +// with API server by this client implementation. +func (c *K8sV1Client) RESTClient() rest.Interface { + if c == nil { + return nil + } + return c.restClient +} diff --git a/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/typed/adminpolicybasedroute/v1/doc.go b/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/typed/adminpolicybasedroute/v1/doc.go new file mode 100644 index 0000000000..b22b05acdb --- /dev/null +++ b/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/typed/adminpolicybasedroute/v1/doc.go @@ -0,0 +1,19 @@ +/* + + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +// Code generated by client-gen. DO NOT EDIT. + +// This package has the automatically generated typed clients. +package v1 diff --git a/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/typed/adminpolicybasedroute/v1/fake/doc.go b/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/typed/adminpolicybasedroute/v1/fake/doc.go new file mode 100644 index 0000000000..422564f2d5 --- /dev/null +++ b/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/typed/adminpolicybasedroute/v1/fake/doc.go @@ -0,0 +1,19 @@ +/* + + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +// Code generated by client-gen. DO NOT EDIT. + +// Package fake has the automatically generated clients. 
+package fake diff --git a/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/typed/adminpolicybasedroute/v1/fake/fake_adminpolicybasedexternalroute.go b/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/typed/adminpolicybasedroute/v1/fake/fake_adminpolicybasedexternalroute.go new file mode 100644 index 0000000000..448b431522 --- /dev/null +++ b/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/typed/adminpolicybasedroute/v1/fake/fake_adminpolicybasedexternalroute.go @@ -0,0 +1,132 @@ +/* + + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +// Code generated by client-gen. DO NOT EDIT. 
+ +package fake + +import ( + "context" + + adminpolicybasedroutev1 "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/adminpolicybasedroute/v1" + v1 "k8s.io/apimachinery/pkg/apis/meta/v1" + labels "k8s.io/apimachinery/pkg/labels" + schema "k8s.io/apimachinery/pkg/runtime/schema" + types "k8s.io/apimachinery/pkg/types" + watch "k8s.io/apimachinery/pkg/watch" + testing "k8s.io/client-go/testing" +) + +// FakeAdminPolicyBasedExternalRoutes implements AdminPolicyBasedExternalRouteInterface +type FakeAdminPolicyBasedExternalRoutes struct { + Fake *FakeK8sV1 +} + +var adminpolicybasedexternalroutesResource = schema.GroupVersionResource{Group: "k8s.ovn.org", Version: "v1", Resource: "adminpolicybasedexternalroutes"} + +var adminpolicybasedexternalroutesKind = schema.GroupVersionKind{Group: "k8s.ovn.org", Version: "v1", Kind: "AdminPolicyBasedExternalRoute"} + +// Get takes name of the adminPolicyBasedExternalRoute, and returns the corresponding adminPolicyBasedExternalRoute object, and an error if there is any. +func (c *FakeAdminPolicyBasedExternalRoutes) Get(ctx context.Context, name string, options v1.GetOptions) (result *adminpolicybasedroutev1.AdminPolicyBasedExternalRoute, err error) { + obj, err := c.Fake. + Invokes(testing.NewRootGetAction(adminpolicybasedexternalroutesResource, name), &adminpolicybasedroutev1.AdminPolicyBasedExternalRoute{}) + if obj == nil { + return nil, err + } + return obj.(*adminpolicybasedroutev1.AdminPolicyBasedExternalRoute), err +} + +// List takes label and field selectors, and returns the list of AdminPolicyBasedExternalRoutes that match those selectors. +func (c *FakeAdminPolicyBasedExternalRoutes) List(ctx context.Context, opts v1.ListOptions) (result *adminpolicybasedroutev1.AdminPolicyBasedExternalRouteList, err error) { + obj, err := c.Fake. 
+ Invokes(testing.NewRootListAction(adminpolicybasedexternalroutesResource, adminpolicybasedexternalroutesKind, opts), &adminpolicybasedroutev1.AdminPolicyBasedExternalRouteList{}) + if obj == nil { + return nil, err + } + + label, _, _ := testing.ExtractFromListOptions(opts) + if label == nil { + label = labels.Everything() + } + list := &adminpolicybasedroutev1.AdminPolicyBasedExternalRouteList{ListMeta: obj.(*adminpolicybasedroutev1.AdminPolicyBasedExternalRouteList).ListMeta} + for _, item := range obj.(*adminpolicybasedroutev1.AdminPolicyBasedExternalRouteList).Items { + if label.Matches(labels.Set(item.Labels)) { + list.Items = append(list.Items, item) + } + } + return list, err +} + +// Watch returns a watch.Interface that watches the requested adminPolicyBasedExternalRoutes. +func (c *FakeAdminPolicyBasedExternalRoutes) Watch(ctx context.Context, opts v1.ListOptions) (watch.Interface, error) { + return c.Fake. + InvokesWatch(testing.NewRootWatchAction(adminpolicybasedexternalroutesResource, opts)) +} + +// Create takes the representation of a adminPolicyBasedExternalRoute and creates it. Returns the server's representation of the adminPolicyBasedExternalRoute, and an error, if there is any. +func (c *FakeAdminPolicyBasedExternalRoutes) Create(ctx context.Context, adminPolicyBasedExternalRoute *adminpolicybasedroutev1.AdminPolicyBasedExternalRoute, opts v1.CreateOptions) (result *adminpolicybasedroutev1.AdminPolicyBasedExternalRoute, err error) { + obj, err := c.Fake. + Invokes(testing.NewRootCreateAction(adminpolicybasedexternalroutesResource, adminPolicyBasedExternalRoute), &adminpolicybasedroutev1.AdminPolicyBasedExternalRoute{}) + if obj == nil { + return nil, err + } + return obj.(*adminpolicybasedroutev1.AdminPolicyBasedExternalRoute), err +} + +// Update takes the representation of a adminPolicyBasedExternalRoute and updates it. Returns the server's representation of the adminPolicyBasedExternalRoute, and an error, if there is any. 
+func (c *FakeAdminPolicyBasedExternalRoutes) Update(ctx context.Context, adminPolicyBasedExternalRoute *adminpolicybasedroutev1.AdminPolicyBasedExternalRoute, opts v1.UpdateOptions) (result *adminpolicybasedroutev1.AdminPolicyBasedExternalRoute, err error) { + obj, err := c.Fake. + Invokes(testing.NewRootUpdateAction(adminpolicybasedexternalroutesResource, adminPolicyBasedExternalRoute), &adminpolicybasedroutev1.AdminPolicyBasedExternalRoute{}) + if obj == nil { + return nil, err + } + return obj.(*adminpolicybasedroutev1.AdminPolicyBasedExternalRoute), err +} + +// UpdateStatus was generated because the type contains a Status member. +// Add a +genclient:noStatus comment above the type to avoid generating UpdateStatus(). +func (c *FakeAdminPolicyBasedExternalRoutes) UpdateStatus(ctx context.Context, adminPolicyBasedExternalRoute *adminpolicybasedroutev1.AdminPolicyBasedExternalRoute, opts v1.UpdateOptions) (*adminpolicybasedroutev1.AdminPolicyBasedExternalRoute, error) { + obj, err := c.Fake. + Invokes(testing.NewRootUpdateSubresourceAction(adminpolicybasedexternalroutesResource, "status", adminPolicyBasedExternalRoute), &adminpolicybasedroutev1.AdminPolicyBasedExternalRoute{}) + if obj == nil { + return nil, err + } + return obj.(*adminpolicybasedroutev1.AdminPolicyBasedExternalRoute), err +} + +// Delete takes name of the adminPolicyBasedExternalRoute and deletes it. Returns an error if one occurs. +func (c *FakeAdminPolicyBasedExternalRoutes) Delete(ctx context.Context, name string, opts v1.DeleteOptions) error { + _, err := c.Fake. + Invokes(testing.NewRootDeleteAction(adminpolicybasedexternalroutesResource, name), &adminpolicybasedroutev1.AdminPolicyBasedExternalRoute{}) + return err +} + +// DeleteCollection deletes a collection of objects. 
+func (c *FakeAdminPolicyBasedExternalRoutes) DeleteCollection(ctx context.Context, opts v1.DeleteOptions, listOpts v1.ListOptions) error { + action := testing.NewRootDeleteCollectionAction(adminpolicybasedexternalroutesResource, listOpts) + + _, err := c.Fake.Invokes(action, &adminpolicybasedroutev1.AdminPolicyBasedExternalRouteList{}) + return err +} + +// Patch applies the patch and returns the patched adminPolicyBasedExternalRoute. +func (c *FakeAdminPolicyBasedExternalRoutes) Patch(ctx context.Context, name string, pt types.PatchType, data []byte, opts v1.PatchOptions, subresources ...string) (result *adminpolicybasedroutev1.AdminPolicyBasedExternalRoute, err error) { + obj, err := c.Fake. + Invokes(testing.NewRootPatchSubresourceAction(adminpolicybasedexternalroutesResource, name, pt, data, subresources...), &adminpolicybasedroutev1.AdminPolicyBasedExternalRoute{}) + if obj == nil { + return nil, err + } + return obj.(*adminpolicybasedroutev1.AdminPolicyBasedExternalRoute), err +} diff --git a/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/typed/adminpolicybasedroute/v1/fake/fake_adminpolicybasedroute_client.go b/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/typed/adminpolicybasedroute/v1/fake/fake_adminpolicybasedroute_client.go new file mode 100644 index 0000000000..c6de68dffb --- /dev/null +++ b/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/typed/adminpolicybasedroute/v1/fake/fake_adminpolicybasedroute_client.go @@ -0,0 +1,39 @@ +/* + + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +*/ +// Code generated by client-gen. DO NOT EDIT. + +package fake + +import ( + v1 "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/typed/adminpolicybasedroute/v1" + rest "k8s.io/client-go/rest" + testing "k8s.io/client-go/testing" +) + +type FakeK8sV1 struct { + *testing.Fake +} + +func (c *FakeK8sV1) AdminPolicyBasedExternalRoutes() v1.AdminPolicyBasedExternalRouteInterface { + return &FakeAdminPolicyBasedExternalRoutes{c} +} + +// RESTClient returns a RESTClient that is used to communicate +// with API server by this client implementation. +func (c *FakeK8sV1) RESTClient() rest.Interface { + var ret *rest.RESTClient + return ret +} diff --git a/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/typed/adminpolicybasedroute/v1/generated_expansion.go b/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/typed/adminpolicybasedroute/v1/generated_expansion.go new file mode 100644 index 0000000000..e933837f77 --- /dev/null +++ b/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/typed/adminpolicybasedroute/v1/generated_expansion.go @@ -0,0 +1,20 @@ +/* + + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +// Code generated by client-gen. DO NOT EDIT. 
+ +package v1 + +type AdminPolicyBasedExternalRouteExpansion interface{} diff --git a/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/informers/externalversions/adminpolicybasedroute/interface.go b/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/informers/externalversions/adminpolicybasedroute/interface.go new file mode 100644 index 0000000000..d455c82fbf --- /dev/null +++ b/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/informers/externalversions/adminpolicybasedroute/interface.go @@ -0,0 +1,45 @@ +/* + + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +// Code generated by informer-gen. DO NOT EDIT. + +package adminpolicybasedroute + +import ( + v1 "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/informers/externalversions/adminpolicybasedroute/v1" + internalinterfaces "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/informers/externalversions/internalinterfaces" +) + +// Interface provides access to each of this group's versions. +type Interface interface { + // V1 provides access to shared informers for resources in V1. + V1() v1.Interface +} + +type group struct { + factory internalinterfaces.SharedInformerFactory + namespace string + tweakListOptions internalinterfaces.TweakListOptionsFunc +} + +// New returns a new Interface. 
+func New(f internalinterfaces.SharedInformerFactory, namespace string, tweakListOptions internalinterfaces.TweakListOptionsFunc) Interface { + return &group{factory: f, namespace: namespace, tweakListOptions: tweakListOptions} +} + +// V1 returns a new v1.Interface. +func (g *group) V1() v1.Interface { + return v1.New(g.factory, g.namespace, g.tweakListOptions) +} diff --git a/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/informers/externalversions/adminpolicybasedroute/v1/adminpolicybasedexternalroute.go b/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/informers/externalversions/adminpolicybasedroute/v1/adminpolicybasedexternalroute.go new file mode 100644 index 0000000000..e8ff325a47 --- /dev/null +++ b/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/informers/externalversions/adminpolicybasedroute/v1/adminpolicybasedexternalroute.go @@ -0,0 +1,88 @@ +/* + + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +// Code generated by informer-gen. DO NOT EDIT. 
+ +package v1 + +import ( + "context" + time "time" + + adminpolicybasedroutev1 "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/adminpolicybasedroute/v1" + versioned "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned" + internalinterfaces "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/informers/externalversions/internalinterfaces" + v1 "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/listers/adminpolicybasedroute/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + runtime "k8s.io/apimachinery/pkg/runtime" + watch "k8s.io/apimachinery/pkg/watch" + cache "k8s.io/client-go/tools/cache" +) + +// AdminPolicyBasedExternalRouteInformer provides access to a shared informer and lister for +// AdminPolicyBasedExternalRoutes. +type AdminPolicyBasedExternalRouteInformer interface { + Informer() cache.SharedIndexInformer + Lister() v1.AdminPolicyBasedExternalRouteLister +} + +type adminPolicyBasedExternalRouteInformer struct { + factory internalinterfaces.SharedInformerFactory + tweakListOptions internalinterfaces.TweakListOptionsFunc +} + +// NewAdminPolicyBasedExternalRouteInformer constructs a new informer for AdminPolicyBasedExternalRoute type. +// Always prefer using an informer factory to get a shared informer instead of getting an independent +// one. This reduces memory footprint and number of connections to the server. +func NewAdminPolicyBasedExternalRouteInformer(client versioned.Interface, resyncPeriod time.Duration, indexers cache.Indexers) cache.SharedIndexInformer { + return NewFilteredAdminPolicyBasedExternalRouteInformer(client, resyncPeriod, indexers, nil) +} + +// NewFilteredAdminPolicyBasedExternalRouteInformer constructs a new informer for AdminPolicyBasedExternalRoute type. +// Always prefer using an informer factory to get a shared informer instead of getting an independent +// one. 
This reduces memory footprint and number of connections to the server. +func NewFilteredAdminPolicyBasedExternalRouteInformer(client versioned.Interface, resyncPeriod time.Duration, indexers cache.Indexers, tweakListOptions internalinterfaces.TweakListOptionsFunc) cache.SharedIndexInformer { + return cache.NewSharedIndexInformer( + &cache.ListWatch{ + ListFunc: func(options metav1.ListOptions) (runtime.Object, error) { + if tweakListOptions != nil { + tweakListOptions(&options) + } + return client.K8sV1().AdminPolicyBasedExternalRoutes().List(context.TODO(), options) + }, + WatchFunc: func(options metav1.ListOptions) (watch.Interface, error) { + if tweakListOptions != nil { + tweakListOptions(&options) + } + return client.K8sV1().AdminPolicyBasedExternalRoutes().Watch(context.TODO(), options) + }, + }, + &adminpolicybasedroutev1.AdminPolicyBasedExternalRoute{}, + resyncPeriod, + indexers, + ) +} + +func (f *adminPolicyBasedExternalRouteInformer) defaultInformer(client versioned.Interface, resyncPeriod time.Duration) cache.SharedIndexInformer { + return NewFilteredAdminPolicyBasedExternalRouteInformer(client, resyncPeriod, cache.Indexers{cache.NamespaceIndex: cache.MetaNamespaceIndexFunc}, f.tweakListOptions) +} + +func (f *adminPolicyBasedExternalRouteInformer) Informer() cache.SharedIndexInformer { + return f.factory.InformerFor(&adminpolicybasedroutev1.AdminPolicyBasedExternalRoute{}, f.defaultInformer) +} + +func (f *adminPolicyBasedExternalRouteInformer) Lister() v1.AdminPolicyBasedExternalRouteLister { + return v1.NewAdminPolicyBasedExternalRouteLister(f.Informer().GetIndexer()) +} diff --git a/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/informers/externalversions/adminpolicybasedroute/v1/interface.go b/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/informers/externalversions/adminpolicybasedroute/v1/interface.go new file mode 100644 index 0000000000..bef8d47165 --- /dev/null +++ 
b/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/informers/externalversions/adminpolicybasedroute/v1/interface.go @@ -0,0 +1,44 @@ +/* + + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +// Code generated by informer-gen. DO NOT EDIT. + +package v1 + +import ( + internalinterfaces "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/informers/externalversions/internalinterfaces" +) + +// Interface provides access to all the informers in this group version. +type Interface interface { + // AdminPolicyBasedExternalRoutes returns a AdminPolicyBasedExternalRouteInformer. + AdminPolicyBasedExternalRoutes() AdminPolicyBasedExternalRouteInformer +} + +type version struct { + factory internalinterfaces.SharedInformerFactory + namespace string + tweakListOptions internalinterfaces.TweakListOptionsFunc +} + +// New returns a new Interface. +func New(f internalinterfaces.SharedInformerFactory, namespace string, tweakListOptions internalinterfaces.TweakListOptionsFunc) Interface { + return &version{factory: f, namespace: namespace, tweakListOptions: tweakListOptions} +} + +// AdminPolicyBasedExternalRoutes returns a AdminPolicyBasedExternalRouteInformer. 
+func (v *version) AdminPolicyBasedExternalRoutes() AdminPolicyBasedExternalRouteInformer { + return &adminPolicyBasedExternalRouteInformer{factory: v.factory, tweakListOptions: v.tweakListOptions} +} diff --git a/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/informers/externalversions/factory.go b/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/informers/externalversions/factory.go new file mode 100644 index 0000000000..b3332112ec --- /dev/null +++ b/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/informers/externalversions/factory.go @@ -0,0 +1,179 @@ +/* + + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +// Code generated by informer-gen. DO NOT EDIT. + +package externalversions + +import ( + reflect "reflect" + sync "sync" + time "time" + + versioned "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned" + adminpolicybasedroute "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/informers/externalversions/adminpolicybasedroute" + internalinterfaces "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/informers/externalversions/internalinterfaces" + v1 "k8s.io/apimachinery/pkg/apis/meta/v1" + runtime "k8s.io/apimachinery/pkg/runtime" + schema "k8s.io/apimachinery/pkg/runtime/schema" + cache "k8s.io/client-go/tools/cache" +) + +// SharedInformerOption defines the functional option type for SharedInformerFactory. 
+type SharedInformerOption func(*sharedInformerFactory) *sharedInformerFactory + +type sharedInformerFactory struct { + client versioned.Interface + namespace string + tweakListOptions internalinterfaces.TweakListOptionsFunc + lock sync.Mutex + defaultResync time.Duration + customResync map[reflect.Type]time.Duration + + informers map[reflect.Type]cache.SharedIndexInformer + // startedInformers is used for tracking which informers have been started. + // This allows Start() to be called multiple times safely. + startedInformers map[reflect.Type]bool +} + +// WithCustomResyncConfig sets a custom resync period for the specified informer types. +func WithCustomResyncConfig(resyncConfig map[v1.Object]time.Duration) SharedInformerOption { + return func(factory *sharedInformerFactory) *sharedInformerFactory { + for k, v := range resyncConfig { + factory.customResync[reflect.TypeOf(k)] = v + } + return factory + } +} + +// WithTweakListOptions sets a custom filter on all listers of the configured SharedInformerFactory. +func WithTweakListOptions(tweakListOptions internalinterfaces.TweakListOptionsFunc) SharedInformerOption { + return func(factory *sharedInformerFactory) *sharedInformerFactory { + factory.tweakListOptions = tweakListOptions + return factory + } +} + +// WithNamespace limits the SharedInformerFactory to the specified namespace. +func WithNamespace(namespace string) SharedInformerOption { + return func(factory *sharedInformerFactory) *sharedInformerFactory { + factory.namespace = namespace + return factory + } +} + +// NewSharedInformerFactory constructs a new instance of sharedInformerFactory for all namespaces. +func NewSharedInformerFactory(client versioned.Interface, defaultResync time.Duration) SharedInformerFactory { + return NewSharedInformerFactoryWithOptions(client, defaultResync) +} + +// NewFilteredSharedInformerFactory constructs a new instance of sharedInformerFactory. 
+// Listers obtained via this SharedInformerFactory will be subject to the same filters +// as specified here. +// Deprecated: Please use NewSharedInformerFactoryWithOptions instead +func NewFilteredSharedInformerFactory(client versioned.Interface, defaultResync time.Duration, namespace string, tweakListOptions internalinterfaces.TweakListOptionsFunc) SharedInformerFactory { + return NewSharedInformerFactoryWithOptions(client, defaultResync, WithNamespace(namespace), WithTweakListOptions(tweakListOptions)) +} + +// NewSharedInformerFactoryWithOptions constructs a new instance of a SharedInformerFactory with additional options. +func NewSharedInformerFactoryWithOptions(client versioned.Interface, defaultResync time.Duration, options ...SharedInformerOption) SharedInformerFactory { + factory := &sharedInformerFactory{ + client: client, + namespace: v1.NamespaceAll, + defaultResync: defaultResync, + informers: make(map[reflect.Type]cache.SharedIndexInformer), + startedInformers: make(map[reflect.Type]bool), + customResync: make(map[reflect.Type]time.Duration), + } + + // Apply all options + for _, opt := range options { + factory = opt(factory) + } + + return factory +} + +// Start initializes all requested informers. +func (f *sharedInformerFactory) Start(stopCh <-chan struct{}) { + f.lock.Lock() + defer f.lock.Unlock() + + for informerType, informer := range f.informers { + if !f.startedInformers[informerType] { + go informer.Run(stopCh) + f.startedInformers[informerType] = true + } + } +} + +// WaitForCacheSync waits for all started informers' cache were synced. 
+func (f *sharedInformerFactory) WaitForCacheSync(stopCh <-chan struct{}) map[reflect.Type]bool { + informers := func() map[reflect.Type]cache.SharedIndexInformer { + f.lock.Lock() + defer f.lock.Unlock() + + informers := map[reflect.Type]cache.SharedIndexInformer{} + for informerType, informer := range f.informers { + if f.startedInformers[informerType] { + informers[informerType] = informer + } + } + return informers + }() + + res := map[reflect.Type]bool{} + for informType, informer := range informers { + res[informType] = cache.WaitForCacheSync(stopCh, informer.HasSynced) + } + return res +} + +// InternalInformerFor returns the SharedIndexInformer for obj using an internal +// client. +func (f *sharedInformerFactory) InformerFor(obj runtime.Object, newFunc internalinterfaces.NewInformerFunc) cache.SharedIndexInformer { + f.lock.Lock() + defer f.lock.Unlock() + + informerType := reflect.TypeOf(obj) + informer, exists := f.informers[informerType] + if exists { + return informer + } + + resyncPeriod, exists := f.customResync[informerType] + if !exists { + resyncPeriod = f.defaultResync + } + + informer = newFunc(f.client, resyncPeriod) + f.informers[informerType] = informer + + return informer +} + +// SharedInformerFactory provides shared informers for resources in all known +// API group versions. 
+type SharedInformerFactory interface { + internalinterfaces.SharedInformerFactory + ForResource(resource schema.GroupVersionResource) (GenericInformer, error) + WaitForCacheSync(stopCh <-chan struct{}) map[reflect.Type]bool + + K8s() adminpolicybasedroute.Interface +} + +func (f *sharedInformerFactory) K8s() adminpolicybasedroute.Interface { + return adminpolicybasedroute.New(f, f.namespace, f.tweakListOptions) +} diff --git a/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/informers/externalversions/generic.go b/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/informers/externalversions/generic.go new file mode 100644 index 0000000000..f3473df83c --- /dev/null +++ b/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/informers/externalversions/generic.go @@ -0,0 +1,61 @@ +/* + + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +// Code generated by informer-gen. DO NOT EDIT. 
+ +package externalversions + +import ( + "fmt" + + v1 "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/adminpolicybasedroute/v1" + schema "k8s.io/apimachinery/pkg/runtime/schema" + cache "k8s.io/client-go/tools/cache" +) + +// GenericInformer is type of SharedIndexInformer which will locate and delegate to other +// sharedInformers based on type +type GenericInformer interface { + Informer() cache.SharedIndexInformer + Lister() cache.GenericLister +} + +type genericInformer struct { + informer cache.SharedIndexInformer + resource schema.GroupResource +} + +// Informer returns the SharedIndexInformer. +func (f *genericInformer) Informer() cache.SharedIndexInformer { + return f.informer +} + +// Lister returns the GenericLister. +func (f *genericInformer) Lister() cache.GenericLister { + return cache.NewGenericLister(f.Informer().GetIndexer(), f.resource) +} + +// ForResource gives generic access to a shared informer of the matching type +// TODO extend this to unknown resources with a client pool +func (f *sharedInformerFactory) ForResource(resource schema.GroupVersionResource) (GenericInformer, error) { + switch resource { + // Group=k8s.ovn.org, Version=v1 + case v1.SchemeGroupVersion.WithResource("adminpolicybasedexternalroutes"): + return &genericInformer{resource: resource.GroupResource(), informer: f.K8s().V1().AdminPolicyBasedExternalRoutes().Informer()}, nil + + } + + return nil, fmt.Errorf("no informer found for %v", resource) +} diff --git a/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/informers/externalversions/internalinterfaces/factory_interfaces.go b/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/informers/externalversions/internalinterfaces/factory_interfaces.go new file mode 100644 index 0000000000..1a2c758db5 --- /dev/null +++ b/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/informers/externalversions/internalinterfaces/factory_interfaces.go @@ -0,0 +1,39 @@ +/* + + +Licensed under the Apache License, Version 2.0 (the 
"License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +// Code generated by informer-gen. DO NOT EDIT. + +package internalinterfaces + +import ( + time "time" + + versioned "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned" + v1 "k8s.io/apimachinery/pkg/apis/meta/v1" + runtime "k8s.io/apimachinery/pkg/runtime" + cache "k8s.io/client-go/tools/cache" +) + +// NewInformerFunc takes versioned.Interface and time.Duration to return a SharedIndexInformer. +type NewInformerFunc func(versioned.Interface, time.Duration) cache.SharedIndexInformer + +// SharedInformerFactory a small interface to allow for adding an informer without an import cycle +type SharedInformerFactory interface { + Start(stopCh <-chan struct{}) + InformerFor(obj runtime.Object, newFunc NewInformerFunc) cache.SharedIndexInformer +} + +// TweakListOptionsFunc is a function that transforms a v1.ListOptions. 
+type TweakListOptionsFunc func(*v1.ListOptions) diff --git a/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/listers/adminpolicybasedroute/v1/adminpolicybasedexternalroute.go b/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/listers/adminpolicybasedroute/v1/adminpolicybasedexternalroute.go new file mode 100644 index 0000000000..d126826372 --- /dev/null +++ b/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/listers/adminpolicybasedroute/v1/adminpolicybasedexternalroute.go @@ -0,0 +1,67 @@ +/* + + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +// Code generated by lister-gen. DO NOT EDIT. + +package v1 + +import ( + v1 "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/adminpolicybasedroute/v1" + "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/labels" + "k8s.io/client-go/tools/cache" +) + +// AdminPolicyBasedExternalRouteLister helps list AdminPolicyBasedExternalRoutes. +// All objects returned here must be treated as read-only. +type AdminPolicyBasedExternalRouteLister interface { + // List lists all AdminPolicyBasedExternalRoutes in the indexer. + // Objects returned here must be treated as read-only. + List(selector labels.Selector) (ret []*v1.AdminPolicyBasedExternalRoute, err error) + // Get retrieves the AdminPolicyBasedExternalRoute from the index for a given name. + // Objects returned here must be treated as read-only. 
+ Get(name string) (*v1.AdminPolicyBasedExternalRoute, error) + AdminPolicyBasedExternalRouteListerExpansion +} + +// adminPolicyBasedExternalRouteLister implements the AdminPolicyBasedExternalRouteLister interface. +type adminPolicyBasedExternalRouteLister struct { + indexer cache.Indexer +} + +// NewAdminPolicyBasedExternalRouteLister returns a new AdminPolicyBasedExternalRouteLister. +func NewAdminPolicyBasedExternalRouteLister(indexer cache.Indexer) AdminPolicyBasedExternalRouteLister { + return &adminPolicyBasedExternalRouteLister{indexer: indexer} +} + +// List lists all AdminPolicyBasedExternalRoutes in the indexer. +func (s *adminPolicyBasedExternalRouteLister) List(selector labels.Selector) (ret []*v1.AdminPolicyBasedExternalRoute, err error) { + err = cache.ListAll(s.indexer, selector, func(m interface{}) { + ret = append(ret, m.(*v1.AdminPolicyBasedExternalRoute)) + }) + return ret, err +} + +// Get retrieves the AdminPolicyBasedExternalRoute from the index for a given name. +func (s *adminPolicyBasedExternalRouteLister) Get(name string) (*v1.AdminPolicyBasedExternalRoute, error) { + obj, exists, err := s.indexer.GetByKey(name) + if err != nil { + return nil, err + } + if !exists { + return nil, errors.NewNotFound(v1.Resource("adminpolicybasedexternalroute"), name) + } + return obj.(*v1.AdminPolicyBasedExternalRoute), nil +} diff --git a/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/listers/adminpolicybasedroute/v1/expansion_generated.go b/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/listers/adminpolicybasedroute/v1/expansion_generated.go new file mode 100644 index 0000000000..203e145172 --- /dev/null +++ b/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/listers/adminpolicybasedroute/v1/expansion_generated.go @@ -0,0 +1,22 @@ +/* + + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +// Code generated by lister-gen. DO NOT EDIT. + +package v1 + +// AdminPolicyBasedExternalRouteListerExpansion allows custom methods to be added to +// AdminPolicyBasedExternalRouteLister. +type AdminPolicyBasedExternalRouteListerExpansion interface{} diff --git a/go-controller/pkg/crd/adminpolicybasedroute/v1/doc.go b/go-controller/pkg/crd/adminpolicybasedroute/v1/doc.go new file mode 100644 index 0000000000..7b121f971b --- /dev/null +++ b/go-controller/pkg/crd/adminpolicybasedroute/v1/doc.go @@ -0,0 +1,4 @@ +// Package v1 contains API Schema definitions for the network v1 API group +// +k8s:deepcopy-gen=package,register +// +groupName=k8s.ovn.org +package v1 diff --git a/go-controller/pkg/crd/adminpolicybasedroute/v1/register.go b/go-controller/pkg/crd/adminpolicybasedroute/v1/register.go new file mode 100644 index 0000000000..876b6e355c --- /dev/null +++ b/go-controller/pkg/crd/adminpolicybasedroute/v1/register.go @@ -0,0 +1,29 @@ +package v1 + +import ( + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/runtime/schema" +) + +var ( + GroupName = "k8s.ovn.org" + SchemeGroupVersion = schema.GroupVersion{Group: GroupName, Version: "v1"} + SchemeBuilder = runtime.NewSchemeBuilder(addKnownTypes) + AddToScheme = SchemeBuilder.AddToScheme +) + +// Resource takes an unqualified resource and returns a Group qualified GroupResource +func Resource(resource string) schema.GroupResource { + return SchemeGroupVersion.WithResource(resource).GroupResource() +} + +// Adds the list of known types to api.Scheme. 
+func addKnownTypes(scheme *runtime.Scheme) error { + scheme.AddKnownTypes(SchemeGroupVersion, + &AdminPolicyBasedExternalRoute{}, + &AdminPolicyBasedExternalRouteList{}, + ) + metav1.AddToGroupVersion(scheme, SchemeGroupVersion) + return nil +} diff --git a/go-controller/pkg/crd/adminpolicybasedroute/v1/types.go b/go-controller/pkg/crd/adminpolicybasedroute/v1/types.go new file mode 100644 index 0000000000..d9eac0c3db --- /dev/null +++ b/go-controller/pkg/crd/adminpolicybasedroute/v1/types.go @@ -0,0 +1,166 @@ +/* +Copyright 2023. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package v1 + +import ( + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +// +genclient +// +genclient:nonNamespaced +// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object +// +kubebuilder:resource:path=adminpolicybasedexternalroutes,scope=Cluster,shortName=apbexternalroute,singular=adminpolicybasedexternalroute +// +kubebuilder:object:root=true +// +kubebuilder:subresource:status +// +kubebuilder:printcolumn:name="Last Update",type="date",JSONPath=`.status.lastTransitionTime` +// +kubebuilder:printcolumn:name="Status",type="string",JSONPath=`.status.status` +// AdminPolicyBasedExternalRoute is a CRD allowing the cluster administrators to configure policies for external gateway IPs to be applied to all the pods contained in selected namespaces. +// Egress traffic from the pods that belong to the selected namespaces to outside the cluster is routed through these external gateway IPs. 
+type AdminPolicyBasedExternalRoute struct { + metav1.TypeMeta `json:",inline"` + metav1.ObjectMeta `json:"metadata,omitempty"` + // +kubebuilder:validation:Required + // +required + Spec AdminPolicyBasedExternalRouteSpec `json:"spec"` + // +optional + Status AdminPolicyBasedRouteStatus `json:"status,omitempty"` +} + +// AdminPolicyBasedExternalRouteSpec defines the desired state of AdminPolicyBasedExternalRoute +type AdminPolicyBasedExternalRouteSpec struct { + // From defines the selectors that will determine the target namespaces to this CR. + From ExternalNetworkSource `json:"from"` + // NextHops defines two types of hops: Static and Dynamic. Each hop defines at least one external gateway IP. + NextHops ExternalNextHops `json:"nextHops"` +} + +// ExternalNetworkSource contains the selectors used to determine the namespaces where the policy will be applied to +type ExternalNetworkSource struct { + // NamespaceSelector defines a selector to be used to determine which namespaces will be targeted by this CR + NamespaceSelector metav1.LabelSelector `json:"namespaceSelector"` +} + +// +kubebuilder:validation:MinProperties:=1 +// ExternalNextHops contains slices of StaticHops and DynamicHops structures. Minimum is one StaticHop or one DynamicHop. +type ExternalNextHops struct { + // StaticHops defines a slice of StaticHop. This field is optional. + StaticHops []*StaticHop `json:"static,omitempty"` + //DynamicHops defines a slices of DynamicHop. This field is optional. + DynamicHops []*DynamicHop `json:"dynamic,omitempty"` +} + +// StaticHop defines the configuration of a static IP that acts as an external Gateway Interface. IP field is mandatory. +type StaticHop struct { + //IP defines the static IP to be used for egress traffic. The IP can be either IPv4 or IPv6. + // +kubebuilder:validation:Required + // +required + IP string `json:"ip"` + // BFDEnabled determines if the interface implements the Bidirectional Forward Detection protocol. Defaults to false. 
+ // +optional + // +kubebuilder:default:=false + // +default=false + BFDEnabled bool `json:"bfdEnabled,omitempty"` + // SkipHostSNAT determines whether to disable Source NAT to the host IP. Defaults to false. + // +optional + // +kubebuilder:default:=false + // +default=false + // SkipHostSNAT bool `json:"skipHostSNAT,omitempty"` +} + +// DynamicHop defines the configuration for a dynamic external gateway interface. +// These interfaces are wrapped around a pod object that resides inside the cluster. +// The field NetworkAttachmentName captures the name of the multus network name to use when retrieving the gateway IP to use. +// The PodSelector and the NamespaceSelector are mandatory fields. +type DynamicHop struct { + // PodSelector defines the selector to filter the pods that are external gateways. + // +kubebuilder:validation:Required + // +required + PodSelector metav1.LabelSelector `json:"podSelector"` + // NamespaceSelector defines a selector to filter the namespaces where the pod gateways are located. + // +kubebuilder:validation:Optional + // +optional + NamespaceSelector *metav1.LabelSelector `json:"namespaceSelector"` + // NetworkAttachmentName determines the multus network name to use when retrieving the pod IPs that will be used as the gateway IP. + // When this field is empty, the logic assumes that the pod is configured with HostNetwork and is using the node's IP as gateway. + // +optional + // +kubebuilder:default="" + // +default="" + NetworkAttachmentName string `json:"networkAttachmentName,omitempty"` + // BFDEnabled determines if the interface implements the Bidirectional Forward Detection protocol. Defaults to false. + // +optional + // +kubebuilder:default:=false + // +default=false + BFDEnabled bool `json:"bfdEnabled,omitempty"` + // SkipHostSNAT determines whether to disable Source NAT to the host IP. 
Defaults to false + // +optional + // +kubebuilder:default:=false + // +default=false + // SkipHostSNAT bool `json:"skipHostSNAT,omitempty"` +} + +// +kubebuilder:object:root=true +// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object +// AdminPolicyBasedExternalRouteList contains a list of AdminPolicyBasedExternalRoutes +type AdminPolicyBasedExternalRouteList struct { + metav1.TypeMeta `json:",inline"` + metav1.ListMeta `json:"metadata,omitempty"` + Items []AdminPolicyBasedExternalRoute `json:"items"` +} + +// AdminPolicyBasedRouteStatus contains the observed status of the AdminPolicyBased route types. +type AdminPolicyBasedRouteStatus struct { + // Captures the time when the last change was applied. + LastTransitionTime metav1.Time `json:"lastTransitionTime"` + // An array of Human-readable messages indicating details about the status of the object. + Messages []string `json:"messages"` + // A concise indication of whether the AdminPolicyBasedRoute resource is applied with success + Status StatusType `json:"status"` +} + +// StatusType defines the types of status used in the Status field. The value determines if the +// deployment of the CR was successful or if it failed. 
+type StatusType string + +const ( + SuccessStatus StatusType = "Success" + FailStatus StatusType = "Fail" +) + +// // +genclient +// // +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object +// // +kubebuilder:resource:path=adminpolicybasedexternalroute,scope=Cluster +// // +kubebuilder:object:root=true +// // +kubebuilder:subresource:status +// type AdminPolicyBasedInternalRoute struct { +// metav1.TypeMeta `json:",inline"` +// metav1.ObjectMeta `json:"metadata,omitempty"` +// Spec AdminPolicyBasedInternalRouteSpec `json:"spec,omitempty"` +// Status AdminPolicyBasedRouteStatus `json:"status,omitempty"` +// } + +// // AdminPolicyBasedInternalRouteSpec defines the desired state of AdminPolicyBasedInternalRoute +// type AdminPolicyBasedInternalRouteSpec struct { +// } + +// // +kubebuilder:object:root=true +// // +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object +// // AdminPolicyBasedExternalRouteList contains a list of AdminPolicyBasedExternalRoutes +// type AdminPolicyBasedInternalRouteList struct { +// metav1.TypeMeta `json:",inline"` +// metav1.ListMeta `json:"metadata,omitempty"` +// Items []AdminPolicyBasedInternalRoute `json:"items"` +// } diff --git a/go-controller/pkg/crd/adminpolicybasedroute/v1/zz_generated.deepcopy.go b/go-controller/pkg/crd/adminpolicybasedroute/v1/zz_generated.deepcopy.go new file mode 100644 index 0000000000..e6d4c308cc --- /dev/null +++ b/go-controller/pkg/crd/adminpolicybasedroute/v1/zz_generated.deepcopy.go @@ -0,0 +1,220 @@ +//go:build !ignore_autogenerated +// +build !ignore_autogenerated + +/* + + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +// Code generated by deepcopy-gen. DO NOT EDIT. + +package v1 + +import ( + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + runtime "k8s.io/apimachinery/pkg/runtime" +) + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *AdminPolicyBasedExternalRoute) DeepCopyInto(out *AdminPolicyBasedExternalRoute) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) + in.Spec.DeepCopyInto(&out.Spec) + in.Status.DeepCopyInto(&out.Status) + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new AdminPolicyBasedExternalRoute. +func (in *AdminPolicyBasedExternalRoute) DeepCopy() *AdminPolicyBasedExternalRoute { + if in == nil { + return nil + } + out := new(AdminPolicyBasedExternalRoute) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *AdminPolicyBasedExternalRoute) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *AdminPolicyBasedExternalRouteList) DeepCopyInto(out *AdminPolicyBasedExternalRouteList) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ListMeta.DeepCopyInto(&out.ListMeta) + if in.Items != nil { + in, out := &in.Items, &out.Items + *out = make([]AdminPolicyBasedExternalRoute, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new AdminPolicyBasedExternalRouteList. +func (in *AdminPolicyBasedExternalRouteList) DeepCopy() *AdminPolicyBasedExternalRouteList { + if in == nil { + return nil + } + out := new(AdminPolicyBasedExternalRouteList) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *AdminPolicyBasedExternalRouteList) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *AdminPolicyBasedExternalRouteSpec) DeepCopyInto(out *AdminPolicyBasedExternalRouteSpec) { + *out = *in + in.From.DeepCopyInto(&out.From) + in.NextHops.DeepCopyInto(&out.NextHops) + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new AdminPolicyBasedExternalRouteSpec. +func (in *AdminPolicyBasedExternalRouteSpec) DeepCopy() *AdminPolicyBasedExternalRouteSpec { + if in == nil { + return nil + } + out := new(AdminPolicyBasedExternalRouteSpec) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *AdminPolicyBasedRouteStatus) DeepCopyInto(out *AdminPolicyBasedRouteStatus) { + *out = *in + in.LastTransitionTime.DeepCopyInto(&out.LastTransitionTime) + if in.Messages != nil { + in, out := &in.Messages, &out.Messages + *out = make([]string, len(*in)) + copy(*out, *in) + } + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new AdminPolicyBasedRouteStatus. +func (in *AdminPolicyBasedRouteStatus) DeepCopy() *AdminPolicyBasedRouteStatus { + if in == nil { + return nil + } + out := new(AdminPolicyBasedRouteStatus) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *DynamicHop) DeepCopyInto(out *DynamicHop) { + *out = *in + in.PodSelector.DeepCopyInto(&out.PodSelector) + if in.NamespaceSelector != nil { + in, out := &in.NamespaceSelector, &out.NamespaceSelector + *out = new(metav1.LabelSelector) + (*in).DeepCopyInto(*out) + } + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DynamicHop. +func (in *DynamicHop) DeepCopy() *DynamicHop { + if in == nil { + return nil + } + out := new(DynamicHop) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *ExternalNetworkSource) DeepCopyInto(out *ExternalNetworkSource) { + *out = *in + in.NamespaceSelector.DeepCopyInto(&out.NamespaceSelector) + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ExternalNetworkSource. +func (in *ExternalNetworkSource) DeepCopy() *ExternalNetworkSource { + if in == nil { + return nil + } + out := new(ExternalNetworkSource) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *ExternalNextHops) DeepCopyInto(out *ExternalNextHops) { + *out = *in + if in.StaticHops != nil { + in, out := &in.StaticHops, &out.StaticHops + *out = make([]*StaticHop, len(*in)) + for i := range *in { + if (*in)[i] != nil { + in, out := &(*in)[i], &(*out)[i] + *out = new(StaticHop) + **out = **in + } + } + } + if in.DynamicHops != nil { + in, out := &in.DynamicHops, &out.DynamicHops + *out = make([]*DynamicHop, len(*in)) + for i := range *in { + if (*in)[i] != nil { + in, out := &(*in)[i], &(*out)[i] + *out = new(DynamicHop) + (*in).DeepCopyInto(*out) + } + } + } + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ExternalNextHops. +func (in *ExternalNextHops) DeepCopy() *ExternalNextHops { + if in == nil { + return nil + } + out := new(ExternalNextHops) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *StaticHop) DeepCopyInto(out *StaticHop) { + *out = *in + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new StaticHop. +func (in *StaticHop) DeepCopy() *StaticHop { + if in == nil { + return nil + } + out := new(StaticHop) + in.DeepCopyInto(out) + return out +} From 91046e8892e603822cbae2c5ff1351b703664da8 Mon Sep 17 00:00:00 2001 From: jordigilh Date: Sat, 8 Apr 2023 10:51:27 -0400 Subject: [PATCH 08/73] Summary of changes: * Implements controllers for Admin Policy Based External Route to handle changes to namespaces, pods and admin policy based external route CRs. * Initialize in master node to handle interactions with the north bound DB. Initialize in worker nodes to handle changes to the conntrack (delete ECMP entries when a gateway IP is no longer a valid external gateway IP) * Implements repair() function for the master node. 
* Integrates with the annotation logic to avoid duplications in cache by sharing the externalGWCache and EXGWCacheMutex objects between the annotation and controller logic. * Updates the annotation logic to ensure the namespace annotation k8s.ovn.org/external-gw-pod-ips is updated when changes occur in a CR instance that coexists in the same namespace and that can impact the list of dynamic gateway IPs. * The implementation no longer relies on namespace annotations, including "k8s.ovn.org/external-gw-pod-ips", instead it uses its own cache structure to identify the valid pod IPs for a given namespace. * Implements E2E tests for admin policy based external route. The tests are a duplication of the existing annotation-based logic for external gateways using the CR instead. Signed-off-by: jordigilh --- README.md | 2 + contrib/kind.sh | 1 + dist/images/daemonset.sh | 1 + ...org_adminpolicybasedexternalroutes.yaml.j2 | 289 ++ dist/templates/ovn-setup.yaml.j2 | 6 + go-controller/pkg/factory/factory.go | 12 +- .../pkg/factory/mocks/NodeWatchFactory.go | 34 + go-controller/pkg/factory/types.go | 4 + go-controller/pkg/kube/kube.go | 2 + go-controller/pkg/kube/mocks/Interface.go | 153 +- go-controller/pkg/libovsdbops/router.go | 18 + .../network_controller_manager.go | 1 + .../node_network_controller_manager.go | 27 +- .../base_node_network_controller_dpu_test.go | 4 +- .../node/default_node_network_controller.go | 60 +- .../pkg/node/gateway_init_linux_test.go | 6 +- go-controller/pkg/node/ovn_test.go | 8 +- .../apbroute/apbroute_suite_test.go | 13 + .../apbroute/external_controller.go | 372 ++ .../apbroute/external_controller_namespace.go | 137 + .../external_controller_namespace_test.go | 358 ++ .../apbroute/external_controller_pod.go | 436 +++ .../apbroute/external_controller_pod_test.go | 500 +++ .../apbroute/external_controller_policy.go | 722 ++++ .../external_controller_policy_test.go | 723 ++++ .../controller/apbroute/master_controller.go | 568 +++
.../ovn/controller/apbroute/network_client.go | 744 ++++ .../controller/apbroute/node_controller.go | 496 +++ .../pkg/ovn/controller/apbroute/repair.go | 367 ++ .../pkg/ovn/default_network_controller.go | 37 +- go-controller/pkg/ovn/egressgw.go | 348 +- .../pkg/ovn/external_gateway_test.go | 2891 +++++++++++++++ go-controller/pkg/ovn/ovn_test.go | 33 +- go-controller/pkg/util/kube.go | 20 +- go-controller/pkg/util/net_linux.go | 1 - test/e2e/e2e.go | 11 +- test/e2e/external_gateways.go | 3084 ++++++++++++----- 37 files changed, 11258 insertions(+), 1231 deletions(-) create mode 100644 dist/templates/k8s.ovn.org_adminpolicybasedexternalroutes.yaml.j2 create mode 100644 go-controller/pkg/ovn/controller/apbroute/apbroute_suite_test.go create mode 100644 go-controller/pkg/ovn/controller/apbroute/external_controller.go create mode 100644 go-controller/pkg/ovn/controller/apbroute/external_controller_namespace.go create mode 100644 go-controller/pkg/ovn/controller/apbroute/external_controller_namespace_test.go create mode 100644 go-controller/pkg/ovn/controller/apbroute/external_controller_pod.go create mode 100644 go-controller/pkg/ovn/controller/apbroute/external_controller_pod_test.go create mode 100644 go-controller/pkg/ovn/controller/apbroute/external_controller_policy.go create mode 100644 go-controller/pkg/ovn/controller/apbroute/external_controller_policy_test.go create mode 100644 go-controller/pkg/ovn/controller/apbroute/master_controller.go create mode 100644 go-controller/pkg/ovn/controller/apbroute/network_client.go create mode 100644 go-controller/pkg/ovn/controller/apbroute/node_controller.go create mode 100644 go-controller/pkg/ovn/controller/apbroute/repair.go create mode 100644 go-controller/pkg/ovn/external_gateway_test.go diff --git a/README.md b/README.md index f424a62ffd..ec1354bb25 100644 --- a/README.md +++ b/README.md @@ -69,6 +69,8 @@ kubectl create -f $HOME/work/src/github.com/ovn-org/ovn-kubernetes/dist/yaml/ovn kubectl create -f 
$HOME/work/src/github.com/ovn-org/ovn-kubernetes/dist/yaml/k8s.ovn.org_egressips.yaml # create egressfirewalls.k8s.ovn.org CRD kubectl create -f $HOME/work/src/github.com/ovn-org/ovn-kubernetes/dist/yaml/k8s.ovn.org_egressfirewalls.yaml +# create adminpolicybasedexternalroute.k8s.ovn.org CRD +kubectl create -f $HOME/work/src/github.com/ovn-org/ovn-kubernetes/dist/yaml/k8s.ovn.org_adminpolicybasedexternalroutes.yaml # Run ovnkube-db deployment. kubectl create -f $HOME/work/src/github.com/ovn-org/ovn-kubernetes/dist/yaml/ovnkube-db.yaml diff --git a/contrib/kind.sh b/contrib/kind.sh index ed2d5b2d32..9a9060a5bf 100755 --- a/contrib/kind.sh +++ b/contrib/kind.sh @@ -836,6 +836,7 @@ install_ovn() { run_kubectl apply -f k8s.ovn.org_egressips.yaml run_kubectl apply -f k8s.ovn.org_egressqoses.yaml run_kubectl apply -f k8s.ovn.org_egressservices.yaml + run_kubectl apply -f k8s.ovn.org_adminpolicybasedexternalroutes.yaml run_kubectl apply -f ovn-setup.yaml MASTER_NODES=$(kind get nodes --name "${KIND_CLUSTER_NAME}" | sort | head -n "${KIND_NUM_MASTER}") # We want OVN HA not Kubernetes HA diff --git a/dist/images/daemonset.sh b/dist/images/daemonset.sh index a6a1f7894f..5711ee3017 100755 --- a/dist/images/daemonset.sh +++ b/dist/images/daemonset.sh @@ -649,5 +649,6 @@ cp ../templates/k8s.ovn.org_egressfirewalls.yaml.j2 ${output_dir}/k8s.ovn.org_eg cp ../templates/k8s.ovn.org_egressips.yaml.j2 ${output_dir}/k8s.ovn.org_egressips.yaml cp ../templates/k8s.ovn.org_egressqoses.yaml.j2 ${output_dir}/k8s.ovn.org_egressqoses.yaml cp ../templates/k8s.ovn.org_egressservices.yaml.j2 ${output_dir}/k8s.ovn.org_egressservices.yaml +cp ../templates/k8s.ovn.org_adminpolicybasedexternalroutes.yaml.j2 ${output_dir}/k8s.ovn.org_adminpolicybasedexternalroutes.yaml exit 0 diff --git a/dist/templates/k8s.ovn.org_adminpolicybasedexternalroutes.yaml.j2 b/dist/templates/k8s.ovn.org_adminpolicybasedexternalroutes.yaml.j2 new file mode 100644 index 0000000000..a0eb26a0a8 --- /dev/null +++ 
b/dist/templates/k8s.ovn.org_adminpolicybasedexternalroutes.yaml.j2 @@ -0,0 +1,289 @@ +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.10.0 + creationTimestamp: null + name: adminpolicybasedexternalroutes.k8s.ovn.org +spec: + group: k8s.ovn.org + names: + kind: AdminPolicyBasedExternalRoute + listKind: AdminPolicyBasedExternalRouteList + plural: adminpolicybasedexternalroutes + shortNames: + - apbexternalroute + singular: adminpolicybasedexternalroute + scope: Cluster + versions: + - additionalPrinterColumns: + - jsonPath: .status.lastTransitionTime + name: Last Update + type: date + - jsonPath: .status.status + name: Status + type: string + name: v1 + schema: + openAPIV3Schema: + description: AdminPolicyBasedExternalRoute is a CRD allowing the cluster administrators + to configure policies for external gateway IPs to be applied to all the + pods contained in selected namespaces. Egress traffic from the pods that + belong to the selected namespaces to outside the cluster is routed through + these external gateway IPs. + properties: + apiVersion: + description: 'APIVersion defines the versioned schema of this representation + of an object. Servers should convert recognized schemas to the latest + internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources' + type: string + kind: + description: 'Kind is a string value representing the REST resource this + object represents. Servers may infer this from the endpoint the client + submits requests to. Cannot be updated. In CamelCase. 
More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds' + type: string + metadata: + type: object + spec: + description: AdminPolicyBasedExternalRouteSpec defines the desired state + of AdminPolicyBasedExternalRoute + properties: + from: + description: From defines the selectors that will determine the target + namespaces to this CR. + properties: + namespaceSelector: + description: NamespaceSelector defines a selector to be used to + determine which namespaces will be targeted by this CR + properties: + matchExpressions: + description: matchExpressions is a list of label selector + requirements. The requirements are ANDed. + items: + description: A label selector requirement is a selector + that contains values, a key, and an operator that relates + the key and values. + properties: + key: + description: key is the label key that the selector + applies to. + type: string + operator: + description: operator represents a key's relationship + to a set of values. Valid operators are In, NotIn, + Exists and DoesNotExist. + type: string + values: + description: values is an array of string values. If + the operator is In or NotIn, the values array must + be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced + during a strategic merge patch. + items: + type: string + type: array + required: + - key + - operator + type: object + type: array + matchLabels: + additionalProperties: + type: string + description: matchLabels is a map of {key,value} pairs. A + single {key,value} in the matchLabels map is equivalent + to an element of matchExpressions, whose key field is "key", + the operator is "In", and the values array contains only + "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + required: + - namespaceSelector + type: object + nextHops: + description: 'NextHops defines two types of hops: Static and Dynamic. 
+ Each hop defines at least one external gateway IP.' + minProperties: 1 + properties: + dynamic: + description: DynamicHops defines a slices of DynamicHop. This + field is optional. + items: + description: DynamicHop defines the configuration for a dynamic + external gateway interface. These interfaces are wrapped around + a pod object that resides inside the cluster. The field NetworkAttachmentName + captures the name of the multus network name to use when retrieving + the gateway IP to use. The PodSelector and the NamespaceSelector + are mandatory fields. + properties: + bfdEnabled: + default: false + description: BFDEnabled determines if the interface implements + the Bidirectional Forward Detection protocol. Defaults + to false. + type: boolean + namespaceSelector: + description: NamespaceSelector defines a selector to filter + the namespaces where the pod gateways are located. + properties: + matchExpressions: + description: matchExpressions is a list of label selector + requirements. The requirements are ANDed. + items: + description: A label selector requirement is a selector + that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label key that the selector + applies to. + type: string + operator: + description: operator represents a key's relationship + to a set of values. Valid operators are In, + NotIn, Exists and DoesNotExist. + type: string + values: + description: values is an array of string values. + If the operator is In or NotIn, the values array + must be non-empty. If the operator is Exists + or DoesNotExist, the values array must be empty. + This array is replaced during a strategic merge + patch. + items: + type: string + type: array + required: + - key + - operator + type: object + type: array + matchLabels: + additionalProperties: + type: string + description: matchLabels is a map of {key,value} pairs. 
+ A single {key,value} in the matchLabels map is equivalent + to an element of matchExpressions, whose key field + is "key", the operator is "In", and the values array + contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + networkAttachmentName: + default: "" + description: NetworkAttachmentName determines the multus + network name to use when retrieving the pod IPs that will + be used as the gateway IP. When this field is empty, the + logic assumes that the pod is configured with HostNetwork + and is using the node's IP as gateway. + type: string + podSelector: + description: PodSelector defines the selector to filter + the pods that are external gateways. + properties: + matchExpressions: + description: matchExpressions is a list of label selector + requirements. The requirements are ANDed. + items: + description: A label selector requirement is a selector + that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label key that the selector + applies to. + type: string + operator: + description: operator represents a key's relationship + to a set of values. Valid operators are In, + NotIn, Exists and DoesNotExist. + type: string + values: + description: values is an array of string values. + If the operator is In or NotIn, the values array + must be non-empty. If the operator is Exists + or DoesNotExist, the values array must be empty. + This array is replaced during a strategic merge + patch. + items: + type: string + type: array + required: + - key + - operator + type: object + type: array + matchLabels: + additionalProperties: + type: string + description: matchLabels is a map of {key,value} pairs. + A single {key,value} in the matchLabels map is equivalent + to an element of matchExpressions, whose key field + is "key", the operator is "In", and the values array + contains only "value". The requirements are ANDed. 
+ type: object + type: object + x-kubernetes-map-type: atomic + required: + - podSelector + type: object + type: array + static: + description: StaticHops defines a slice of StaticHop. This field + is optional. + items: + description: StaticHop defines the configuration of a static + IP that acts as an external Gateway Interface. IP field is + mandatory. + properties: + bfdEnabled: + default: false + description: BFDEnabled determines if the interface implements + the Bidirectional Forward Detection protocol. Defaults + to false. + type: boolean + ip: + description: IP defines the static IP to be used for egress + traffic. The IP can be either IPv4 or IPv6. + type: string + required: + - ip + type: object + type: array + type: object + required: + - from + - nextHops + type: object + status: + description: AdminPolicyBasedRouteStatus contains the observed status + of the AdminPolicyBased route types. + properties: + lastTransitionTime: + description: Captures the time when the last change was applied. + format: date-time + type: string + messages: + description: An array of Human-readable messages indicating details + about the status of the object. 
+ items: + type: string + type: array + status: + description: A concise indication of whether the AdminPolicyBasedRoute + resource is applied with success + type: string + required: + - lastTransitionTime + - messages + - status + type: object + required: + - spec + type: object + served: true + storage: true + subresources: + status: {} diff --git a/dist/templates/ovn-setup.yaml.j2 b/dist/templates/ovn-setup.yaml.j2 index 1319ee5b4b..041012a073 100644 --- a/dist/templates/ovn-setup.yaml.j2 +++ b/dist/templates/ovn-setup.yaml.j2 @@ -87,7 +87,13 @@ rules: - egressqoses - egressservices - egressservices/status + - adminpolicybasedexternalroutes verbs: ["list", "get", "watch", "update", "patch"] +- apiGroups: + - k8s.ovn.org + resources: + - adminpolicybasedexternalroutes/status + verbs: [ "update"] - apiGroups: - apiextensions.k8s.io resources: diff --git a/go-controller/pkg/factory/factory.go b/go-controller/pkg/factory/factory.go index 04472d3c2f..8575a25b70 100644 --- a/go-controller/pkg/factory/factory.go +++ b/go-controller/pkg/factory/factory.go @@ -354,6 +354,13 @@ func NewNodeWatchFactory(ovnClientset *util.OVNNodeClientset, nodeName string) ( return nil, err } + var err error + wf.informers[PodType], err = newQueuedInformer(PodType, wf.iFactory.Core().V1().Pods().Informer(), wf.stopChan, + defaultNumEventQueues) + if err != nil { + return nil, err + } + // For Services and Endpoints, pre-populate the shared Informer with one that // has a label selector excluding headless services. 
wf.iFactory.InformerFor(&kapi.Service{}, func(c kubernetes.Interface, resyncPeriod time.Duration) cache.SharedIndexInformer { @@ -394,7 +401,6 @@ func NewNodeWatchFactory(ovnClientset *util.OVNNodeClientset, nodeName string) ( withServiceNameAndNoHeadlessServiceSelector()) }) - var err error wf.informers[NamespaceType], err = newInformer(NamespaceType, wf.iFactory.Core().V1().Namespaces().Informer()) if err != nil { return nil, err @@ -983,8 +989,8 @@ func (wf *WatchFactory) PodCoreInformer() v1coreinformers.PodInformer { return wf.iFactory.Core().V1().Pods() } -func (wf *WatchFactory) NamespaceInformer() cache.SharedIndexInformer { - return wf.informers[NamespaceType].inf +func (wf *WatchFactory) NamespaceInformer() v1coreinformers.NamespaceInformer { + return wf.iFactory.Core().V1().Namespaces() } func (wf *WatchFactory) ServiceInformer() cache.SharedIndexInformer { diff --git a/go-controller/pkg/factory/mocks/NodeWatchFactory.go b/go-controller/pkg/factory/mocks/NodeWatchFactory.go index 67719f0540..8b2fe1629f 100644 --- a/go-controller/pkg/factory/mocks/NodeWatchFactory.go +++ b/go-controller/pkg/factory/mocks/NodeWatchFactory.go @@ -8,6 +8,8 @@ import ( factory "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/factory" + informerscorev1 "k8s.io/client-go/informers/core/v1" + labels "k8s.io/apimachinery/pkg/labels" mock "github.com/stretchr/testify/mock" @@ -381,6 +383,22 @@ func (_m *NodeWatchFactory) LocalPodInformer() cache.SharedIndexInformer { return r0 } +// NamespaceInformer provides a mock function with given fields: +func (_m *NodeWatchFactory) NamespaceInformer() informerscorev1.NamespaceInformer { + ret := _m.Called() + + var r0 informerscorev1.NamespaceInformer + if rf, ok := ret.Get(0).(func() informerscorev1.NamespaceInformer); ok { + r0 = rf() + } else { + if ret.Get(0) != nil { + r0 = ret.Get(0).(informerscorev1.NamespaceInformer) + } + } + + return r0 +} + // NodeInformer provides a mock function with given fields: func (_m 
*NodeWatchFactory) NodeInformer() cache.SharedIndexInformer { ret := _m.Called() @@ -397,6 +415,22 @@ func (_m *NodeWatchFactory) NodeInformer() cache.SharedIndexInformer { return r0 } +// PodCoreInformer provides a mock function with given fields: +func (_m *NodeWatchFactory) PodCoreInformer() informerscorev1.PodInformer { + ret := _m.Called() + + var r0 informerscorev1.PodInformer + if rf, ok := ret.Get(0).(func() informerscorev1.PodInformer); ok { + r0 = rf() + } else { + if ret.Get(0) != nil { + r0 = ret.Get(0).(informerscorev1.PodInformer) + } + } + + return r0 +} + // RemoveEndpointSliceHandler provides a mock function with given fields: handler func (_m *NodeWatchFactory) RemoveEndpointSliceHandler(handler *factory.Handler) { _m.Called(handler) diff --git a/go-controller/pkg/factory/types.go b/go-controller/pkg/factory/types.go index b4e2baaa5f..a80a120fef 100644 --- a/go-controller/pkg/factory/types.go +++ b/go-controller/pkg/factory/types.go @@ -4,6 +4,8 @@ import ( kapi "k8s.io/api/core/v1" discovery "k8s.io/api/discovery/v1" "k8s.io/apimachinery/pkg/labels" + coreinformers "k8s.io/client-go/informers/core/v1" + v1coreinformers "k8s.io/client-go/informers/core/v1" "k8s.io/client-go/tools/cache" ) @@ -46,6 +48,8 @@ type NodeWatchFactory interface { NodeInformer() cache.SharedIndexInformer LocalPodInformer() cache.SharedIndexInformer + NamespaceInformer() coreinformers.NamespaceInformer + PodCoreInformer() v1coreinformers.PodInformer GetPods(namespace string) ([]*kapi.Pod, error) GetPod(namespace, name string) (*kapi.Pod, error) diff --git a/go-controller/pkg/kube/kube.go b/go-controller/pkg/kube/kube.go index c790df4b9b..681e012044 100644 --- a/go-controller/pkg/kube/kube.go +++ b/go-controller/pkg/kube/kube.go @@ -6,6 +6,7 @@ import ( ocpcloudnetworkapi "github.com/openshift/api/cloudnetwork/v1" ocpcloudnetworkclientset "github.com/openshift/client-go/cloudnetwork/clientset/versioned" + adminpolicybasedrouteclientset 
"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned" egressfirewall "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressfirewall/v1" egressfirewallclientset "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressfirewall/v1/apis/clientset/versioned" egressipv1 "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressip/v1" @@ -73,6 +74,7 @@ type KubeOVN struct { EgressFirewallClient egressfirewallclientset.Interface CloudNetworkClient ocpcloudnetworkclientset.Interface EgressServiceClient egressserviceclientset.Interface + APBRouteClient adminpolicybasedrouteclientset.Interface } // SetAnnotationsOnPod takes the pod object and map of key/value string pairs to set as annotations diff --git a/go-controller/pkg/kube/mocks/Interface.go b/go-controller/pkg/kube/mocks/Interface.go index f54b087e2b..b9fa0ab582 100644 --- a/go-controller/pkg/kube/mocks/Interface.go +++ b/go-controller/pkg/kube/mocks/Interface.go @@ -3,13 +3,16 @@ package mocks import ( - corev1 "k8s.io/api/core/v1" + adminpolicybasedroutev1 "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/adminpolicybasedroute/v1" + apicorev1 "k8s.io/api/core/v1" + + corev1 "k8s.io/client-go/kubernetes/typed/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" mock "github.com/stretchr/testify/mock" - v1 "k8s.io/client-go/kubernetes/typed/core/v1" + v1 "github.com/openshift/api/cloudnetwork/v1" ) // Interface is an autogenerated mock type for the Interface type @@ -17,16 +20,53 @@ type Interface struct { mock.Mock } +// CreateCloudPrivateIPConfig provides a mock function with given fields: cloudPrivateIPConfig +func (_m *Interface) CreateCloudPrivateIPConfig(cloudPrivateIPConfig *v1.CloudPrivateIPConfig) (*v1.CloudPrivateIPConfig, error) { + ret := _m.Called(cloudPrivateIPConfig) + + var r0 *v1.CloudPrivateIPConfig + if rf, ok := ret.Get(0).(func(*v1.CloudPrivateIPConfig) *v1.CloudPrivateIPConfig); ok { + r0 = rf(cloudPrivateIPConfig) 
+ } else { + if ret.Get(0) != nil { + r0 = ret.Get(0).(*v1.CloudPrivateIPConfig) + } + } + + var r1 error + if rf, ok := ret.Get(1).(func(*v1.CloudPrivateIPConfig) error); ok { + r1 = rf(cloudPrivateIPConfig) + } else { + r1 = ret.Error(1) + } + + return r0, r1 +} + +// DeleteCloudPrivateIPConfig provides a mock function with given fields: name +func (_m *Interface) DeleteCloudPrivateIPConfig(name string) error { + ret := _m.Called(name) + + var r0 error + if rf, ok := ret.Get(0).(func(string) error); ok { + r0 = rf(name) + } else { + r0 = ret.Error(0) + } + + return r0 +} + // Events provides a mock function with given fields: -func (_m *Interface) Events() v1.EventInterface { +func (_m *Interface) Events() corev1.EventInterface { ret := _m.Called() - var r0 v1.EventInterface - if rf, ok := ret.Get(0).(func() v1.EventInterface); ok { + var r0 corev1.EventInterface + if rf, ok := ret.Get(0).(func() corev1.EventInterface); ok { r0 = rf() } else { if ret.Get(0) != nil { - r0 = ret.Get(0).(v1.EventInterface) + r0 = ret.Get(0).(corev1.EventInterface) } } @@ -57,15 +97,15 @@ func (_m *Interface) GetAnnotationsOnPod(namespace string, name string) (map[str } // GetNamespaces provides a mock function with given fields: labelSelector -func (_m *Interface) GetNamespaces(labelSelector metav1.LabelSelector) (*corev1.NamespaceList, error) { +func (_m *Interface) GetNamespaces(labelSelector metav1.LabelSelector) (*apicorev1.NamespaceList, error) { ret := _m.Called(labelSelector) - var r0 *corev1.NamespaceList - if rf, ok := ret.Get(0).(func(metav1.LabelSelector) *corev1.NamespaceList); ok { + var r0 *apicorev1.NamespaceList + if rf, ok := ret.Get(0).(func(metav1.LabelSelector) *apicorev1.NamespaceList); ok { r0 = rf(labelSelector) } else { if ret.Get(0) != nil { - r0 = ret.Get(0).(*corev1.NamespaceList) + r0 = ret.Get(0).(*apicorev1.NamespaceList) } } @@ -80,15 +120,15 @@ func (_m *Interface) GetNamespaces(labelSelector metav1.LabelSelector) (*corev1. 
} // GetNode provides a mock function with given fields: name -func (_m *Interface) GetNode(name string) (*corev1.Node, error) { +func (_m *Interface) GetNode(name string) (*apicorev1.Node, error) { ret := _m.Called(name) - var r0 *corev1.Node - if rf, ok := ret.Get(0).(func(string) *corev1.Node); ok { + var r0 *apicorev1.Node + if rf, ok := ret.Get(0).(func(string) *apicorev1.Node); ok { r0 = rf(name) } else { if ret.Get(0) != nil { - r0 = ret.Get(0).(*corev1.Node) + r0 = ret.Get(0).(*apicorev1.Node) } } @@ -103,15 +143,15 @@ func (_m *Interface) GetNode(name string) (*corev1.Node, error) { } // GetNodes provides a mock function with given fields: -func (_m *Interface) GetNodes() (*corev1.NodeList, error) { +func (_m *Interface) GetNodes() (*apicorev1.NodeList, error) { ret := _m.Called() - var r0 *corev1.NodeList - if rf, ok := ret.Get(0).(func() *corev1.NodeList); ok { + var r0 *apicorev1.NodeList + if rf, ok := ret.Get(0).(func() *apicorev1.NodeList); ok { r0 = rf() } else { if ret.Get(0) != nil { - r0 = ret.Get(0).(*corev1.NodeList) + r0 = ret.Get(0).(*apicorev1.NodeList) } } @@ -126,15 +166,15 @@ func (_m *Interface) GetNodes() (*corev1.NodeList, error) { } // GetPod provides a mock function with given fields: namespace, name -func (_m *Interface) GetPod(namespace string, name string) (*corev1.Pod, error) { +func (_m *Interface) GetPod(namespace string, name string) (*apicorev1.Pod, error) { ret := _m.Called(namespace, name) - var r0 *corev1.Pod - if rf, ok := ret.Get(0).(func(string, string) *corev1.Pod); ok { + var r0 *apicorev1.Pod + if rf, ok := ret.Get(0).(func(string, string) *apicorev1.Pod); ok { r0 = rf(namespace, name) } else { if ret.Get(0) != nil { - r0 = ret.Get(0).(*corev1.Pod) + r0 = ret.Get(0).(*apicorev1.Pod) } } @@ -149,15 +189,15 @@ func (_m *Interface) GetPod(namespace string, name string) (*corev1.Pod, error) } // GetPods provides a mock function with given fields: namespace, labelSelector -func (_m *Interface) GetPods(namespace string, 
labelSelector metav1.LabelSelector) (*corev1.PodList, error) { +func (_m *Interface) GetPods(namespace string, labelSelector metav1.LabelSelector) (*apicorev1.PodList, error) { ret := _m.Called(namespace, labelSelector) - var r0 *corev1.PodList - if rf, ok := ret.Get(0).(func(string, metav1.LabelSelector) *corev1.PodList); ok { + var r0 *apicorev1.PodList + if rf, ok := ret.Get(0).(func(string, metav1.LabelSelector) *apicorev1.PodList); ok { r0 = rf(namespace, labelSelector) } else { if ret.Get(0) != nil { - r0 = ret.Get(0).(*corev1.PodList) + r0 = ret.Get(0).(*apicorev1.PodList) } } @@ -172,11 +212,11 @@ func (_m *Interface) GetPods(namespace string, labelSelector metav1.LabelSelecto } // PatchNode provides a mock function with given fields: old, new -func (_m *Interface) PatchNode(old *corev1.Node, new *corev1.Node) error { +func (_m *Interface) PatchNode(old *apicorev1.Node, new *apicorev1.Node) error { ret := _m.Called(old, new) var r0 error - if rf, ok := ret.Get(0).(func(*corev1.Node, *corev1.Node) error); ok { + if rf, ok := ret.Get(0).(func(*apicorev1.Node, *apicorev1.Node) error); ok { r0 = rf(old, new) } else { r0 = ret.Error(0) @@ -186,11 +226,11 @@ func (_m *Interface) PatchNode(old *corev1.Node, new *corev1.Node) error { } // RemoveTaintFromNode provides a mock function with given fields: nodeName, taint -func (_m *Interface) RemoveTaintFromNode(nodeName string, taint *corev1.Taint) error { +func (_m *Interface) RemoveTaintFromNode(nodeName string, taint *apicorev1.Taint) error { ret := _m.Called(nodeName, taint) var r0 error - if rf, ok := ret.Get(0).(func(string, *corev1.Taint) error); ok { + if rf, ok := ret.Get(0).(func(string, *apicorev1.Taint) error); ok { r0 = rf(nodeName, taint) } else { r0 = ret.Error(0) @@ -256,11 +296,11 @@ func (_m *Interface) SetAnnotationsOnService(namespace string, serviceName strin } // SetTaintOnNode provides a mock function with given fields: nodeName, taint -func (_m *Interface) SetTaintOnNode(nodeName string, taint 
*corev1.Taint) error { +func (_m *Interface) SetTaintOnNode(nodeName string, taint *apicorev1.Taint) error { ret := _m.Called(nodeName, taint) var r0 error - if rf, ok := ret.Get(0).(func(string, *corev1.Taint) error); ok { + if rf, ok := ret.Get(0).(func(string, *apicorev1.Taint) error); ok { r0 = rf(nodeName, taint) } else { r0 = ret.Error(0) @@ -269,12 +309,35 @@ func (_m *Interface) SetTaintOnNode(nodeName string, taint *corev1.Taint) error return r0 } +// UpdateCloudPrivateIPConfig provides a mock function with given fields: cloudPrivateIPConfig +func (_m *Interface) UpdateCloudPrivateIPConfig(cloudPrivateIPConfig *v1.CloudPrivateIPConfig) (*v1.CloudPrivateIPConfig, error) { + ret := _m.Called(cloudPrivateIPConfig) + + var r0 *v1.CloudPrivateIPConfig + if rf, ok := ret.Get(0).(func(*v1.CloudPrivateIPConfig) *v1.CloudPrivateIPConfig); ok { + r0 = rf(cloudPrivateIPConfig) + } else { + if ret.Get(0) != nil { + r0 = ret.Get(0).(*v1.CloudPrivateIPConfig) + } + } + + var r1 error + if rf, ok := ret.Get(1).(func(*v1.CloudPrivateIPConfig) error); ok { + r1 = rf(cloudPrivateIPConfig) + } else { + r1 = ret.Error(1) + } + + return r0, r1 +} + // UpdateNode provides a mock function with given fields: node -func (_m *Interface) UpdateNode(node *corev1.Node) error { +func (_m *Interface) UpdateNode(node *apicorev1.Node) error { ret := _m.Called(node) var r0 error - if rf, ok := ret.Get(0).(func(*corev1.Node) error); ok { + if rf, ok := ret.Get(0).(func(*apicorev1.Node) error); ok { r0 = rf(node) } else { r0 = ret.Error(0) @@ -298,11 +361,11 @@ func (_m *Interface) UpdateEgressServiceStatus(namespace string, name string, ho } // UpdateNodeStatus provides a mock function with given fields: node -func (_m *Interface) UpdateNodeStatus(node *corev1.Node) error { +func (_m *Interface) UpdateNodeStatus(node *apicorev1.Node) error { ret := _m.Called(node) var r0 error - if rf, ok := ret.Get(0).(func(*corev1.Node) error); ok { + if rf, ok := ret.Get(0).(func(*apicorev1.Node) error); 
ok { r0 = rf(node) } else { r0 = ret.Error(0) @@ -312,11 +375,11 @@ func (_m *Interface) UpdateNodeStatus(node *corev1.Node) error { } // UpdatePod provides a mock function with given fields: pod -func (_m *Interface) UpdatePod(pod *corev1.Pod) error { +func (_m *Interface) UpdatePod(pod *apicorev1.Pod) error { ret := _m.Called(pod) var r0 error - if rf, ok := ret.Get(0).(func(*corev1.Pod) error); ok { + if rf, ok := ret.Get(0).(func(*apicorev1.Pod) error); ok { r0 = rf(pod) } else { r0 = ret.Error(0) @@ -325,6 +388,20 @@ func (_m *Interface) UpdatePod(pod *corev1.Pod) error { return r0 } +// UpdateStatusAPBExternalRoute provides a mock function with given fields: route +func (_m *Interface) UpdateStatusAPBExternalRoute(route *adminpolicybasedroutev1.AdminPolicyBasedExternalRoute) error { + ret := _m.Called(route) + + var r0 error + if rf, ok := ret.Get(0).(func(*adminpolicybasedroutev1.AdminPolicyBasedExternalRoute) error); ok { + r0 = rf(route) + } else { + r0 = ret.Error(0) + } + + return r0 +} + type mockConstructorTestingTNewInterface interface { mock.TestingT Cleanup(func()) diff --git a/go-controller/pkg/libovsdbops/router.go b/go-controller/pkg/libovsdbops/router.go index d34852e4f7..9c8467e9ee 100644 --- a/go-controller/pkg/libovsdbops/router.go +++ b/go-controller/pkg/libovsdbops/router.go @@ -787,6 +787,24 @@ func DeleteBFDs(nbClient libovsdbclient.Client, bfds ...*nbdb.BFD) error { return m.Delete(opModels...) 
} +func LookupBFD(nbClient libovsdbclient.Client, bfd *nbdb.BFD) (*nbdb.BFD, error) { + found := []*nbdb.BFD{} + opModel := operationModel{ + Model: bfd, + ModelPredicate: func(item *nbdb.BFD) bool { return item.DstIP == bfd.DstIP && item.LogicalPort == bfd.LogicalPort }, + ExistingResult: &found, + ErrNotFound: true, + BulkOp: false, + } + + m := newModelClient(nbClient) + err := m.Lookup(opModel) + if err != nil { + return nil, err + } + return found[0], nil +} + // LB OPs // AddLoadBalancersToLogicalRouterOps adds the provided load balancers to the diff --git a/go-controller/pkg/network-controller-manager/network_controller_manager.go b/go-controller/pkg/network-controller-manager/network_controller_manager.go index c5f97cc555..e4ee0b217d 100644 --- a/go-controller/pkg/network-controller-manager/network_controller_manager.go +++ b/go-controller/pkg/network-controller-manager/network_controller_manager.go @@ -191,6 +191,7 @@ func NewNetworkControllerManager(ovnClient *util.OVNClientset, identity string, EgressFirewallClient: ovnClient.EgressFirewallClient, CloudNetworkClient: ovnClient.CloudNetworkClient, EgressServiceClient: ovnClient.EgressServiceClient, + APBRouteClient: ovnClient.AdminPolicyRouteClient, }, stopChan: make(chan struct{}), watchFactory: wf, diff --git a/go-controller/pkg/network-controller-manager/node_network_controller_manager.go b/go-controller/pkg/network-controller-manager/node_network_controller_manager.go index d59f4d9c05..059696b501 100644 --- a/go-controller/pkg/network-controller-manager/node_network_controller_manager.go +++ b/go-controller/pkg/network-controller-manager/node_network_controller_manager.go @@ -16,7 +16,6 @@ import ( "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" "k8s.io/apimachinery/pkg/util/wait" - clientset "k8s.io/client-go/kubernetes" "k8s.io/client-go/tools/record" "k8s.io/klog/v2" kexec "k8s.io/utils/exec" @@ -24,12 +23,12 @@ import ( // nodeNetworkControllerManager structure is the object manages all 
controllers for all networks for ovnkube-node type nodeNetworkControllerManager struct { - name string - client clientset.Interface - Kube kube.Interface - watchFactory factory.NodeWatchFactory - stopChan chan struct{} - recorder record.EventRecorder + name string + ovnNodeClient *util.OVNNodeClientset + Kube kube.Interface + watchFactory factory.NodeWatchFactory + stopChan chan struct{} + recorder record.EventRecorder defaultNodeNetworkController nad.BaseNetworkController @@ -55,19 +54,19 @@ func (ncm *nodeNetworkControllerManager) CleanupDeletedNetworks(allControllers [ // newCommonNetworkControllerInfo creates and returns the base node network controller info func (ncm *nodeNetworkControllerManager) newCommonNetworkControllerInfo() *node.CommonNodeNetworkControllerInfo { - return node.NewCommonNodeNetworkControllerInfo(ncm.client, ncm.watchFactory, ncm.recorder, ncm.name) + return node.NewCommonNodeNetworkControllerInfo(ncm.ovnNodeClient.KubeClient, ncm.ovnNodeClient.AdminPolicyRouteClient, ncm.watchFactory, ncm.recorder, ncm.name) } // NewNodeNetworkControllerManager creates a new OVN controller manager to manage all the controller for all networks func NewNodeNetworkControllerManager(ovnClient *util.OVNClientset, wf factory.NodeWatchFactory, name string, eventRecorder record.EventRecorder) (*nodeNetworkControllerManager, error) { ncm := &nodeNetworkControllerManager{ - name: name, - client: ovnClient.KubeClient, - Kube: &kube.Kube{KClient: ovnClient.KubeClient}, - watchFactory: wf, - stopChan: make(chan struct{}), - recorder: eventRecorder, + name: name, + ovnNodeClient: &util.OVNNodeClientset{KubeClient: ovnClient.KubeClient, AdminPolicyRouteClient: ovnClient.AdminPolicyRouteClient}, + Kube: &kube.Kube{KClient: ovnClient.KubeClient}, + watchFactory: wf, + stopChan: make(chan struct{}), + recorder: eventRecorder, } // need to configure OVS interfaces for Pods on secondary networks in the DPU mode diff --git 
a/go-controller/pkg/node/base_node_network_controller_dpu_test.go b/go-controller/pkg/node/base_node_network_controller_dpu_test.go index e20e57043e..006c77944e 100644 --- a/go-controller/pkg/node/base_node_network_controller_dpu_test.go +++ b/go-controller/pkg/node/base_node_network_controller_dpu_test.go @@ -10,6 +10,7 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/cni" + adminpolicybasedrouteclient "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/fake" factorymocks "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/factory/mocks" kubemocks "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/kube/mocks" ovntest "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/testing" @@ -105,8 +106,9 @@ var _ = Describe("Node DPU tests", func() { Expect(err).NotTo(HaveOccurred()) kubeMock = kubemocks.Interface{} + apbExternalRouteClient := adminpolicybasedrouteclient.NewSimpleClientset() factoryMock = factorymocks.NodeWatchFactory{} - cnnci := newCommonNodeNetworkControllerInfo(nil, &kubeMock, &factoryMock, nil, "") + cnnci := newCommonNodeNetworkControllerInfo(nil, &kubeMock, apbExternalRouteClient, &factoryMock, nil, "") dnnc = newDefaultNodeNetworkController(cnnci, nil, nil) podNamespaceLister = v1mocks.PodNamespaceLister{} diff --git a/go-controller/pkg/node/default_node_network_controller.go b/go-controller/pkg/node/default_node_network_controller.go index 04bd6f1e69..29da5cb2bb 100644 --- a/go-controller/pkg/node/default_node_network_controller.go +++ b/go-controller/pkg/node/default_node_network_controller.go @@ -25,6 +25,7 @@ import ( honode "github.com/ovn-org/ovn-kubernetes/go-controller/hybrid-overlay/pkg/controller" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/cni" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config" + adminpolicybasedrouteclientset 
"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/factory" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/informer" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/kube" @@ -32,6 +33,7 @@ import ( "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/node/controllers/upgrade" nodeipt "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/node/iptables" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/node/ovspinning" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/controller/apbroute" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/healthcheck" retry "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/retry" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" @@ -41,11 +43,12 @@ import ( ) type CommonNodeNetworkControllerInfo struct { - client clientset.Interface - Kube kube.Interface - watchFactory factory.NodeWatchFactory - recorder record.EventRecorder - name string + client clientset.Interface + Kube kube.Interface + watchFactory factory.NodeWatchFactory + recorder record.EventRecorder + name string + apbExternalRouteClient adminpolicybasedrouteclientset.Interface } // BaseNodeNetworkController structure per-network fields and network specific configuration @@ -67,22 +70,23 @@ type BaseNodeNetworkController struct { wg *sync.WaitGroup } -func newCommonNodeNetworkControllerInfo(kubeClient clientset.Interface, kube kube.Interface, +func newCommonNodeNetworkControllerInfo(kubeClient clientset.Interface, kube kube.Interface, apbExternalRouteClient adminpolicybasedrouteclientset.Interface, wf factory.NodeWatchFactory, eventRecorder record.EventRecorder, name string) *CommonNodeNetworkControllerInfo { return &CommonNodeNetworkControllerInfo{ - client: kubeClient, - Kube: kube, - watchFactory: wf, - name: name, - recorder: eventRecorder, + client: kubeClient, + Kube: kube, + apbExternalRouteClient: 
apbExternalRouteClient, + watchFactory: wf, + name: name, + recorder: eventRecorder, } } // NewCommonNodeNetworkControllerInfo creates and returns the base node network controller info -func NewCommonNodeNetworkControllerInfo(kubeClient clientset.Interface, wf factory.NodeWatchFactory, +func NewCommonNodeNetworkControllerInfo(kubeClient clientset.Interface, apbExternalRouteClient adminpolicybasedrouteclientset.Interface, wf factory.NodeWatchFactory, eventRecorder record.EventRecorder, name string) *CommonNodeNetworkControllerInfo { - return newCommonNodeNetworkControllerInfo(kubeClient, &kube.Kube{KClient: kubeClient}, wf, eventRecorder, name) + return newCommonNodeNetworkControllerInfo(kubeClient, &kube.Kube{KClient: kubeClient}, apbExternalRouteClient, wf, eventRecorder, name) } // DefaultNodeNetworkController is the object holder for utilities meant for node management of default network @@ -99,10 +103,13 @@ type DefaultNodeNetworkController struct { retryNamespaces *retry.RetryFramework // retry framework for endpoint slices, used for the removal of stale conntrack entries for services retryEndpointSlices *retry.RetryFramework + + apbExternalRouteNodeController *apbroute.ExternalGatewayNodeController } func newDefaultNodeNetworkController(cnnci *CommonNodeNetworkControllerInfo, stopChan chan struct{}, wg *sync.WaitGroup) *DefaultNodeNetworkController { + return &DefaultNodeNetworkController{ BaseNodeNetworkController: BaseNodeNetworkController{ CommonNodeNetworkControllerInfo: *cnnci, @@ -116,13 +123,13 @@ func newDefaultNodeNetworkController(cnnci *CommonNodeNetworkControllerInfo, sto // NewDefaultNodeNetworkController creates a new network controller for node management of the default network func NewDefaultNodeNetworkController(cnnci *CommonNodeNetworkControllerInfo) (*DefaultNodeNetworkController, error) { + var err error stopChan := make(chan struct{}) wg := &sync.WaitGroup{} nc := newDefaultNodeNetworkController(cnnci, stopChan, wg) if 
len(config.Kubernetes.HealthzBindAddress) != 0 { klog.Infof("Enable node proxy healthz server on %s", config.Kubernetes.HealthzBindAddress) - var err error nc.healthzServer, err = newNodeProxyHealthzServer( nc.name, config.Kubernetes.HealthzBindAddress, nc.recorder, nc.watchFactory) if err != nil { @@ -130,7 +137,17 @@ func NewDefaultNodeNetworkController(cnnci *CommonNodeNetworkControllerInfo) (*D } } + nc.apbExternalRouteNodeController, err = apbroute.NewExternalNodeController( + cnnci.apbExternalRouteClient, + nc.watchFactory.PodCoreInformer(), + nc.watchFactory.NamespaceInformer(), + stopChan) + if err != nil { + return nil, err + } + nc.initRetryFrameworkForNode() + return nc, nil } @@ -917,6 +934,11 @@ func (nc *DefaultNodeNetworkController) Start(ctx context.Context) error { c.Run(1) }() } + nc.wg.Add(1) + go func() { + defer nc.wg.Done() + nc.apbExternalRouteNodeController.Run(1) + }() nc.wg.Add(1) go func() { @@ -1049,13 +1071,17 @@ func (nc *DefaultNodeNetworkController) checkAndDeleteStaleConntrackEntries() { } func (nc *DefaultNodeNetworkController) syncConntrackForExternalGateways(newNs *kapi.Namespace) error { + gatewayIPs, err := nc.apbExternalRouteNodeController.GetAdminPolicyBasedExternalRouteIPsForTargetNamespace(newNs.Name) + if err != nil { + klog.Errorf("Unable to retrieve Admin Policy Based External Route objects:%v", err) + } // loop through all the IPs on the annotations; ARP for their MACs and form an allowlist - gatewayIPs := strings.Split(newNs.Annotations[util.ExternalGatewayPodIPsAnnotation], ",") - gatewayIPs = append(gatewayIPs, strings.Split(newNs.Annotations[util.RoutingExternalGWsAnnotation], ",")...) + gatewayIPs = gatewayIPs.Insert(strings.Split(newNs.Annotations[util.ExternalGatewayPodIPsAnnotation], ",")...) + gatewayIPs = gatewayIPs.Insert(strings.Split(newNs.Annotations[util.RoutingExternalGWsAnnotation], ",")...) 
var wg sync.WaitGroup wg.Add(len(gatewayIPs)) validMACs := sync.Map{} - for _, gwIP := range gatewayIPs { + for gwIP := range gatewayIPs { go func(gwIP string) { defer wg.Done() if len(gwIP) > 0 && !utilnet.IsIPv6String(gwIP) { diff --git a/go-controller/pkg/node/gateway_init_linux_test.go b/go-controller/pkg/node/gateway_init_linux_test.go index b121126cbc..668a04de71 100644 --- a/go-controller/pkg/node/gateway_init_linux_test.go +++ b/go-controller/pkg/node/gateway_init_linux_test.go @@ -23,6 +23,7 @@ import ( "k8s.io/client-go/kubernetes/fake" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config" + adminpolicybasedrouteclient "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/fake" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/factory" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/kube" ovntest "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/testing" @@ -706,7 +707,8 @@ func shareGatewayInterfaceDPUHostTest(app *cli.App, testNS ns.NetNS, uplinkName, Items: []v1.Node{existingNode}, }) fakeClient := &util.OVNNodeClientset{ - KubeClient: kubeFakeClient, + KubeClient: kubeFakeClient, + AdminPolicyRouteClient: adminpolicybasedrouteclient.NewSimpleClientset(), } stop := make(chan struct{}) @@ -721,7 +723,7 @@ func shareGatewayInterfaceDPUHostTest(app *cli.App, testNS ns.NetNS, uplinkName, err = wf.Start() Expect(err).NotTo(HaveOccurred()) - cnnci := NewCommonNodeNetworkControllerInfo(nil, wf, nil, nodeName) + cnnci := NewCommonNodeNetworkControllerInfo(nil, fakeClient.AdminPolicyRouteClient, wf, nil, nodeName) nc := newDefaultNodeNetworkController(cnnci, stop, wg) // must run route manager manually which is usually started with nc.Start() wg.Add(1) diff --git a/go-controller/pkg/node/ovn_test.go b/go-controller/pkg/node/ovn_test.go index 0f82c1d7fd..a5ddb899fc 100644 --- a/go-controller/pkg/node/ovn_test.go +++ b/go-controller/pkg/node/ovn_test.go @@ -6,6 +6,7 @@ import ( . 
"github.com/onsi/gomega" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config" + adminpolicybasedrouteclient "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/fake" egressserviceapi "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressservice/v1" egressservicefake "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressservice/v1/apis/clientset/versioned/fake" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/factory" @@ -54,8 +55,9 @@ func (o *FakeOVNNode) start(ctx *cli.Context, objects ...runtime.Object) { Expect(err).NotTo(HaveOccurred()) o.fakeClient = &util.OVNNodeClientset{ - KubeClient: fake.NewSimpleClientset(v1Objects...), - EgressServiceClient: egressservicefake.NewSimpleClientset(egressServiceObjects...), + KubeClient: fake.NewSimpleClientset(v1Objects...), + EgressServiceClient: egressservicefake.NewSimpleClientset(egressServiceObjects...), + AdminPolicyRouteClient: adminpolicybasedrouteclient.NewSimpleClientset(), } o.init() // initializes the node } @@ -79,7 +81,7 @@ func (o *FakeOVNNode) init() { o.watcher, err = factory.NewNodeWatchFactory(o.fakeClient, fakeNodeName) Expect(err).NotTo(HaveOccurred()) - cnnci := NewCommonNodeNetworkControllerInfo(o.fakeClient.KubeClient, o.watcher, o.recorder, fakeNodeName) + cnnci := NewCommonNodeNetworkControllerInfo(o.fakeClient.KubeClient, o.fakeClient.AdminPolicyRouteClient, o.watcher, o.recorder, fakeNodeName) o.nc = newDefaultNodeNetworkController(cnnci, o.stopChan, o.wg) // watcher is started by nodeNetworkControllerManager, not by nodeNetworkcontroller, so start it here. 
o.watcher.Start() diff --git a/go-controller/pkg/ovn/controller/apbroute/apbroute_suite_test.go b/go-controller/pkg/ovn/controller/apbroute/apbroute_suite_test.go new file mode 100644 index 0000000000..1305fd2bad --- /dev/null +++ b/go-controller/pkg/ovn/controller/apbroute/apbroute_suite_test.go @@ -0,0 +1,13 @@ +package apbroute + +import ( + "testing" + + . "github.com/onsi/ginkgo" + . "github.com/onsi/gomega" +) + +func TestApbroute(t *testing.T) { + RegisterFailHandler(Fail) + RunSpecs(t, "Admin Based Policy External Route Controller Suite") +} diff --git a/go-controller/pkg/ovn/controller/apbroute/external_controller.go b/go-controller/pkg/ovn/controller/apbroute/external_controller.go new file mode 100644 index 0000000000..ea6f2d23fb --- /dev/null +++ b/go-controller/pkg/ovn/controller/apbroute/external_controller.go @@ -0,0 +1,372 @@ +package apbroute + +import ( + "fmt" + "strings" + "sync" + + adminpolicybasedrouteapi "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/adminpolicybasedroute/v1" + adminpolicybasedroutelisters "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/listers/adminpolicybasedroute/v1" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/syncmap" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/labels" + ktypes "k8s.io/apimachinery/pkg/types" + "k8s.io/apimachinery/pkg/util/sets" + corev1listers "k8s.io/client-go/listers/core/v1" + "k8s.io/klog/v2" +) + +type gatewayInfoList []*gatewayInfo + +func (g gatewayInfoList) String() string { + + s := strings.Builder{} + for _, item := range g { + s.WriteString(fmt.Sprintf("%s, ", item.gws)) + } + return s.String() +} + +func (g gatewayInfoList) HasIP(ip string) bool { + for _, i := range g { + if i.gws.Has(ip) { + return true + } + } + return false +} + +func (g gatewayInfoList) Insert(items ...*gatewayInfo) (gatewayInfoList, sets.Set[string]) { + ret := append(gatewayInfoList{}, g...) 
+ duplicated := sets.New[string]() + for _, item := range items { + for _, ip := range item.gws.UnsortedList() { + if ret.HasIP(ip) { + duplicated = duplicated.Insert(ip) + continue + } + ret = append(ret, item) + } + } + return ret, duplicated +} +func (g gatewayInfoList) Delete(item *gatewayInfo) gatewayInfoList { + ret := gatewayInfoList{} + for _, i := range g { + if !i.gws.Equal(item.gws) { + ret, _ = ret.Insert(i) + } + } + return ret +} + +func (g gatewayInfoList) Len() int { + return len(g) +} + +func (g gatewayInfoList) Less(i, j int) bool { return lessGWsIP(g[i], g[j]) } +func (g gatewayInfoList) Swap(i, j int) { g[i], g[j] = g[j], g[i] } + +func lessGWsIP(l, r *gatewayInfo) bool { + + for lip := range l.gws { + for rip := range r.gws { + if lip > rip { + return false + } + } + } + return true +} + +type gatewayInfo struct { + gws sets.Set[string] + bfdEnabled bool +} +type namespaceInfo struct { + policies sets.Set[string] + staticGateways gatewayInfoList + dynamicGateways map[ktypes.NamespacedName]*gatewayInfo +} + +func newNamespaceInfo() *namespaceInfo { + return &namespaceInfo{ + policies: sets.New[string](), + dynamicGateways: make(map[ktypes.NamespacedName]*gatewayInfo), + staticGateways: gatewayInfoList{}, + } +} + +type routeInfo struct { + policy *adminpolicybasedrouteapi.AdminPolicyBasedExternalRoute + toBeDeleted bool +} + +type ExternalRouteInfo struct { + sync.Mutex + Deleted bool + PodName ktypes.NamespacedName + // PodExternalRoutes is a cache keeping the LR routes added to the GRs when + // external gateways are used. The first map key is the podIP (src-ip of the route), + // the second the GW IP (next hop), and the third the GR name + PodExternalRoutes map[string]map[string]string +} + +// This structure contains the processed information of a policy. +// This information is then used to update the network components (North Bound DB, conntrack) by applying the IPs here to each of the target namespaces defined in the from field. 
+type routePolicy struct { + // targetNamespacesSelector contains the namespace selector defined in the from field in the policy. + targetNamespacesSelector *metav1.LabelSelector + // staticGateways contains the processed list of IPs and BFD information defined in the staticHop slice in the policy. + staticGateways gatewayInfoList + // dynamicGateways contains the processed list of IPs and BFD information defined in the dynamicHop slice in the policy. + // the IP and BFD information of each pod gateway is stored in a map where the key is of type NamespacedName with the namespace and podName as values + // and the value is the gatewayInfo, which contains a set of IPs and the flag to determine if the BFD protocol is to be enabled for this IP + dynamicGateways map[ktypes.NamespacedName]*gatewayInfo +} + +type externalPolicyManager struct { + stopCh <-chan struct{} + // route policies + routeLister adminpolicybasedroutelisters.AdminPolicyBasedExternalRouteLister + // Pods + podLister corev1listers.PodLister + // Namespaces + namespaceLister corev1listers.NamespaceLister + // cache for set of policies impacting a given namespace + namespaceInfoSyncCache *syncmap.SyncMap[*namespaceInfo] + routePolicySyncCache *syncmap.SyncMap[*routeInfo] + // networkClient is an interface that exposes add and delete GW IPs. There are 2 structs that implement this contract: one to interface with the north bound DB and another one for the conntrack. + // the north bound is used by the master controller to add and delete the logical static routes, whilst the conntrack is used by the node controller to ensure that the ECMP entries are removed + // when a gateway IP is no longer an egress access point. + netClient networkClient + // flag used to determine if the repair() function has completed populating the policy route cache. 
+ routePolicyCachePopulated bool + mutexRoutePolicyCachePopulated *sync.Mutex +} + +func newExternalPolicyManager( + stopCh <-chan struct{}, + podLister corev1listers.PodLister, + namespaceLister corev1listers.NamespaceLister, + routeLister adminpolicybasedroutelisters.AdminPolicyBasedExternalRouteLister, + netClient networkClient) *externalPolicyManager { + + m := externalPolicyManager{ + stopCh: stopCh, + routeLister: routeLister, + podLister: podLister, + namespaceLister: namespaceLister, + namespaceInfoSyncCache: syncmap.NewSyncMap[*namespaceInfo](), + routePolicySyncCache: syncmap.NewSyncMap[*routeInfo](), + netClient: netClient, + mutexRoutePolicyCachePopulated: &sync.Mutex{}, + } + + return &m +} + +func (m *externalPolicyManager) setRoutePolicyCacheAsPopulated() { + m.mutexRoutePolicyCachePopulated.Lock() + defer m.mutexRoutePolicyCachePopulated.Unlock() + m.routePolicyCachePopulated = true +} + +func (m *externalPolicyManager) isRoutePolicyCachePopulated() bool { + m.mutexRoutePolicyCachePopulated.Lock() + defer m.mutexRoutePolicyCachePopulated.Unlock() + return m.routePolicyCachePopulated +} + +// getRoutePolicyFromCache retrieves the cached value of the policy if it exists in the cache, as well as locking the key in case it exists. 
+func (m *externalPolicyManager) getRoutePolicyFromCache(policyName string) (adminpolicybasedrouteapi.AdminPolicyBasedExternalRoute, bool, bool) { + var ( + policy adminpolicybasedrouteapi.AdminPolicyBasedExternalRoute + found, markedForDeletion bool + ) + _ = m.routePolicySyncCache.DoWithLock(policyName, func(policyName string) error { + ri, f := m.routePolicySyncCache.Load(policyName) + if !f { + return nil + } + found = f + policy = *ri.policy + markedForDeletion = ri.toBeDeleted + return nil + }) + return policy, found, markedForDeletion +} + +func (m *externalPolicyManager) storeRoutePolicyInCache(policyInfo *adminpolicybasedrouteapi.AdminPolicyBasedExternalRoute) error { + return m.routePolicySyncCache.DoWithLock(policyInfo.Name, func(policyName string) error { + ri, found := m.routePolicySyncCache.Load(policyName) + if !found { + m.routePolicySyncCache.LoadOrStore(policyName, &routeInfo{policy: policyInfo}) + return nil + } + if ri.toBeDeleted { + return fmt.Errorf("attempting to store policy %s that is in the process of being deleted", policyInfo.Name) + } + ri.policy = policyInfo + return nil + }) +} + +func (m *externalPolicyManager) deleteRoutePolicyFromCache(policyName string) error { + return m.routePolicySyncCache.DoWithLock(policyName, func(policyName string) error { + ri, found := m.routePolicySyncCache.Load(policyName) + if found && !ri.toBeDeleted { + return fmt.Errorf("attempting to delete route policy %s from cache before it has been marked for deletion", policyName) + } + m.routePolicySyncCache.Delete(policyName) + return nil + }) +} + +// getAndMarkRoutePolicyForDeletionInCache flags a route policy for deletion and returns its cached value. This mark is used as a flag for other routines that attempt to retrieve the policy +// while processing pods or namespaces related to the given policy. 
+func (m *externalPolicyManager) getAndMarkRoutePolicyForDeletionInCache(policyName string) (adminpolicybasedrouteapi.AdminPolicyBasedExternalRoute, bool) { + var ( + exists bool + routePolicy adminpolicybasedrouteapi.AdminPolicyBasedExternalRoute + ) + _ = m.routePolicySyncCache.DoWithLock(policyName, func(policyName string) error { + ri, found := m.routePolicySyncCache.Load(policyName) + if !found { + return nil + } + ri.toBeDeleted = true + exists = true + routePolicy = *ri.policy + return nil + }) + return routePolicy, exists +} + +func (m *externalPolicyManager) getNamespaceInfoFromCache(namespaceName string) (*namespaceInfo, bool) { + m.namespaceInfoSyncCache.LockKey(namespaceName) + nsInfo, ok := m.namespaceInfoSyncCache.Load(namespaceName) + if !ok { + m.namespaceInfoSyncCache.UnlockKey(namespaceName) + return nil, false + } + return nsInfo, true +} + +func (m *externalPolicyManager) deleteNamespaceInfoInCache(namespaceName string) { + m.namespaceInfoSyncCache.Delete(namespaceName) +} + +func (m *externalPolicyManager) unlockNamespaceInfoCache(namespaceName string) { + m.namespaceInfoSyncCache.UnlockKey(namespaceName) +} + +func (m *externalPolicyManager) newNamespaceInfoInCache(namespaceName string) *namespaceInfo { + m.namespaceInfoSyncCache.LockKey(namespaceName) + nsInfo, _ := m.namespaceInfoSyncCache.LoadOrStore(namespaceName, newNamespaceInfo()) + return nsInfo +} + +func (m *externalPolicyManager) listNamespaceInfoCache() []string { + return m.namespaceInfoSyncCache.GetKeys() +} + +func (m *externalPolicyManager) getAllRoutePolicies() ([]*adminpolicybasedrouteapi.AdminPolicyBasedExternalRoute, error) { + var ( + routePolicies []*adminpolicybasedrouteapi.AdminPolicyBasedExternalRoute + err error + ) + // avoid hitting the informer if the route policies have already been cached during the execution of the repair() function. 
+ if m.isRoutePolicyCachePopulated() { + keys := m.routePolicySyncCache.GetKeys() + for _, policyName := range keys { + rp, found, markedForDelete := m.getRoutePolicyFromCache(policyName) + // ignore route policies that have been marked for deletion. They will soon be removed from this cluster. + if !found || (found && markedForDelete) { + continue + } + routePolicies = append(routePolicies, &rp) + } + return routePolicies, nil + } + + routePolicies, err = m.routeLister.List(labels.Everything()) + if err != nil { + klog.Errorf("Failed to list Admin Policy Based External Routes:%v", err) + return nil, err + } + return routePolicies, nil + } + +// getDynamicGatewayIPsForTargetNamespace is called by the annotation logic to identify if a namespace is managed by a CR. +// Since the call can occur outside the lifecycle of the controller, it cannot rely on the namespace info cache object to have been populated. +// Therefore it has to go through all policies until it identifies one that targets the namespace and retrieve the gateway IPs. +// These IPs are used by the annotation logic to determine which ones to remove from the north bound DB (the ones not included in the list), +// and the ones to keep (the ones that match both the annotation and the CR). +// This logic ensures that both CR and annotations can coexist without duplicating gateway IPs. 
+func (m *externalPolicyManager) getDynamicGatewayIPsForTargetNamespace(namespaceName string) (sets.Set[string], error) { + policyGWIPs := sets.New[string]() + + routePolicies, err := m.getAllRoutePolicies() + if err != nil { + return nil, err + } + for _, routePolicy := range routePolicies { + p, err := m.processExternalRoutePolicy(routePolicy) + if err != nil { + klog.Errorf("Failed to process Admin Policy Based External Route %s: %v", routePolicy.Name, err) + return nil, err + } + targetNs, err := m.listNamespacesBySelector(p.targetNamespacesSelector) + if err != nil { + klog.Errorf("Failed to process namespace selector for Admin Policy Based External Route %s:%v", routePolicy.Name, err) + return nil, err + } + for _, ns := range targetNs { + if ns.Name == namespaceName { + // only collect the dynamic gateways + for _, gwInfo := range p.dynamicGateways { + policyGWIPs = policyGWIPs.Union(gwInfo.gws) + } + } + } + } + return policyGWIPs, nil +} + +// getStaticGatewayIPsForTargetNamespace is called by the annotation logic to identify if a namespace is managed by a CR. +// Since the call can occur outside the lifecycle of the controller, it cannot rely on the namespace info cache object to have been populated. +// Therefore it has to go through all policies until it identifies one that targets the namespace and retrieve the gateway IPs. +// These IPs are used by the annotation logic to determine which ones to remove from the north bound DB (the ones not included in the list), +// and the ones to keep (the ones that match both the annotation and the CR). +// This logic ensures that both CR and annotations can coexist without duplicating gateway IPs. 
+func (m *externalPolicyManager) getStaticGatewayIPsForTargetNamespace(namespaceName string) (sets.Set[string], error) { + policyGWIPs := sets.New[string]() + + routePolicies, err := m.routeLister.List(labels.Everything()) + if err != nil { + klog.Errorf("Failed to list Admin Policy Based External Routes:%v", err) + return nil, err + } + for _, routePolicy := range routePolicies { + p, err := m.processExternalRoutePolicy(routePolicy) + if err != nil { + klog.Errorf("Failed to process Admin Policy Based External Route %s: %v", routePolicy.Name, err) + return nil, err + } + targetNs, err := m.listNamespacesBySelector(p.targetNamespacesSelector) + if err != nil { + klog.Errorf("Failed to process namespace selector for Admin Policy Based External Route %s:%v", routePolicy.Name, err) + return nil, err + } + for _, ns := range targetNs { + if ns.Name == namespaceName { + // only collect the static gateways + for _, gwInfo := range p.staticGateways { + policyGWIPs.Insert(gwInfo.gws.UnsortedList()...) + } + } + } + } + return policyGWIPs, nil +} diff --git a/go-controller/pkg/ovn/controller/apbroute/external_controller_namespace.go b/go-controller/pkg/ovn/controller/apbroute/external_controller_namespace.go new file mode 100644 index 0000000000..f82b80a3f6 --- /dev/null +++ b/go-controller/pkg/ovn/controller/apbroute/external_controller_namespace.go @@ -0,0 +1,137 @@ +package apbroute + +import ( + "fmt" + + adminpolicybasedrouteapi "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/adminpolicybasedroute/v1" + v1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + ktypes "k8s.io/apimachinery/pkg/types" + "k8s.io/apimachinery/pkg/util/sets" + "k8s.io/klog/v2" +) + +// processAddNamespace takes in a namespace and applies the policies that are applicable to the namespace, previously stored in the cacheInfo object argument. 
+// The logic goes through all the policies and applies the gateway IPs derived from the static and dynamic hop to all the pods in the namespace. +// Lastly, it updates the cacheInfo to contain the static and dynamic gateway IPs generated from the previous action to keep track of the gateway IPs applied in the namespace. +func (m *externalPolicyManager) processAddNamespace(new *v1.Namespace, cacheInfo *namespaceInfo) error { + staticGateways, dynamicGateways, err := m.aggregateNamespaceInfo(cacheInfo.policies) + if err != nil { + return err + } + cacheInfo.staticGateways = staticGateways + cacheInfo.dynamicGateways = dynamicGateways + return nil +} + +// processUpdateNamespace takes in a namespace name, current policies applied to the namespace, policies that are now expected to be applied to the namespace and the cache info +// that contains all the current gateway IPs and policies for that namespace. It follows this logic: +// * Calculate the difference between current and expected policies and proceed to remove the gateway IPs from the policies that are no longer applicable to this namespace +// * Calculate the difference between the expected and current ones to determine the new policies to be applied and proceed to apply them. +// * Update the cache info with the new list of policies, as well as the static and dynamic gateway IPs derived from executing the previous logic. 
+func (m *externalPolicyManager) processUpdateNamespace(namespaceName string, currentPolicies, newPolicies sets.Set[string], cacheInfo *namespaceInfo) error { + + // some differences apply, let's figure out if previous policies have been removed first + policiesNotValid := currentPolicies.Difference(newPolicies) + // iterate through the policies that no longer apply to this namespace + for policyName := range policiesNotValid { + err := m.removePolicyFromNamespaceWithName(namespaceName, policyName, cacheInfo) + if err != nil { + return err + } + } + + // policies that now apply to this namespace + newPoliciesDiff := newPolicies.Difference(currentPolicies) + for policyName := range newPoliciesDiff { + policy, found, markedForDeletion := m.getRoutePolicyFromCache(policyName) + if !found { + return fmt.Errorf("failed to find external route policy %s in cache", policyName) + } + if markedForDeletion { + klog.Infof("Skipping route policy %s as it has been marked for deletion", policyName) + continue + } + err := m.applyPolicyToNamespace(namespaceName, &policy, cacheInfo) + if err != nil { + return err + } + } + // at least one policy apply, let's update the cache + cacheInfo.policies = newPolicies + return nil + +} + +func (m *externalPolicyManager) applyPolicyToNamespace(namespaceName string, policy *adminpolicybasedrouteapi.AdminPolicyBasedExternalRoute, cacheInfo *namespaceInfo) error { + + processedPolicy, err := m.processExternalRoutePolicy(policy) + if err != nil { + return err + } + err = m.applyProcessedPolicyToNamespace(namespaceName, policy.Name, processedPolicy, cacheInfo) + if err != nil { + return err + } + return nil +} + +func (m *externalPolicyManager) removePolicyFromNamespaceWithName(targetNamespace, policyName string, cacheInfo *namespaceInfo) error { + policy, err := m.routeLister.Get(policyName) + if err != nil { + return err + } + return m.removePolicyFromNamespace(targetNamespace, policy, cacheInfo) +} +func (m *externalPolicyManager) 
removePolicyFromNamespace(targetNamespace string, policy *adminpolicybasedrouteapi.AdminPolicyBasedExternalRoute, cacheInfo *namespaceInfo) error { + + processedPolicy, err := m.processExternalRoutePolicy(policy) + if err != nil { + return err + } + err = m.deletePolicyInNamespace(targetNamespace, policy.Name, processedPolicy, cacheInfo) + if err != nil { + return err + } + cacheInfo.policies.Delete(policy.Name) + return nil +} + +func (m *externalPolicyManager) listNamespacesBySelector(selector *metav1.LabelSelector) ([]*v1.Namespace, error) { + s, err := metav1.LabelSelectorAsSelector(selector) + if err != nil { + return nil, err + } + ns, err := m.namespaceLister.List(s) + if err != nil { + return nil, err + } + return ns, nil + +} + +func (m *externalPolicyManager) aggregateNamespaceInfo(policies sets.Set[string]) (gatewayInfoList, map[ktypes.NamespacedName]*gatewayInfo, error) { + + static := gatewayInfoList{} + dynamic := make(map[ktypes.NamespacedName]*gatewayInfo) + for policyName := range policies { + externalPolicy, err := m.routeLister.Get(policyName) + if err != nil { + klog.Warningf("Unable to find route policy %s:%+v", policyName, err) + continue + } + processedPolicy, err := m.processExternalRoutePolicy(externalPolicy) + if err != nil { + return nil, nil, err + } + var duplicated sets.Set[string] + static, duplicated = static.Insert(processedPolicy.staticGateways...) 
+ if duplicated.Len() > 0 { + klog.Warningf("Found duplicated gateway IP(s) %+s in policy(s) %+s", sets.List(duplicated), sets.List(policies)) + } + for podName, gatewayInfo := range processedPolicy.dynamicGateways { + dynamic[podName] = gatewayInfo + } + } + return static, dynamic, nil +} diff --git a/go-controller/pkg/ovn/controller/apbroute/external_controller_namespace_test.go b/go-controller/pkg/ovn/controller/apbroute/external_controller_namespace_test.go new file mode 100644 index 0000000000..0d8e124185 --- /dev/null +++ b/go-controller/pkg/ovn/controller/apbroute/external_controller_namespace_test.go @@ -0,0 +1,358 @@ +package apbroute + +import ( + "context" + + . "github.com/onsi/ginkgo" + . "github.com/onsi/gomega" + v1 "k8s.io/apimachinery/pkg/apis/meta/v1" + ktypes "k8s.io/apimachinery/pkg/types" + "k8s.io/apimachinery/pkg/util/sets" + + corev1 "k8s.io/api/core/v1" + "k8s.io/client-go/kubernetes/fake" + + adminpolicybasedrouteapi "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/adminpolicybasedroute/v1" + adminpolicybasedrouteclient "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/fake" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/nbdb" + libovsdbtest "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/testing/libovsdb" + "k8s.io/apimachinery/pkg/runtime" +) + +const ( + annotatedPodIP = "192.168.2.1" + dynamicHopHostNetPodIP = "192.168.1.1" + staticHopGWIP = "10.10.10.1" +) + +func newPolicy(policyName string, fromNSSelector *v1.LabelSelector, staticHopsGWIPs sets.Set[string], dynamicHopsNSSelector *v1.LabelSelector, dynamicHopsPodSelector *v1.LabelSelector, bfdEnabled bool) *adminpolicybasedrouteapi.AdminPolicyBasedExternalRoute { + p := adminpolicybasedrouteapi.AdminPolicyBasedExternalRoute{ + ObjectMeta: v1.ObjectMeta{Name: policyName}, + Spec: adminpolicybasedrouteapi.AdminPolicyBasedExternalRouteSpec{ + From: adminpolicybasedrouteapi.ExternalNetworkSource{ + 
NamespaceSelector: *fromNSSelector, + }, + NextHops: adminpolicybasedrouteapi.ExternalNextHops{}, + }, + } + + if staticHopsGWIPs.Len() > 0 { + p.Spec.NextHops.StaticHops = []*adminpolicybasedrouteapi.StaticHop{} + for ip := range staticHopsGWIPs { + p.Spec.NextHops.StaticHops = append(p.Spec.NextHops.StaticHops, &adminpolicybasedrouteapi.StaticHop{IP: ip, BFDEnabled: bfdEnabled}) + } + } + if dynamicHopsNSSelector != nil && dynamicHopsPodSelector != nil { + p.Spec.NextHops.DynamicHops = []*adminpolicybasedrouteapi.DynamicHop{ + {NamespaceSelector: dynamicHopsNSSelector, + PodSelector: *dynamicHopsPodSelector, + BFDEnabled: bfdEnabled}, + } + } + return &p +} + +func deletePolicy(policyName string, fakeRouteClient *adminpolicybasedrouteclient.Clientset) { + err = fakeRouteClient.K8sV1().AdminPolicyBasedExternalRoutes().Delete(context.TODO(), policyName, v1.DeleteOptions{}) + Expect(err).NotTo(HaveOccurred()) +} + +func deleteNamespace(namespaceName string, fakeClient *fake.Clientset) { + + err = fakeClient.CoreV1().Namespaces().Delete(context.Background(), namespaceName, v1.DeleteOptions{}) + Expect(err).NotTo(HaveOccurred()) +} + +func updateNamespaceLabel(namespaceName string, labels map[string]string, fakeClient *fake.Clientset) { + ns, err := fakeClient.CoreV1().Namespaces().Get(context.TODO(), namespaceName, v1.GetOptions{}) + Expect(err).NotTo(HaveOccurred()) + incrementResourceVersion(ns) + ns.Labels = labels + _, err = fakeClient.CoreV1().Namespaces().Update(context.Background(), ns, v1.UpdateOptions{}) + Expect(err).NotTo(HaveOccurred()) +} + +func getNamespaceInfo(namespaceName string) *namespaceInfo { + f, found := mgr.getNamespaceInfoFromCache(namespaceName) + if found { + cp := &namespaceInfo{} + deepCopyNamespaceInfo(f, cp) + mgr.unlockNamespaceInfoCache(namespaceName) + return cp + } + return f +} +func listNamespaceInfo() []string { + return mgr.namespaceInfoSyncCache.GetKeys() +} + +func deepCopyNamespaceInfo(source, destination *namespaceInfo) { + 
destination.policies = sets.New(source.policies.UnsortedList()...) + destination.staticGateways, _ = gatewayInfoList.Insert(source.staticGateways) + destination.dynamicGateways = make(map[ktypes.NamespacedName]*gatewayInfo) + for key, value := range source.dynamicGateways { + destination.dynamicGateways[key] = value + } +} + +var _ = Describe("OVN External Gateway namespace", func() { + + var ( + dynamicPolicy = newPolicy( + "dynamic", + &v1.LabelSelector{MatchLabels: map[string]string{"name": "test"}}, + nil, + &v1.LabelSelector{MatchLabels: map[string]string{"name": "default"}}, + &v1.LabelSelector{MatchLabels: map[string]string{"name": "pod"}}, + false, + ) + + staticPolicy = newPolicy( + "static", + &v1.LabelSelector{MatchLabels: map[string]string{"name": "test"}}, + sets.New(staticHopGWIP), + nil, + nil, + false, + ) + + annotatedPodGW = &corev1.Pod{ + ObjectMeta: v1.ObjectMeta{Name: "annotatedPod", Namespace: "default", + Labels: map[string]string{"name": "annotatedPod"}, + Annotations: map[string]string{"k8s.ovn.org/routing-namespaces": "test", "k8s.ovn.org/routing-network": ""}, + }, + Spec: corev1.PodSpec{HostNetwork: true}, + Status: corev1.PodStatus{PodIPs: []corev1.PodIP{{IP: annotatedPodIP}}, Phase: corev1.PodRunning}, + } + + podGW = &corev1.Pod{ + ObjectMeta: v1.ObjectMeta{Name: "pod", Namespace: "default", + Labels: map[string]string{"name": "pod"}}, + Spec: corev1.PodSpec{HostNetwork: true}, + Status: corev1.PodStatus{PodIPs: []corev1.PodIP{{IP: dynamicHopHostNetPodIP}}, Phase: corev1.PodRunning}, + } + namespaceDefault = &corev1.Namespace{ + ObjectMeta: v1.ObjectMeta{Name: "default", + Labels: map[string]string{"name": "default"}}} + namespaceTest = &corev1.Namespace{ + ObjectMeta: v1.ObjectMeta{Name: "test", + Labels: map[string]string{"name": "test"}}, + } + namespaceTest2 = &corev1.Namespace{ + ObjectMeta: v1.ObjectMeta{Name: "test2", + Labels: map[string]string{"name": "test2"}}, + } + ) + AfterEach(func() { + nbsbCleanup.Cleanup() + }) + + 
BeforeEach(func() { + initialDB = libovsdbtest.TestSetup{ + NBData: []libovsdbtest.TestData{ + &nbdb.LogicalSwitch{ + Name: "node1", + }, + }, + } + nbClient, _, nbsbCleanup, err = libovsdbtest.NewNBSBTestHarness(initialDB) + Expect(err).NotTo(HaveOccurred()) + stopChan = make(chan struct{}) + + }) + + var _ = Context("When no pod or namespace routing network annotations coexist with the policies", func() { + + var _ = Context("When creating new namespaces", func() { + + It("registers the new namespace with no matching policies", func() { + initController([]runtime.Object{namespaceTest2}, []runtime.Object{dynamicPolicy}) + + Eventually(func() []string { return listRoutePolicyInCache() }, 5).Should(HaveLen(1)) + Eventually(func() adminpolicybasedrouteapi.AdminPolicyBasedExternalRouteSpec { + p, found := externalController.mgr.routePolicySyncCache.Load(dynamicPolicy.Name) + if !found { + return adminpolicybasedrouteapi.AdminPolicyBasedExternalRouteSpec{} + } + return p.policy.Spec + }, 5).Should(Equal(dynamicPolicy.Spec)) + Eventually(func() []string { return listNamespaceInfo() }, 5).Should(HaveLen(0)) + }) + + It("registers the new namespace with one matching policy containing one static gateway", func() { + initController([]runtime.Object{namespaceTest}, []runtime.Object{staticPolicy}) + + Eventually(func() []string { return listRoutePolicyInCache() }, 5).Should(HaveLen(1)) + Eventually(func() adminpolicybasedrouteapi.AdminPolicyBasedExternalRouteSpec { + p, found := externalController.mgr.routePolicySyncCache.Load(staticPolicy.Name) + if !found { + return adminpolicybasedrouteapi.AdminPolicyBasedExternalRouteSpec{} + } + return p.policy.Spec + }, 5).Should(Equal(staticPolicy.Spec)) + Eventually(func() []string { return listNamespaceInfo() }, 5).Should(HaveLen(1)) + Eventually(func() *namespaceInfo { return getNamespaceInfo(namespaceTest.Name) }, 5).Should( + BeEquivalentTo( + &namespaceInfo{ + policies: sets.New(staticPolicy.Name), + staticGateways: 
gatewayInfoList{{gws: sets.New(staticHopGWIP)}}, + dynamicGateways: make(map[ktypes.NamespacedName]*gatewayInfo)})) + }) + It("registers a new namespace with one policy that includes a dynamic GW", func() { + initController([]runtime.Object{namespaceTest, namespaceDefault, podGW}, []runtime.Object{dynamicPolicy}) + + By("validating that the namespace cache contains the test namespace and that it reflect the applicable policy") + Eventually(func() []string { return listNamespaceInfo() }, 5).Should(HaveLen(1)) + Eventually(func() *namespaceInfo { return getNamespaceInfo(namespaceTest.Name) }, 5).Should( + BeEquivalentTo( + &namespaceInfo{ + policies: sets.New(dynamicPolicy.Name), + staticGateways: gatewayInfoList{}, + dynamicGateways: map[ktypes.NamespacedName]*gatewayInfo{{Namespace: podGW.Namespace, Name: podGW.Name}: {gws: sets.New(dynamicHopHostNetPodIP)}}})) + }) + + It("registers a new namespace with one policy with dynamic GWs and the IP of an annotated pod", func() { + + initController([]runtime.Object{namespaceTest, namespaceDefault, podGW, annotatedPodGW}, []runtime.Object{dynamicPolicy}) + + By("validating that the namespace cache contains the test namespace and that it reflect the applicable policy") + Eventually(func() []string { return listNamespaceInfo() }, 5).Should(HaveLen(1)) + Eventually(func() *namespaceInfo { return getNamespaceInfo(namespaceTest.Name) }, 5).Should( + BeEquivalentTo( + &namespaceInfo{ + policies: sets.New(dynamicPolicy.Name), + staticGateways: gatewayInfoList{}, + dynamicGateways: map[ktypes.NamespacedName]*gatewayInfo{{Namespace: podGW.Namespace, Name: podGW.Name}: {gws: sets.New(dynamicHopHostNetPodIP)}}})) + }) + + It("registers a new namespace with one policy and validates that the deleted field is set to false", func() { + initController([]runtime.Object{namespaceTest, namespaceDefault, podGW, annotatedPodGW}, []runtime.Object{dynamicPolicy}) + + deleteNamespace(namespaceTest.Name, fakeClient) + By("validating that the 
namespace cache no longer contains the test namespace") + Eventually(func() []string { return listNamespaceInfo() }, 5).Should(HaveLen(0)) + + _, err = fakeClient.CoreV1().Namespaces().Create(context.TODO(), namespaceTest, v1.CreateOptions{}) + Expect(err).NotTo(HaveOccurred()) + By("validating that the namespace cache is contained in the namespace info cache and it reflects the correct policy") + + Eventually(func() []string { return listNamespaceInfo() }, 5).Should(HaveLen(1)) + Eventually(func() *namespaceInfo { return getNamespaceInfo(namespaceTest.Name) }, 5).Should( + BeEquivalentTo( + &namespaceInfo{ + policies: sets.New(dynamicPolicy.Name), + staticGateways: gatewayInfoList{}, + dynamicGateways: map[ktypes.NamespacedName]*gatewayInfo{{Namespace: podGW.Namespace, Name: podGW.Name}: {gws: sets.New(dynamicHopHostNetPodIP)}}})) + }) + }) + }) + + var _ = Context("When deleting a namespace", func() { + + It("validates that the namespace cache is empty and marked as deleted when the namespace was a recipient for policies", func() { + initController([]runtime.Object{namespaceTest}, []runtime.Object{staticPolicy}) + + Expect(externalController.mgr.namespaceInfoSyncCache.GetKeys()).To(HaveLen(0)) + Eventually(func() []string { return listNamespaceInfo() }, 5).Should(HaveLen(1)) + Eventually(func() *namespaceInfo { return getNamespaceInfo(namespaceTest.Name) }, 5).Should( + Equal( + &namespaceInfo{ + policies: sets.New(staticPolicy.Name), + staticGateways: gatewayInfoList{{gws: sets.New(staticHopGWIP)}}, + dynamicGateways: make(map[ktypes.NamespacedName]*gatewayInfo)})) + + deleteNamespace(namespaceTest.Name, fakeClient) + Eventually(func() []string { return listNamespaceInfo() }, 5).Should(HaveLen(0)) + + }) + It("validates that the namespace cache is empty when the namespace that is recipient for any policy is deleted", func() { + initController([]runtime.Object{namespaceDefault}, []runtime.Object{staticPolicy}) + + Eventually(func() []string { return 
listNamespaceInfo() }, 5).Should(HaveLen(0)) + deleteNamespace(namespaceDefault.Name, fakeClient) + Eventually(func() []string { return listNamespaceInfo() }, 5).Should(HaveLen(0)) + }) + + }) + + var _ = Context("When updating an existing namespace", func() { + + var ( + dynamicPolicyTest2 = newPolicy( + "dynamicPolicyTest2", + &v1.LabelSelector{MatchLabels: map[string]string{"key": "test"}}, + nil, + &v1.LabelSelector{MatchLabels: map[string]string{"name": "default"}}, + &v1.LabelSelector{MatchLabels: map[string]string{"name": "pod"}}, + false, + ) + ) + It("validates that a namespace is targeted by an existing policy after its labels are updated to match the policy's label selector", func() { + initController([]runtime.Object{namespaceDefault, namespaceTest2}, []runtime.Object{staticPolicy}) + + Eventually(func() []string { return listNamespaceInfo() }, 15).Should(HaveLen(0)) + updateNamespaceLabel(namespaceTest2.Name, staticPolicy.Spec.From.NamespaceSelector.MatchLabels, fakeClient) + Eventually(func() []string { return listNamespaceInfo() }, 15).Should(HaveLen(1)) + Eventually(func() *namespaceInfo { return getNamespaceInfo(namespaceTest2.Name) }, 15).Should( + Equal( + &namespaceInfo{ + policies: sets.New(staticPolicy.Name), + staticGateways: gatewayInfoList{{gws: sets.New(staticHopGWIP)}}, + dynamicGateways: make(map[ktypes.NamespacedName]*gatewayInfo)})) + }) + It("validates that a namespace is no longer targeted by an existing policy when its labels are updated so that they don't match the policy's label selector", func() { + initController([]runtime.Object{namespaceDefault, namespaceTest}, []runtime.Object{staticPolicy}) + Eventually(func() []string { return listNamespaceInfo() }, 5).Should(HaveLen(1)) + Eventually(func() *namespaceInfo { return getNamespaceInfo(namespaceTest.Name) }, 5).Should( + Equal( + &namespaceInfo{ + policies: sets.New(staticPolicy.Name), + staticGateways: gatewayInfoList{{gws: sets.New(staticHopGWIP)}}, + dynamicGateways: 
make(map[ktypes.NamespacedName]*gatewayInfo)})) + updateNamespaceLabel(namespaceTest.Name, dynamicPolicyTest2.Spec.From.NamespaceSelector.MatchLabels, fakeClient) + Eventually(func() []string { return listNamespaceInfo() }, 5).Should(HaveLen(0)) + Eventually(func() *namespaceInfo { return getNamespaceInfo(namespaceTest.Name) }, 5).Should(BeNil()) + }) + + It("validates that a namespace changes its policies when its labels are changed to match a different policy, resulting in the later on being the only policy applied to the namespace", func() { + initController([]runtime.Object{namespaceDefault, namespaceTest, podGW}, []runtime.Object{staticPolicy, dynamicPolicyTest2}) + Eventually(func() []string { return listNamespaceInfo() }, 5).Should(HaveLen(1)) + Eventually(func() *namespaceInfo { return getNamespaceInfo(namespaceTest.Name) }, 5).Should( + Equal( + &namespaceInfo{ + policies: sets.New(staticPolicy.Name), + staticGateways: gatewayInfoList{{gws: sets.New(staticHopGWIP)}}, + dynamicGateways: make(map[ktypes.NamespacedName]*gatewayInfo)})) + updateNamespaceLabel(namespaceTest.Name, dynamicPolicyTest2.Spec.From.NamespaceSelector.MatchLabels, fakeClient) + Eventually(func() []string { return listNamespaceInfo() }, 5).Should(HaveLen(1)) + Eventually(func() *namespaceInfo { return getNamespaceInfo(namespaceTest.Name) }, 5).Should( + BeEquivalentTo( + &namespaceInfo{ + policies: sets.New(dynamicPolicyTest2.Name), + staticGateways: gatewayInfoList{}, + dynamicGateways: map[ktypes.NamespacedName]*gatewayInfo{{Namespace: podGW.Namespace, Name: podGW.Name}: {gws: sets.New(dynamicHopHostNetPodIP)}}})) + + }) + + It("validates that a namespace is now targeted by a second policy once its labels are updated to match the first and second policy", func() { + initController([]runtime.Object{namespaceDefault, namespaceTest, podGW}, []runtime.Object{staticPolicy, dynamicPolicyTest2}) + Eventually(func() []string { return listNamespaceInfo() }, 5).Should(HaveLen(1)) + 
Eventually(func() *namespaceInfo { return getNamespaceInfo(namespaceTest.Name) }, 5).Should( + Equal( + &namespaceInfo{ + policies: sets.New(staticPolicy.Name), + staticGateways: gatewayInfoList{{gws: sets.New(staticHopGWIP)}}, + dynamicGateways: make(map[ktypes.NamespacedName]*gatewayInfo)})) + aggregatedLabels := map[string]string{"name": "test", "key": "test"} + updateNamespaceLabel(namespaceTest.Name, aggregatedLabels, fakeClient) + Eventually(func() []string { return listNamespaceInfo() }, 5).Should(HaveLen(1)) + Eventually(func() *namespaceInfo { return getNamespaceInfo(namespaceTest.Name) }, 5).Should( + BeEquivalentTo( + &namespaceInfo{ + policies: sets.New(staticPolicy.Name, dynamicPolicyTest2.Name), + staticGateways: gatewayInfoList{{gws: sets.New(staticHopGWIP)}}, + dynamicGateways: map[ktypes.NamespacedName]*gatewayInfo{{Namespace: podGW.Namespace, Name: podGW.Name}: {gws: sets.New(dynamicHopHostNetPodIP)}}})) + }) + }) + +}) diff --git a/go-controller/pkg/ovn/controller/apbroute/external_controller_pod.go b/go-controller/pkg/ovn/controller/apbroute/external_controller_pod.go new file mode 100644 index 0000000000..60f21181f0 --- /dev/null +++ b/go-controller/pkg/ovn/controller/apbroute/external_controller_pod.go @@ -0,0 +1,436 @@ +package apbroute + +import ( + "encoding/json" + "fmt" + "net" + "strings" + + nettypes "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/apis/k8s.cni.cncf.io/v1" + adminpolicybasedrouteapi "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/adminpolicybasedroute/v1" + v1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + ktypes "k8s.io/apimachinery/pkg/types" + "k8s.io/apimachinery/pkg/util/sets" + "k8s.io/klog/v2" + utilnet "k8s.io/utils/net" +) + +// processAddPod covers 2 scenarios: +// 1) The pod is an external gateway, in which case it needs to propagate its IP to a set of pods in the cluster. 
+// Determining which namespaces to update is determined by matching the pod's namespace and label selector against
+// all the existing Admin Policy Based External route CRs. It's a reverse lookup:
+//
+// pod GW -> dynamic hop -> APB External Route CR -> target namespaces (label selector in the CR's `From` field) -> pods in namespace
+//
+// 2) The pod belongs to a namespace impacted by at least one APB External Route CR, in which case its logical routes need to be
+// updated to reflect the external routes.
+//
+// A pod can only be either an external gateway or a consumer of an external route policy.
+func (m *externalPolicyManager) processAddPod(newPod *v1.Pod) error {
+
+	// the pod can either be a gateway pod or a standard pod that requires no processing from the external controller.
+	// to determine either way, find out which matching dynamic hops include this pod. If none applies, then this is
+	// a standard pod and all that is needed is to update its logical routes to include all the external gateways, if they exist.
+	podPolicies, err := m.findMatchingDynamicPolicies(newPod)
+	if err != nil {
+		return err
+	}
+	if len(podPolicies) > 0 {
+		// this is a gateway pod
+		klog.Infof("Adding pod gateway %s/%s for policy %+v", newPod.Namespace, newPod.Name, podPolicies)
+		return m.applyPodGWPolicies(newPod, podPolicies)
+	}
+	cacheInfo, found := m.getNamespaceInfoFromCache(newPod.Namespace)
+	if !found || (found && cacheInfo.policies.Len() == 0) {
+		// this is a standard pod and there are no external gateway policies applicable to the pod's namespace. Nothing to do
+		if !found {
+			return nil
+		}
+		m.unlockNamespaceInfoCache(newPod.Namespace)
+		return nil
+	}
+	defer m.unlockNamespaceInfoCache(newPod.Namespace)
+	// there are external gateway policies applicable to the pod's namespace. 
+ klog.Infof("Applying policies to new pod %s/%s %+v", newPod.Namespace, newPod.Name, cacheInfo.policies) + return m.applyGatewayInfoToPod(newPod, cacheInfo.staticGateways, cacheInfo.dynamicGateways) +} + +func (m *externalPolicyManager) applyPodGWPolicies(pod *v1.Pod, externalRoutePolicies []*adminpolicybasedrouteapi.AdminPolicyBasedExternalRoute) error { + for _, erp := range externalRoutePolicies { + err := m.applyPodGWPolicy(pod, erp) + if err != nil { + return err + } + } + return nil +} + +func (m *externalPolicyManager) applyPodGWPolicy(pod *v1.Pod, externalRoutePolicy *adminpolicybasedrouteapi.AdminPolicyBasedExternalRoute) error { + klog.Infof("Processing policy %s for pod %s/%s", externalRoutePolicy.Name, pod.Namespace, pod.Name) + routePolicy, err := m.getRoutePolicyForPodGateway(pod, externalRoutePolicy) + if err != nil { + return err + } + // update all namespaces targeted by this pod's policy to include the new pod IP as their external GW + err = m.applyProcessedPolicy(externalRoutePolicy.Name, routePolicy) + if err != nil { + return err + } + gwInfoMap, err := m.aggregateDynamicRouteGatewayInformation(pod, routePolicy) + if err != nil { + return err + } + key := ktypes.NamespacedName{Namespace: pod.Namespace, Name: pod.Name} + // update the namespace information for each targeted namespace to reflect the gateway IPs that handle external traffic + for ns, gwInfo := range gwInfoMap { + cacheInfo, found := m.getNamespaceInfoFromCache(ns) + if !found { + klog.Warningf("Attempting to update the dynamic gateway information for pod %s in a namespace that does not exist %s", pod.Name, ns) + continue + } + // update the gwInfo in the namespace cache + cacheInfo.dynamicGateways[key] = gwInfo + m.unlockNamespaceInfoCache(ns) + if err != nil { + return err + } + } + return nil +} + +func (m *externalPolicyManager) removePodGatewayFromNamespace(nsName string, podNamespacedName ktypes.NamespacedName) error { + // retrieve the gateway information from the impacted 
namespace's cache
+	cacheInfo, found := m.getNamespaceInfoFromCache(nsName)
+	if !found {
+		klog.Warningf("Attempting to remove pod gateway %s/%s from a namespace that does not exist %s", podNamespacedName.Namespace, podNamespacedName.Name, nsName)
+		return nil
+	}
+	defer m.unlockNamespaceInfoCache(nsName)
+
+	gateways, found := cacheInfo.dynamicGateways[podNamespacedName]
+	if !found {
+		klog.Warningf("Pod %s/%s not found in dynamic cacheInfo for namespace %s", podNamespacedName.Namespace, podNamespacedName.Name, nsName)
+		return nil
+	}
+	annotatedGWIPs, err := m.calculateAnnotatedNamespaceGatewayIPsForNamespace(nsName)
+	if err != nil {
+		return err
+	}
+	// it is safe to pass the current policies and not to expect the pod IP in the coexisting list of IPs since the pod will no longer match the dynamic hop selectors in any of the policies
+	coexistingIPs, err := m.retrieveDynamicGatewayIPsForPolicies(cacheInfo.policies)
+	if err != nil {
+		return err
+	}
+	coexistingIPs = coexistingIPs.Union(annotatedGWIPs)
+	// Filter out the IPs that are not in coexisting. Those IPs are to be deleted.
+	invalidGWIPs := gateways.gws.Difference(coexistingIPs)
+	// Filter out the IPs from the coexisting list that are to be kept by calculating the difference between the coexisting and those IPs that are to be deleted and not coexisting at the same time.
+	ipsToKeep := coexistingIPs.Difference(invalidGWIPs)
+	klog.Infof("Coexisting %s, invalid %s, ipsToKeep %s", strings.Join(sets.List(coexistingIPs), ","), strings.Join(sets.List(invalidGWIPs), ","), strings.Join(sets.List(ipsToKeep), ","))
+	err = m.netClient.deleteGatewayIPs(nsName, invalidGWIPs, ipsToKeep)
+	if err != nil {
+		return err
+	}
+	gateways.gws.Delete(invalidGWIPs.UnsortedList()...) 
+ if gateways.gws.Len() == 0 { + // remove pod from namespace cache + delete(cacheInfo.dynamicGateways, podNamespacedName) + } + return nil +} + +func (m *externalPolicyManager) addPodGatewayToNamespace(podNamespacedName ktypes.NamespacedName, namespaceName string, processedPolicies []*routePolicy) error { + // the pod's gatewayInfo is unique to a namespace as the networkName field can differ depending on the policy definition of that field + // so we retrieve the correct one for the given target namespace from the pre-processed policies. It uses + // the target namespace and the key (pod_namespace,pod_name) as keys. + gatewayInfo, err := m.findGatewayInfoForPodInTargetNamespace(podNamespacedName, namespaceName, processedPolicies) + if err != nil { + return err + } + // use the pod's gatewayInfo to update the logical routes for all the pod's in the target namespace + err = m.addGWRoutesForNamespace(namespaceName, gatewayInfoList{gatewayInfo}) + if err != nil { + return err + } + cacheInfo, found := m.getNamespaceInfoFromCache(namespaceName) + defer m.unlockNamespaceInfoCache(namespaceName) + if !found { + cacheInfo = m.newNamespaceInfoInCache(namespaceName) + } + // add pod gateway information to the namespace cache + cacheInfo.dynamicGateways[podNamespacedName] = gatewayInfo + return nil +} + +// processUpdatePod takes in an updated gateway pod and the list of old namespaces where the pod was used as egress gateway and proceeds as follows +// - Finds the matching policies that apply to the pod based on the dynamic hop pod and namespace selectors. If the labels in the pod have not changed, the policies will match to the existing one. +// - Based on the policies that use the pod IP as gateway, determine the namespaces where the pod IP will be used as egress gateway. If the namespaces match, return without error +// - Remove the pod IP as egress gateway from the namespaces that are no longer impacted by the pod. 
This is determined by calculating the difference between the old namespaces and the new ones based on the policies +// applicable to the updated pod. +// - Add the pod IP as egress gateway to the namespaces that are now being impacted by the changes in the pod. +func (m *externalPolicyManager) processUpdatePod(updatedPod *v1.Pod, oldTargetNs sets.Set[string]) error { + + // find the policies that apply to this new pod. Unless there are changes to the labels, they should be identical. + newPodPolicies, err := m.findMatchingDynamicPolicies(updatedPod) + if err != nil { + return err + } + key := ktypes.NamespacedName{Namespace: updatedPod.Namespace, Name: updatedPod.Name} + // aggregate the expected target namespaces based on the new pod's labels and current policies + // if the labels have not changed, the new targeted namespaces and the old ones should be identical + newTargetNs, err := m.aggregateTargetNamespacesByPolicies(key, newPodPolicies) + if err != nil { + return err + } + if oldTargetNs.Equal(newTargetNs) { + // targeting the same namespaces. 
Nothing to do
+		return nil
+	}
+	// the pods have changed and they don't target the same sets of namespaces, delete its reference on the ones that don't apply
+	// and add to the new ones, if necessary
+	nsToRemove := oldTargetNs.Difference(newTargetNs)
+	nsToAdd := newTargetNs.Difference(oldTargetNs)
+	klog.Infof("Removing pod gateway %s/%s from namespace(s): %s", updatedPod.Namespace, updatedPod.Name, strings.Join(sets.List(nsToRemove), ","))
+	klog.Infof("Adding pod gateway %s/%s to namespace(s): %s", updatedPod.Namespace, updatedPod.Name, strings.Join(sets.List(nsToAdd), ","))
+	// retrieve the gateway information for the pod
+	for ns := range nsToRemove {
+		err = m.removePodGatewayFromNamespace(ns, ktypes.NamespacedName{Namespace: updatedPod.Namespace, Name: updatedPod.Name})
+		if err != nil {
+			return err
+		}
+	}
+
+	// pre-process the policies so we can apply them. This process extracts from the CR the contents of the policies
+	// into an internal structure that contains the static and dynamic hops information. 
+ pp, err := m.processExternalRoutePolicies(newPodPolicies) + if err != nil { + return err + } + + for ns := range nsToAdd { + err = m.addPodGatewayToNamespace(ktypes.NamespacedName{Namespace: updatedPod.Namespace, Name: updatedPod.Name}, ns, pp) + if err != nil { + return err + } + } + + return nil +} + +func (m *externalPolicyManager) aggregateTargetNamespacesByPolicies(podName ktypes.NamespacedName, externalRoutePolicies []*adminpolicybasedrouteapi.AdminPolicyBasedExternalRoute) (sets.Set[string], error) { + targetNamespaces := sets.New[string]() + for _, erp := range externalRoutePolicies { + namespaces, err := m.listNamespacesBySelector(&erp.Spec.From.NamespaceSelector) + if err != nil { + return nil, err + } + for _, ns := range namespaces { + if targetNamespaces.Has(ns.Name) { + klog.Warningf("External gateway pod %s targets namespace %s more than once", podName.Namespace, podName.Name) + continue + } + targetNamespaces = targetNamespaces.Insert(ns.Name) + } + } + return targetNamespaces, nil +} + +func (m *externalPolicyManager) findGatewayInfoForPodInTargetNamespace(key ktypes.NamespacedName, targetNamespace string, processedPolicies []*routePolicy) (*gatewayInfo, error) { + for _, p := range processedPolicies { + namespaces, err := m.listNamespacesBySelector(p.targetNamespacesSelector) + if err != nil { + return nil, err + } + for _, targetNs := range namespaces { + if targetNs.Name == targetNamespace { + return p.dynamicGateways[key], nil + } + } + } + return nil, fmt.Errorf("gateway information for pod %s/%s not found", key.Namespace, key.Name) +} + +// processDeletePod removes the gateway IP derived from the pod. The IP is then removed from all the pods found in the namespaces by the +// network client (north bound as logical static route or in conntrack). 
+func (m *externalPolicyManager) processDeletePod(pod *v1.Pod, namespaces sets.Set[string]) error { + err := m.deletePodGatewayInNamespaces(pod, namespaces) + if err != nil { + return err + } + return nil +} + +func (m *externalPolicyManager) deletePodGatewayInNamespaces(pod *v1.Pod, targetNamespaces sets.Set[string]) error { + + for nsName := range targetNamespaces { + err := m.deletePodGatewayInNamespace(pod, nsName) + if err != nil { + return err + } + } + return nil +} + +func (m *externalPolicyManager) deletePodGatewayInNamespace(pod *v1.Pod, targetNamespace string) error { + + key := ktypes.NamespacedName{Namespace: pod.Namespace, Name: pod.Name} + cacheInfo, found := m.getNamespaceInfoFromCache(targetNamespace) + if !found { + klog.Warningf("Attempting to delete pod gateway %s/%s from a namespace that does not exist %s", pod.Namespace, pod.Name, targetNamespace) + return nil + } + defer m.unlockNamespaceInfoCache(targetNamespace) + gwInfo, ok := cacheInfo.dynamicGateways[key] + if !ok { + return fmt.Errorf("unable to find cached pod %s/%s external gateway information in namespace %s", pod.Namespace, pod.Name, targetNamespace) + } + annotatedGWIPs, err := m.calculateAnnotatedNamespaceGatewayIPsForNamespace(targetNamespace) + if err != nil { + return err + } + coexistingIPs, err := m.retrieveDynamicGatewayIPsForPolicies(cacheInfo.policies) + if err != nil { + return err + } + coexistingIPs = coexistingIPs.Union(annotatedGWIPs) + // Filter out the IPs that are not in coexisting. Those IPs are to be deleted. + invalidGWIPs := gwInfo.gws.Difference(coexistingIPs) + // Filter out the IPs from the coexisting list that are to be kept by calculating the difference between the coexising and those IPs that are to be deleted and not coexisting at the same time. 
+ ipsToKeep := coexistingIPs.Difference(invalidGWIPs) + klog.Infof("Coexisting %s, invalid %s, ipsToKeep %s", strings.Join(sets.List(coexistingIPs), ","), strings.Join(sets.List(invalidGWIPs), ","), strings.Join(sets.List(ipsToKeep), ",")) + err = m.netClient.deleteGatewayIPs(targetNamespace, invalidGWIPs, ipsToKeep) + if err != nil { + return err + } + gwInfo.gws.Delete(invalidGWIPs.UnsortedList()...) + if cacheInfo.dynamicGateways[key].gws.Len() == 0 { + delete(cacheInfo.dynamicGateways, key) + } + return nil +} + +// processAddPodRoutes applies the policies associated to the pod's namespace to the pod logical route +func (m *externalPolicyManager) applyGatewayInfoToPod(newPod *v1.Pod, static gatewayInfoList, dynamic map[ktypes.NamespacedName]*gatewayInfo) error { + err := m.netClient.addGatewayIPs(newPod, static) + if err != nil { + return err + } + for _, egress := range dynamic { + err := m.netClient.addGatewayIPs(newPod, gatewayInfoList{egress}) + if err != nil { + return err + } + } + return nil +} + +// getRoutePolicyForPodGateway iterates through the dynamic hops of a given external route policy spec to determine the pod's GW information. +// Note that a pod can match multiple policies with different configuration at the same time, with the condition +// that the pod can only target the same namespace once at most. That's a 1-1 pod to namespace match. 
+func (m *externalPolicyManager) getRoutePolicyForPodGateway(newPod *v1.Pod, externalRoutePolicy *adminpolicybasedrouteapi.AdminPolicyBasedExternalRoute) (*routePolicy, error) { + + key := ktypes.NamespacedName{Namespace: newPod.Namespace, Name: newPod.Name} + + pp, err := m.processExternalRoutePolicy(externalRoutePolicy) + if err != nil { + return nil, err + } + if _, ok := pp.dynamicGateways[key]; !ok { + return nil, fmt.Errorf("pod %s not found while processing dynamic hops", key) + } + // store only the information needed + return &routePolicy{ + targetNamespacesSelector: pp.targetNamespacesSelector, + dynamicGateways: map[ktypes.NamespacedName]*gatewayInfo{key: pp.dynamicGateways[key]}, + }, nil + +} + +func getExGwPodIPs(gatewayPod *v1.Pod, networkName string) (sets.Set[string], error) { + if networkName != "" { + return getMultusIPsFromNetworkName(gatewayPod, networkName) + } + if gatewayPod.Spec.HostNetwork { + return getPodIPs(gatewayPod), nil + } + return nil, fmt.Errorf("ignoring pod %s as an external gateway candidate. 
Invalid combination "+ + "of host network: %t and routing-network annotation: %s", gatewayPod.Name, gatewayPod.Spec.HostNetwork, + networkName) +} + +func getPodIPs(pod *v1.Pod) sets.Set[string] { + foundGws := sets.New[string]() + for _, podIP := range pod.Status.PodIPs { + ip := utilnet.ParseIPSloppy(podIP.IP) + if ip != nil { + foundGws.Insert(ip.String()) + } + } + return foundGws +} + +func getMultusIPsFromNetworkName(pod *v1.Pod, networkName string) (sets.Set[string], error) { + foundGws := sets.New[string]() + var multusNetworks []nettypes.NetworkStatus + err := json.Unmarshal([]byte(pod.ObjectMeta.Annotations[nettypes.NetworkStatusAnnot]), &multusNetworks) + if err != nil { + return nil, fmt.Errorf("unable to unmarshall annotation on pod %s k8s.v1.cni.cncf.io/network-status '%s': %v", + pod.Name, pod.ObjectMeta.Annotations[nettypes.NetworkStatusAnnot], err) + } + for _, multusNetwork := range multusNetworks { + if multusNetwork.Name == networkName { + for _, gwIP := range multusNetwork.IPs { + ip := net.ParseIP(gwIP) + if ip != nil { + foundGws.Insert(ip.String()) + } + } + return foundGws, nil + } + } + return nil, fmt.Errorf("unable to find multus network %s in pod %s/%s", networkName, pod.Namespace, pod.Name) +} + +func (m *externalPolicyManager) filterNamespacesUsingPodGateway(key ktypes.NamespacedName) sets.Set[string] { + namespaces := sets.New[string]() + nsList := m.listNamespaceInfoCache() + for _, namespaceName := range nsList { + cacheInfo, found := m.getNamespaceInfoFromCache(namespaceName) + if !found { + continue + } + if _, ok := cacheInfo.dynamicGateways[key]; ok { + namespaces = namespaces.Insert(namespaceName) + } + m.unlockNamespaceInfoCache(namespaceName) + } + return namespaces +} + +func (m *externalPolicyManager) listPodsInNamespaceWithSelector(namespace string, selector *metav1.LabelSelector) ([]*v1.Pod, error) { + + s, err := metav1.LabelSelectorAsSelector(selector) + if err != nil { + return nil, err + } + return 
m.podLister.Pods(namespace).List(s) +} + +func containsNamespaceInSlice(nss []*v1.Namespace, podNs string) bool { + for _, ns := range nss { + if ns.Name == podNs { + return true + } + } + return false +} + +func containsPodInSlice(pods []*v1.Pod, podName string) bool { + for _, pod := range pods { + if pod.Name == podName { + return true + } + } + return false +} diff --git a/go-controller/pkg/ovn/controller/apbroute/external_controller_pod_test.go b/go-controller/pkg/ovn/controller/apbroute/external_controller_pod_test.go new file mode 100644 index 0000000000..690a1b7637 --- /dev/null +++ b/go-controller/pkg/ovn/controller/apbroute/external_controller_pod_test.go @@ -0,0 +1,500 @@ +package apbroute + +import ( + "context" + "reflect" + "strconv" + "time" + + . "github.com/onsi/ginkgo" + . "github.com/onsi/gomega" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/nbdb" + libovsdbtest "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/testing/libovsdb" + corev1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + v1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/types" + "k8s.io/apimachinery/pkg/util/sets" + "k8s.io/client-go/kubernetes/fake" +) + +var _ = Describe("OVN External Gateway policy", func() { + + var ( + namespaceDefault = &corev1.Namespace{ + ObjectMeta: v1.ObjectMeta{Name: "default", + Labels: map[string]string{"name": "default"}}} + namespaceTest = &corev1.Namespace{ + ObjectMeta: v1.ObjectMeta{Name: "test", + Labels: map[string]string{"name": "test", "match": "test", "multiple": "true"}}, + } + namespaceTest2 = &corev1.Namespace{ + ObjectMeta: v1.ObjectMeta{Name: "test2", + Labels: map[string]string{"name": "test2", "match": "test2", "multiple": "true"}}, + } + + dynamicPolicy = newPolicy( + "dynamic", + &v1.LabelSelector{MatchLabels: map[string]string{"name": "test"}}, + nil, + &v1.LabelSelector{MatchLabels: map[string]string{"name": "default"}}, + 
&v1.LabelSelector{MatchLabels: map[string]string{"key": "pod"}}, + false, + ) + + dynamicPolicyForTest2Only = newPolicy( + "policyForTest2", + &v1.LabelSelector{MatchLabels: map[string]string{"match": "test2"}}, + nil, + &v1.LabelSelector{MatchLabels: map[string]string{"name": "default"}}, + &v1.LabelSelector{MatchLabels: map[string]string{"duplicated": "true"}}, + false, + ) + + overlappingPolicy = newPolicy( + "overlapping", + &v1.LabelSelector{MatchLabels: map[string]string{"match": "test"}}, + nil, + &v1.LabelSelector{MatchLabels: map[string]string{"name": "default"}}, + &v1.LabelSelector{MatchLabels: map[string]string{"duplicated": "true"}}, + false, + ) + + multipleNamespacesPolicy = newPolicy( + "multipleNamespaces", + &v1.LabelSelector{MatchLabels: map[string]string{"multiple": "true"}}, + nil, + &v1.LabelSelector{MatchLabels: map[string]string{"name": "default"}}, + &v1.LabelSelector{MatchLabels: map[string]string{"key": "pod"}}, + false, + ) + + pod1 = newPod("pod_1", "default", "192.168.10.1", map[string]string{"key": "pod", "name": "pod1", "duplicated": "true"}) + pod2 = newPod("pod_2", "default", "192.168.20.1", map[string]string{"key": "pod", "name": "pod2"}) + pod3 = newPod("pod_3", "default", "192.168.30.1", map[string]string{"key": "pod", "name": "pod3"}) + ) + AfterEach(func() { + nbsbCleanup.Cleanup() + }) + + BeforeEach(func() { + initialDB = libovsdbtest.TestSetup{ + NBData: []libovsdbtest.TestData{ + &nbdb.LogicalSwitch{ + UUID: "node1", + Name: "node1", + }, + }, + } + nbClient, _, nbsbCleanup, err = libovsdbtest.NewNBSBTestHarness(initialDB) + Expect(err).NotTo(HaveOccurred()) + stopChan = make(chan struct{}) + + }) + + var _ = Context("When adding a new pod", func() { + + It("processes the pod that is a pod gateway with multiples matching policies each in a different namespaces", func() { + + initController([]runtime.Object{namespaceDefault, namespaceTest, namespaceTest2}, []runtime.Object{multipleNamespacesPolicy}) + Eventually(func() 
[]string { return listRoutePolicyInCache() }, 5).Should(HaveLen(1)) + Eventually(func() []string { return listNamespaceInfo() }, 5).Should(HaveLen(2)) + Eventually(func() *namespaceInfo { return getNamespaceInfo(namespaceTest.Name) }, 5).Should( + Equal( + &namespaceInfo{ + policies: sets.New(multipleNamespacesPolicy.Name), + staticGateways: gatewayInfoList{}, + dynamicGateways: map[types.NamespacedName]*gatewayInfo{}, + })) + Eventually(func() *namespaceInfo { return getNamespaceInfo(namespaceTest2.Name) }, 5).Should( + Equal( + &namespaceInfo{ + policies: sets.New(multipleNamespacesPolicy.Name), + staticGateways: gatewayInfoList{}, + dynamicGateways: map[types.NamespacedName]*gatewayInfo{}, + })) + _, err := fakeClient.CoreV1().Pods(pod1.Namespace).Create(context.Background(), pod1, v1.CreateOptions{}) + Expect(err).NotTo(HaveOccurred()) + Eventually(func() *namespaceInfo { return getNamespaceInfo(namespaceTest.Name) }, 5).Should( + Equal( + &namespaceInfo{ + policies: sets.New(multipleNamespacesPolicy.Name), + staticGateways: gatewayInfoList{}, + dynamicGateways: map[types.NamespacedName]*gatewayInfo{ + {Namespace: "default", Name: pod1.Name}: { + gws: sets.New(pod1.Status.PodIPs[0].IP), + }, + }, + })) + Eventually(func() *namespaceInfo { return getNamespaceInfo(namespaceTest2.Name) }, 5).Should( + Equal( + &namespaceInfo{ + policies: sets.New(multipleNamespacesPolicy.Name), + staticGateways: gatewayInfoList{}, + dynamicGateways: map[types.NamespacedName]*gatewayInfo{ + {Namespace: "default", Name: pod1.Name}: { + gws: sets.New(pod1.Status.PodIPs[0].IP), + }, + }})) + + }) + + It("processes the pod that has no policy match", func() { + noMatchPolicy := newPolicy( + "noMatchPolicy", + &v1.LabelSelector{MatchLabels: map[string]string{"match": "test"}}, + nil, + &v1.LabelSelector{MatchLabels: map[string]string{"name": "default"}}, + &v1.LabelSelector{MatchLabels: map[string]string{"key": "nomatch"}}, + false, + ) + initController([]runtime.Object{namespaceDefault, 
namespaceTest}, []runtime.Object{noMatchPolicy}) + Eventually(func() []string { return listRoutePolicyInCache() }, 5).Should(HaveLen(1)) + _, err := fakeClient.CoreV1().Pods(pod1.Namespace).Create(context.Background(), pod1, v1.CreateOptions{}) + Expect(err).NotTo(HaveOccurred()) + Eventually(func() []string { return listNamespaceInfo() }, 5).Should(HaveLen(1)) + Eventually(func() *namespaceInfo { return getNamespaceInfo(namespaceTest.Name) }, 5).Should( + Equal( + &namespaceInfo{ + policies: sets.New(noMatchPolicy.Name), + staticGateways: gatewayInfoList{}, + dynamicGateways: map[types.NamespacedName]*gatewayInfo{}, + })) + + }) + + It("processes a pod gateway that matches a two policies to the same target namespace", func() { + + initController([]runtime.Object{namespaceDefault, namespaceTest, namespaceTest2}, []runtime.Object{overlappingPolicy, dynamicPolicy}) + Eventually(func() []string { return listRoutePolicyInCache() }, 5).Should(HaveLen(2)) + Eventually(func() []string { return listNamespaceInfo() }, 5).Should(HaveLen(1)) + Eventually(func() *namespaceInfo { return getNamespaceInfo(namespaceTest.Name) }, 5).Should( + Equal( + &namespaceInfo{ + policies: sets.New(overlappingPolicy.Name, dynamicPolicy.Name), + staticGateways: gatewayInfoList{}, + dynamicGateways: map[types.NamespacedName]*gatewayInfo{}, + })) + _, err := fakeClient.CoreV1().Pods(pod1.Namespace).Create(context.Background(), pod1, v1.CreateOptions{}) + Expect(err).NotTo(HaveOccurred()) + Eventually(func() *namespaceInfo { return getNamespaceInfo(namespaceTest.Name) }, 5).Should( + Equal( + &namespaceInfo{ + policies: sets.New(overlappingPolicy.Name, dynamicPolicy.Name), + staticGateways: gatewayInfoList{}, + dynamicGateways: map[types.NamespacedName]*gatewayInfo{ + {Namespace: "default", Name: pod1.Name}: { + gws: sets.New(pod1.Status.PodIPs[0].IP), + }, + }})) + + }) + }) + + var _ = Context("When deleting a pod", func() { + It("deletes a pod gateway that matches two policies, each targeting 
a different namespace", func() { + dynamicPolicyTest2 := newPolicy( + "dynamicTest2", + &v1.LabelSelector{MatchLabels: map[string]string{"name": "test2"}}, + nil, + &v1.LabelSelector{MatchLabels: map[string]string{"name": "default"}}, + &v1.LabelSelector{MatchLabels: map[string]string{"key": "pod"}}, + false, + ) + initController([]runtime.Object{namespaceDefault, namespaceTest, namespaceTest2, pod1}, []runtime.Object{dynamicPolicyTest2, dynamicPolicy}) + Eventually(func() []string { return listRoutePolicyInCache() }, 5).Should(HaveLen(2)) + Eventually(func() []string { return listNamespaceInfo() }, 5).Should(HaveLen(2)) + Eventually(func() *namespaceInfo { return getNamespaceInfo(namespaceTest.Name) }, 5).Should(Equal( + &namespaceInfo{ + policies: sets.New(dynamicPolicy.Name), + staticGateways: gatewayInfoList{}, + dynamicGateways: map[types.NamespacedName]*gatewayInfo{ + {Namespace: "default", Name: pod1.Name}: { + gws: sets.New(pod1.Status.PodIPs[0].IP), + }, + }, + })) + Eventually(func() *namespaceInfo { return getNamespaceInfo(namespaceTest2.Name) }, 5).Should(Equal( + &namespaceInfo{ + policies: sets.New(dynamicPolicyTest2.Name), + staticGateways: gatewayInfoList{}, + dynamicGateways: map[types.NamespacedName]*gatewayInfo{ + {Namespace: "default", Name: pod1.Name}: { + gws: sets.New(pod1.Status.PodIPs[0].IP), + }, + }, + })) + deletePod(pod1, fakeClient) + Eventually(func() []string { return listNamespaceInfo() }, 5).Should(HaveLen(2)) + Eventually(func() *namespaceInfo { return getNamespaceInfo(namespaceTest.Name) }, 5).Should(Equal( + &namespaceInfo{ + policies: sets.New(dynamicPolicy.Name), + staticGateways: gatewayInfoList{}, + dynamicGateways: map[types.NamespacedName]*gatewayInfo{}, + })) + Eventually(func() *namespaceInfo { return getNamespaceInfo(namespaceTest2.Name) }, 5).Should(Equal( + &namespaceInfo{ + policies: sets.New(dynamicPolicyTest2.Name), + staticGateways: gatewayInfoList{}, + dynamicGateways: map[types.NamespacedName]*gatewayInfo{}, + 
})) + }) + + It("deletes a pod that does not match any policy", func() { + noMatchPolicy := newPolicy( + "nomatch", + &v1.LabelSelector{MatchLabels: map[string]string{"match": "test"}}, + nil, + &v1.LabelSelector{MatchLabels: map[string]string{"name": "default"}}, + &v1.LabelSelector{MatchLabels: map[string]string{"key": "nomatch"}}, + false, + ) + initController([]runtime.Object{namespaceDefault, namespaceTest, pod1}, []runtime.Object{noMatchPolicy}) + Eventually(func() []string { return listRoutePolicyInCache() }, 5).Should(HaveLen(1)) + Eventually(func() []string { return listNamespaceInfo() }, 5).Should(HaveLen(1)) + Eventually(func() *namespaceInfo { return getNamespaceInfo(namespaceTest.Name) }, 5).Should(Equal( + &namespaceInfo{ + policies: sets.New(noMatchPolicy.Name), + staticGateways: gatewayInfoList{}, + dynamicGateways: map[types.NamespacedName]*gatewayInfo{}, + })) + deletePod(pod1, fakeClient) + Eventually(func() bool { + _, err := fakeClient.CoreV1().Pods(pod1.Namespace).Get(context.Background(), pod1.Name, v1.GetOptions{}) + return apierrors.IsNotFound(err) + }).Should(BeTrue()) + Eventually(func() []string { return listRoutePolicyInCache() }, 5).Should(HaveLen(1)) + Eventually(func() []string { return listNamespaceInfo() }, 5).Should(HaveLen(1)) + Eventually(func() *namespaceInfo { return getNamespaceInfo(namespaceTest.Name) }, 5).Should(Equal( + &namespaceInfo{ + policies: sets.New(noMatchPolicy.Name), + staticGateways: gatewayInfoList{}, + dynamicGateways: map[types.NamespacedName]*gatewayInfo{}, + })) + }) + + It("deletes a pod gateway that is one of two pods that matches two policies to the same target namespace", func() { + initController([]runtime.Object{namespaceDefault, namespaceTest, pod1, pod2}, []runtime.Object{overlappingPolicy, dynamicPolicy}) + Eventually(func() []string { return listRoutePolicyInCache() }, 5).Should(HaveLen(2)) + Eventually(func() []string { return listNamespaceInfo() }, 5).Should(HaveLen(1)) + deletePod(pod1, 
fakeClient) + + Eventually(func() *namespaceInfo { return getNamespaceInfo(namespaceTest.Name) }, 5).Should(Equal( + &namespaceInfo{ + policies: sets.New(overlappingPolicy.Name, dynamicPolicy.Name), + staticGateways: gatewayInfoList{}, + dynamicGateways: map[types.NamespacedName]*gatewayInfo{ + {Namespace: "default", Name: pod2.Name}: { + gws: sets.New(pod2.Status.PodIPs[0].IP), + }, + }, + })) + }) + }) + + var _ = Context("When updating a pod", func() { + It("updates an existing pod gateway to match an additional new policy to a new target namespace", func() { + unmatchPod := newPod("unmatchPod", "default", "192.168.100.1", map[string]string{"name": "unmatchPod"}) + initController([]runtime.Object{namespaceDefault, namespaceTest, unmatchPod}, []runtime.Object{dynamicPolicy}) + Eventually(func() []string { return listRoutePolicyInCache() }, 5).Should(HaveLen(1)) + Eventually(func() []string { return listNamespaceInfo() }, 5).Should(HaveLen(1)) + Eventually(func() *namespaceInfo { return getNamespaceInfo(namespaceTest.Name) }, 5).Should(Equal( + &namespaceInfo{ + policies: sets.New(dynamicPolicy.Name), + staticGateways: gatewayInfoList{}, + dynamicGateways: map[types.NamespacedName]*gatewayInfo{}, + })) + updatePodLabels(unmatchPod, pod1.Labels, fakeClient) + Eventually(func() *namespaceInfo { return getNamespaceInfo(namespaceTest.Name) }, 5).Should(Equal( + &namespaceInfo{ + policies: sets.New(dynamicPolicy.Name), + staticGateways: gatewayInfoList{}, + dynamicGateways: map[types.NamespacedName]*gatewayInfo{ + {Namespace: "default", Name: unmatchPod.Name}: { + gws: sets.New(unmatchPod.Status.PodIPs[0].IP), + }, + }, + })) + }) + + It("updates an existing pod gateway to match a new policy that targets the same namespace", func() { + + initController([]runtime.Object{namespaceDefault, namespaceTest, pod2}, []runtime.Object{overlappingPolicy, dynamicPolicy}) + Eventually(func() []string { return listRoutePolicyInCache() }, 5).Should(HaveLen(2)) + Eventually(func() 
[]string { return listNamespaceInfo() }, 5).Should(HaveLen(1)) + Eventually(func() *namespaceInfo { return getNamespaceInfo(namespaceTest.Name) }, 5).Should(Equal( + &namespaceInfo{ + policies: sets.New(dynamicPolicy.Name, overlappingPolicy.Name), + staticGateways: gatewayInfoList{}, + dynamicGateways: map[types.NamespacedName]*gatewayInfo{ + {Namespace: "default", Name: pod2.Name}: { + gws: sets.New(pod2.Status.PodIPs[0].IP), + }, + }, + })) + updatePodLabels(pod2, map[string]string{"duplicated": "true"}, fakeClient) + // wait for 2 second to ensure that the pod changed have been reconciled. We are doing this because the outcome of the change should not impact the list of dynamic IPs and + // there is no way to know which of the policies apply specifically to the pod. + Eventually(func() bool { + p, err := fakeClient.CoreV1().Pods(pod2.Namespace).Get(context.TODO(), pod2.Name, v1.GetOptions{}) + Expect(err).NotTo(HaveOccurred()) + return reflect.DeepEqual(p.Labels, map[string]string{"duplicated": "true"}) + }, 2, 2).Should(BeTrue()) + Eventually(func() *namespaceInfo { return getNamespaceInfo(namespaceTest.Name) }, 5).Should(Equal( + &namespaceInfo{ + policies: sets.New(dynamicPolicy.Name, overlappingPolicy.Name), + staticGateways: gatewayInfoList{}, + dynamicGateways: map[types.NamespacedName]*gatewayInfo{ + {Namespace: "default", Name: pod2.Name}: { + gws: sets.New(pod2.Status.PodIPs[0].IP), + }, + }, + })) + }) + + It("updates an existing pod gateway to match a new policy that targets a different namespace", func() { + + initController([]runtime.Object{namespaceDefault, namespaceTest, namespaceTest2, pod2, pod3}, []runtime.Object{dynamicPolicyForTest2Only, dynamicPolicy}) + Eventually(func() []string { return listNamespaceInfo() }, 5).Should(HaveLen(2)) + Eventually(func() *namespaceInfo { return getNamespaceInfo(namespaceTest.Name) }, 5).Should(Equal( + &namespaceInfo{ + policies: sets.New(dynamicPolicy.Name), + staticGateways: gatewayInfoList{}, + 
dynamicGateways: map[types.NamespacedName]*gatewayInfo{ + {Namespace: "default", Name: pod2.Name}: { + gws: sets.New(pod2.Status.PodIPs[0].IP), + }, + {Namespace: "default", Name: pod3.Name}: { + gws: sets.New(pod3.Status.PodIPs[0].IP), + }, + }, + })) + Eventually(func() *namespaceInfo { return getNamespaceInfo(namespaceTest2.Name) }, 5).Should(Equal( + &namespaceInfo{ + policies: sets.New(dynamicPolicyForTest2Only.Name), + staticGateways: gatewayInfoList{}, + dynamicGateways: map[types.NamespacedName]*gatewayInfo{}, + })) + updatePodLabels(pod2, map[string]string{"duplicated": "true"}, fakeClient) + Eventually(func() *namespaceInfo { return getNamespaceInfo(namespaceTest.Name) }, 5).Should(Equal( + &namespaceInfo{ + policies: sets.New(dynamicPolicy.Name), + staticGateways: gatewayInfoList{}, + dynamicGateways: map[types.NamespacedName]*gatewayInfo{ + {Namespace: "default", Name: pod3.Name}: { + gws: sets.New(pod3.Status.PodIPs[0].IP), + }, + }, + })) + Eventually(func() *namespaceInfo { return getNamespaceInfo(namespaceTest2.Name) }, 5).Should(Equal( + &namespaceInfo{ + policies: sets.New(dynamicPolicyForTest2Only.Name), + staticGateways: gatewayInfoList{}, + dynamicGateways: map[types.NamespacedName]*gatewayInfo{ + {Namespace: "default", Name: pod2.Name}: { + gws: sets.New(pod2.Status.PodIPs[0].IP), + }, + }, + })) + }) + It("updates an existing pod gateway to match no policies", func() { + initController([]runtime.Object{namespaceDefault, namespaceTest, pod1, pod2}, []runtime.Object{dynamicPolicy}) + Eventually(func() []string { return listRoutePolicyInCache() }, time.Minute).Should(HaveLen(1)) + Eventually(func() []string { return listNamespaceInfo() }, 5).Should(HaveLen(1)) + Eventually(func() *namespaceInfo { return getNamespaceInfo(namespaceTest.Name) }, 5).Should(Equal( + &namespaceInfo{ + policies: sets.New(dynamicPolicy.Name), + staticGateways: gatewayInfoList{}, + dynamicGateways: map[types.NamespacedName]*gatewayInfo{ + {Namespace: "default", Name: 
pod1.Name}: { + gws: sets.New(pod1.Status.PodIPs[0].IP), + }, + {Namespace: "default", Name: pod2.Name}: { + gws: sets.New(pod2.Status.PodIPs[0].IP), + }, + }, + })) + updatePodLabels(pod1, map[string]string{}, fakeClient) + Eventually(func() *namespaceInfo { return getNamespaceInfo(namespaceTest.Name) }, 5, 1).Should(Equal( + &namespaceInfo{ + policies: sets.New(dynamicPolicy.Name), + staticGateways: gatewayInfoList{}, + dynamicGateways: map[types.NamespacedName]*gatewayInfo{ + {Namespace: "default", Name: pod2.Name}: { + gws: sets.New(pod2.Status.PodIPs[0].IP), + }, + }, + })) + }) + + It("updates a pod to match a policy to a single namespace", func() { + initController([]runtime.Object{namespaceDefault, namespaceTest, namespaceTest2, pod1}, []runtime.Object{dynamicPolicyForTest2Only, dynamicPolicy}) + Eventually(func() []string { return listNamespaceInfo() }, 5).Should(HaveLen(2)) + Eventually(func() *namespaceInfo { return getNamespaceInfo(namespaceTest.Name) }, 5).Should(Equal( + &namespaceInfo{ + policies: sets.New(dynamicPolicy.Name), + staticGateways: gatewayInfoList{}, + dynamicGateways: map[types.NamespacedName]*gatewayInfo{ + {Namespace: "default", Name: pod1.Name}: { + gws: sets.New(pod1.Status.PodIPs[0].IP), + }, + }, + })) + Eventually(func() *namespaceInfo { return getNamespaceInfo(namespaceTest2.Name) }, 5).Should(Equal( + &namespaceInfo{ + policies: sets.New(dynamicPolicyForTest2Only.Name), + staticGateways: gatewayInfoList{}, + dynamicGateways: map[types.NamespacedName]*gatewayInfo{ + {Namespace: "default", Name: pod1.Name}: { + gws: sets.New(pod1.Status.PodIPs[0].IP), + }, + }, + })) + updatePodLabels(pod1, map[string]string{"key": "pod"}, fakeClient) + Eventually(func() *namespaceInfo { return getNamespaceInfo(namespaceTest.Name) }, 5).Should(Equal( + &namespaceInfo{ + policies: sets.New(dynamicPolicy.Name), + staticGateways: gatewayInfoList{}, + dynamicGateways: map[types.NamespacedName]*gatewayInfo{ + {Namespace: "default", Name: pod1.Name}: { 
+ gws: sets.New(pod1.Status.PodIPs[0].IP), + }, + }, + })) + Eventually(func() *namespaceInfo { return getNamespaceInfo(namespaceTest2.Name) }, 5).Should(Equal( + &namespaceInfo{ + policies: sets.New(dynamicPolicyForTest2Only.Name), + staticGateways: gatewayInfoList{}, + dynamicGateways: map[types.NamespacedName]*gatewayInfo{}, + })) + }) + + }) +}) + +func deletePod(pod *corev1.Pod, fakeClient *fake.Clientset) { + + err = fakeClient.CoreV1().Pods(pod.Namespace).Delete(context.Background(), pod.Name, v1.DeleteOptions{}) + Expect(err).NotTo(HaveOccurred()) +} + +func updatePodLabels(pod *corev1.Pod, newLabels map[string]string, fakeClient *fake.Clientset) { + + p, err := fakeClient.CoreV1().Pods(pod.Namespace).Get(context.TODO(), pod.Name, v1.GetOptions{}) + Expect(err).NotTo(HaveOccurred()) + incrementResourceVersion(p) + p.Labels = newLabels + _, err = fakeClient.CoreV1().Pods(pod.Namespace).Update(context.Background(), p, v1.UpdateOptions{}) + Expect(err).NotTo(HaveOccurred()) +} + +func incrementResourceVersion(obj v1.Object) { + var rs int64 + if obj.GetResourceVersion() != "" { + rs, err = strconv.ParseInt(obj.GetResourceVersion(), 10, 64) + Expect(err).NotTo(HaveOccurred()) + } + rs++ + obj.SetResourceVersion(strconv.FormatInt(rs, 10)) +} diff --git a/go-controller/pkg/ovn/controller/apbroute/external_controller_policy.go b/go-controller/pkg/ovn/controller/apbroute/external_controller_policy.go new file mode 100644 index 0000000000..895c02406a --- /dev/null +++ b/go-controller/pkg/ovn/controller/apbroute/external_controller_policy.go @@ -0,0 +1,722 @@ +package apbroute + +import ( + "fmt" + "net" + "reflect" + "strings" + + v1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/labels" + ktypes "k8s.io/apimachinery/pkg/types" + kerrors "k8s.io/apimachinery/pkg/util/errors" + "k8s.io/apimachinery/pkg/util/sets" + "k8s.io/klog/v2" + + adminpolicybasedrouteapi 
"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/adminpolicybasedroute/v1" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" +) + +// processAddPolicy takes in a new policy and applies it. To do that, it aggregates the IPs from the static hops and retrieves the IPs from the pods resulting from applying the +// namespace and pod selectors in the dynamic hops. +// The last step is to store the new policy in the route policy cache so that it can be used in the future to compare against changes in its spec. +func (m *externalPolicyManager) processAddPolicy(routePolicy *adminpolicybasedrouteapi.AdminPolicyBasedExternalRoute) (*routePolicy, error) { + + // it's a new policy + processedPolicies, err := m.processExternalRoutePolicy(routePolicy) + if err != nil { + return nil, err + } + err = m.applyProcessedPolicy(routePolicy.Name, processedPolicies) + if err != nil { + return nil, err + } + err = m.storeRoutePolicyInCache(routePolicy) + if err != nil { + return nil, err + } + klog.Infof("Added Admin Policy Based External Route %s", routePolicy.Name) + return processedPolicies, nil +} + +// applyProcessedPolicy takes in a route policy and applies it to each of the namespaces defined in the namespaces selector in the route policy. +// As part of the process, it also updates the namespace info cache with the new gatway information derived from the route policy, so that it keeps +// track for each namespace of the gateway IPs that are being applied and the names of the policies impacting the namespace. 
+func (m *externalPolicyManager) applyProcessedPolicy(policyName string, routePolicy *routePolicy) error { + targetNs, err := m.listNamespacesBySelector(routePolicy.targetNamespacesSelector) + if err != nil { + return err + } + for _, ns := range targetNs { + cacheInfo, found := m.getNamespaceInfoFromCache(ns.Name) + if !found { + cacheInfo = m.newNamespaceInfoInCache(ns.Name) + } + err = m.applyProcessedPolicyToNamespace(ns.Name, policyName, routePolicy, cacheInfo) + m.unlockNamespaceInfoCache(ns.Name) + if err != nil { + return err + } + } + return nil +} + +// processDeletePolicy takes in a policy, marks it for deletion and proceeds to delete the gateway IPs derived from the static and dynamic hops from the namespaces impacted by the policy, as defined by the namespace +// selector in the from field. The last step is to delete it from the cache. +func (m *externalPolicyManager) processDeletePolicy(policyName string) error { + + // mark the policy for deletion. + // if it's already marked continue processing the delete action as this could be a retry attempt from a previous failed delete run. 
+ // if it's no longer in the cache, return nil + routePolicy, found := m.getAndMarkRoutePolicyForDeletionInCache(policyName) + if !found { + return nil + } + targetNs, err := m.listNamespacesBySelector(&routePolicy.Spec.From.NamespaceSelector) + if err != nil { + return err + } + for _, ns := range targetNs { + cacheInfo, found := m.getNamespaceInfoFromCache(ns.Name) + if !found { + klog.Warningf("Attempting to delete policy %s from a namespace that does not exist %s", routePolicy.Name, ns.Name) + continue + } + err = m.removePolicyFromNamespace(ns.Name, &routePolicy, cacheInfo) + if err != nil { + m.unlockNamespaceInfoCache(ns.Name) + return err + } + if cacheInfo.policies.Len() == 0 { + m.deleteNamespaceInfoInCache(ns.Name) + } + m.unlockNamespaceInfoCache(ns.Name) + } + err = m.deleteRoutePolicyFromCache(routePolicy.Name) + if err != nil { + return err + } + klog.Infof("Deleted Admin Policy Based External Route %s", routePolicy.Name) + return nil +} + +// calculateAnnotatedNamespaceGatewayIPsForNamespace retrieves the list of IPs defined by the legacy annotation gateway logic for namespaces. +// this function is used when deleting gateway IPs to ensure that IPs that overlap with the annotation logic are not deleted from the network resource +// (north bound or conntrack) when the given IP is deleted when removing the policy that references them. 
+func (m *externalPolicyManager) calculateAnnotatedNamespaceGatewayIPsForNamespace(targetNamespace string) (sets.Set[string], error) { + namespace, err := m.namespaceLister.Get(targetNamespace) + if err != nil { + return nil, err + } + + if annotation, ok := namespace.Annotations[util.RoutingExternalGWsAnnotation]; ok { + exGateways, err := util.ParseRoutingExternalGWAnnotation(annotation) + if err != nil { + return nil, err + } + return exGateways, nil + } + return sets.New[string](), nil + +} + +// calculateAnnotatedPodGatewayIPsForNamespace retrieves the list of IPs defined by the legacy annotation gateway logic for pods. +// this function is used when deleting gateway IPs to ensure that IPs that overlap with the annotation logic are not deleted from the network resource +// (north bound or conntrack) when the given IP is deleted when removing the policy that references them. +func (m *externalPolicyManager) calculateAnnotatedPodGatewayIPsForNamespace(targetNamespace string) (sets.Set[string], error) { + gwIPs := sets.New[string]() + podList, err := m.podLister.List(labels.Everything()) + if err != nil { + return nil, err + } + + for _, pod := range podList { + networkName, ok := pod.Annotations[util.RoutingNetworkAnnotation] + if !ok { + continue + } + targetNamespaces, ok := pod.Annotations[util.RoutingNamespaceAnnotation] + if !ok { + continue + } + foundGws, err := getExGwPodIPs(pod, networkName) + if err != nil { + klog.Errorf("Error getting exgw IPs for pod: %s, error: %v", pod.Name, err) + return nil, err + } + if foundGws.Len() == 0 { + klog.Errorf("No pod IPs found for pod %s/%s", pod.Namespace, pod.Name) + continue + } + tmpNs := sets.New(strings.Split(targetNamespaces, ",")...) + if tmpNs.Has(targetNamespaces) { + gwIPs = gwIPs.Union(foundGws) + } + } + return gwIPs, nil +} + +// deletePolicyInNamespace removes the gateway IPs derived from a policy in a namespace. 
It takes into account the gateway IPs from the legacy
+// annotations and other policies impacting the same namespace to avoid deleting IPs that coexist in other resources.
+// In a nutshell, if a gateway IP is only found in the policy being deleted, then the IP is removed from the network resource. But if the IP is
+// found in at least a legacy annotation or another policy impacting the namespace, then the IP is not removed from the cache or the network resource (north bound or conntrack)
+func (m *externalPolicyManager) deletePolicyInNamespace(namespaceName, policyName string, routePolicy *routePolicy, cacheInfo *namespaceInfo) error {
+	coexistingPolicies := cacheInfo.policies.Clone().Delete(policyName)
+	annotatedGWIPs, err := m.calculateAnnotatedNamespaceGatewayIPsForNamespace(namespaceName)
+	if err != nil {
+		return err
+	}
+	coexistingIPs, err := m.retrieveStaticGatewayIPsForPolicies(coexistingPolicies)
+	if err != nil {
+		return err
+	}
+
+	// don't care if the route is flagged for deletion, delete any gw IPs related to the policy
+	policy, found, _ := m.getRoutePolicyFromCache(policyName)
+	if !found {
+		return fmt.Errorf("policy %s not found", policyName)
+	}
+	pp, err := m.processExternalRoutePolicy(&policy)
+	if err != nil {
+		return err
+	}
+
+	static := sets.New[string]()
+	for _, gatewayInfo := range pp.staticGateways {
+		static = static.Union(gatewayInfo.gws)
+	}
+	for _, gwInfo := range routePolicy.staticGateways {
+		static = static.Delete(gwInfo.gws.UnsortedList()...)
+	}
+	coexistingIPs = coexistingIPs.Union(annotatedGWIPs).Union(static)
+
+	for _, gwInfo := range routePolicy.staticGateways {
+		// Filter out the IPs that are not in coexisting. Those IPs are to be deleted.
+		invalidGWIPs := gwInfo.gws.Difference(coexistingIPs)
+		// Filter out the IPs from the coexisting list that are to be kept by calculating the difference between the coexisting and those IPs that are to be deleted and not coexisting at the same time.
+		ipsToKeep := coexistingIPs.Difference(invalidGWIPs)
+		klog.Infof("Coexisting %s, invalid %s, ipsToKeep %s", strings.Join(sets.List(coexistingIPs), ","), strings.Join(sets.List(invalidGWIPs), ","), strings.Join(sets.List(ipsToKeep), ","))
+		err := m.netClient.deleteGatewayIPs(namespaceName, invalidGWIPs, ipsToKeep)
+		if err != nil {
+			return err
+		}
+		if gwInfo.gws.Equal(invalidGWIPs) {
+			cacheInfo.staticGateways = cacheInfo.staticGateways.Delete(gwInfo)
+			continue
+		}
+		gwInfo.gws = gwInfo.gws.Delete(invalidGWIPs.UnsortedList()...)
+	}
+
+	annotatedGWIPs, err = m.calculateAnnotatedPodGatewayIPsForNamespace(namespaceName)
+	if err != nil {
+		return err
+	}
+
+	coexistingIPs, err = m.retrieveDynamicGatewayIPsForPolicies(coexistingPolicies)
+	if err != nil {
+		return err
+	}
+
+	dynamic := sets.New[string]()
+	for _, gatewayInfo := range pp.dynamicGateways {
+		// accumulate into the dynamic set; accumulating into `static` here (as the
+		// original copy-paste did) would pollute the dynamic coexisting-IP set with
+		// the static gateway IPs and lose all but the last dynamic gateway's IPs.
+		dynamic = dynamic.Union(gatewayInfo.gws)
+	}
+	for _, gwInfo := range routePolicy.dynamicGateways {
+		dynamic = dynamic.Delete(gwInfo.gws.UnsortedList()...)
+	}
+	coexistingIPs = coexistingIPs.Union(annotatedGWIPs).Union(dynamic)
+
+	for pod, gwInfo := range routePolicy.dynamicGateways {
+		// Filter out the IPs that are not in coexisting. Those IPs are to be deleted.
+		invalidGWIPs := gwInfo.gws.Difference(coexistingIPs)
+		// Filter out the IPs from the coexisting list that are to be kept by calculating the difference between the coexisting and those IPs that are to be deleted and not coexisting at the same time.
+		ipsToKeep := coexistingIPs.Difference(invalidGWIPs)
+		klog.Infof("Coexisting %s, invalid %s, ipsToKeep %s", strings.Join(sets.List(coexistingIPs), ","), strings.Join(sets.List(invalidGWIPs), ","), strings.Join(sets.List(ipsToKeep), ","))
+		err := m.netClient.deleteGatewayIPs(namespaceName, invalidGWIPs, ipsToKeep)
+		if err != nil {
+			return err
+		}
+		if gwInfo.gws.Equal(invalidGWIPs) {
+			// delete cached information for the pod gateway
+			delete(cacheInfo.dynamicGateways, pod)
+			continue
+		}
+		gwInfo.gws = gwInfo.gws.Delete(invalidGWIPs.UnsortedList()...)
+	}
+	return nil
+}
+
+// applyProcessedPolicyToNamespace applies the gateway IPs derived from the processed policy to a namespace and updates the cache information for the namespace.
+func (m *externalPolicyManager) applyProcessedPolicyToNamespace(namespaceName, policyName string, routePolicy *routePolicy, cacheInfo *namespaceInfo) error {
+
+	if routePolicy.staticGateways.Len() > 0 {
+		err := m.addGWRoutesForNamespace(namespaceName, routePolicy.staticGateways)
+		if err != nil {
+			return err
+		}
+		var duplicated sets.Set[string]
+		cacheInfo.staticGateways, duplicated = cacheInfo.staticGateways.Insert(routePolicy.staticGateways...)
+		if duplicated.Len() > 0 {
+			klog.Warningf("Found duplicated gateway IP(s) %+s in policy %s", sets.List(duplicated), policyName)
+		}
+	}
+	for pod, info := range routePolicy.dynamicGateways {
+		err := m.addGWRoutesForNamespace(namespaceName, gatewayInfoList{info})
+		if err != nil {
+			return err
+		}
+		cacheInfo.dynamicGateways[pod] = info
+	}
+	cacheInfo.policies = cacheInfo.policies.Insert(policyName)
+	return nil
+}
+
+// processUpdatePolicy takes in the current and updated version of a given policy and applies the following logic:
+// * Determine the changes between the current and updated version.
+// * Remove the static and dynamic hop entries in the namespaces impacted by the current version of the policy that are in the current policy but not in the updated version.
+// * Apply the static and dynamic hop entries in the namespaces impacted by the updated version of the policy that are in the updated version but not in the current version. +// * Store the updated policy in the route policy cache. +func (m *externalPolicyManager) processUpdatePolicy(currentPolicy, updatedPolicy *adminpolicybasedrouteapi.AdminPolicyBasedExternalRoute) (*routePolicy, error) { + klog.Infof("Processing update for Admin Policy Based External Route '%s'", currentPolicy.Name) + + // To update the policies, first we'll process the diff between old and new and remove the discrepancies that are not found in the new object. + // Afterwards, we'll process the diff between the new and the old and apply the new policies not found in the old policy, ensuring that we are not reduplicating the gatewayInfo. + err := m.removeDiscrepanciesInRoutePolicy(currentPolicy, updatedPolicy) + if err != nil { + return nil, err + } + // At this point we have removed all the aspects of the current policy that no longer applies. Next step is to apply the parts of the new policy that are not in the current one. 
+ err = m.applyUpdatesInRoutePolicy(currentPolicy, updatedPolicy) + if err != nil { + return nil, err + } + + // update the cache to ensure it reflects the latest copy + err = m.storeRoutePolicyInCache(updatedPolicy) + if err != nil { + return nil, err + } + klog.Infof("Updated Admin Policy Based External Route %s", currentPolicy.Name) + return m.processExternalRoutePolicy(updatedPolicy) +} + +func (m *externalPolicyManager) applyUpdatesInRoutePolicy(currentPolicy, newPolicy *adminpolicybasedrouteapi.AdminPolicyBasedExternalRoute) error { + additionalNamespaces, additionalStaticHops, additionalDynamicHops, err := m.calculatePolicyDifferences(newPolicy, currentPolicy) + if err != nil { + return err + } + // apply the new policy to the new namespaces where the policy now applies + for additionalNs := range additionalNamespaces { + cacheInfo, found := m.getNamespaceInfoFromCache(additionalNs) + if !found { + // if not found create a new one + cacheInfo = m.newNamespaceInfoInCache(additionalNs) + } + err := m.applyPolicyToNamespace(additionalNs, newPolicy, cacheInfo) + m.unlockNamespaceInfoCache(additionalNs) + if err != nil { + return err + } + } + + processedStaticHops, err := m.processStaticHopsGatewayInformation(additionalStaticHops) + if err != nil { + return err + } + processedDynamicHops, err := m.processDynamicHopsGatewayInformation(additionalDynamicHops) + if err != nil { + return err + } + // retrieve all new namespaces + nsList, err := m.listNamespacesBySelector(&newPolicy.Spec.From.NamespaceSelector) + if err != nil { + return err + } + for _, ns := range nsList { + if additionalNamespaces.Has(ns.Name) { + // policy has already been fully applied to this namespace by the previous operation + continue + } + cacheInfo, found := m.getNamespaceInfoFromCache(ns.Name) + if !found { + cacheInfo = m.newNamespaceInfoInCache(ns.Name) + } + err = m.applyProcessedPolicyToNamespace(ns.Name, currentPolicy.Name, &routePolicy{dynamicGateways: processedDynamicHops, 
staticGateways: processedStaticHops}, cacheInfo)
+		m.unlockNamespaceInfoCache(ns.Name)
+		if err != nil {
+			return err
+		}
+	}
+
+	return nil
+}
+
+func (m *externalPolicyManager) removeDiscrepanciesInRoutePolicy(currentPolicy, updatedPolicy *adminpolicybasedrouteapi.AdminPolicyBasedExternalRoute) error {
+	unmatchingNamespaces, unmatchingStaticHops, unmatchingDynamicHops, err := m.calculatePolicyDifferences(currentPolicy, updatedPolicy)
+	if err != nil {
+		return err
+	}
+	// delete the namespaces where this policy no longer applies
+	for unmatchNs := range unmatchingNamespaces {
+		cacheInfo, found := m.getNamespaceInfoFromCache(unmatchNs)
+		if !found {
+			klog.Warningf("Attempting to delete policy %s from a namespace that does not exist %s", currentPolicy.Name, unmatchNs)
+			continue
+		}
+		err := m.removePolicyFromNamespace(unmatchNs, currentPolicy, cacheInfo)
+		if err != nil {
+			m.unlockNamespaceInfoCache(unmatchNs)
+			return err
+		}
+		if cacheInfo.policies.Len() == 0 {
+			m.deleteNamespaceInfoInCache(unmatchNs)
+		}
+		m.unlockNamespaceInfoCache(unmatchNs)
+	}
+
+	// delete the hops that no longer apply from all the current policy's applicable namespaces
+	processedStaticHops, err := m.processStaticHopsGatewayInformation(unmatchingStaticHops)
+	if err != nil {
+		return err
+	}
+	processedDynamicHops, err := m.processDynamicHopsGatewayInformation(unmatchingDynamicHops)
+	if err != nil {
+		return err
+	}
+	// retrieve all current namespaces
+	nsList, err := m.listNamespacesBySelector(&currentPolicy.Spec.From.NamespaceSelector)
+	if err != nil {
+		return err
+	}
+	for _, ns := range nsList {
+		if unmatchingNamespaces.Has(ns.Name) {
+			// policy has already been deleted in this namespace by the previous operation
+			continue
+		}
+		cacheInfo, found := m.getNamespaceInfoFromCache(ns.Name)
+		if !found {
+			klog.Warningf("Attempting to update policy %s for a namespace that does not exist %s", currentPolicy.Name, ns.Name)
+			continue
+		}
+		err = m.deletePolicyInNamespace(ns.Name,
currentPolicy.Name, &routePolicy{dynamicGateways: processedDynamicHops, staticGateways: processedStaticHops}, cacheInfo) + if err != nil { + m.unlockNamespaceInfoCache(ns.Name) + return err + } + if cacheInfo.policies.Len() == 0 { + m.deleteNamespaceInfoInCache(ns.Name) + } + m.unlockNamespaceInfoCache(ns.Name) + } + return nil +} + +// addGWRoutesForNamespace handles adding routes for all existing pods in namespace +func (m *externalPolicyManager) addGWRoutesForNamespace(namespace string, egress gatewayInfoList) error { + existingPods, err := m.podLister.Pods(namespace).List(labels.Everything()) + if err != nil { + return fmt.Errorf("failed to get all the pods (%v)", err) + } + for _, pod := range existingPods { + err := m.netClient.addGatewayIPs(pod, egress) + if err != nil { + return err + } + } + return nil +} + +func (m *externalPolicyManager) processStaticHopsGatewayInformation(hops []*adminpolicybasedrouteapi.StaticHop) (gatewayInfoList, error) { + gwList := gatewayInfoList{} + + // collect all the static gateway information from the nextHops slice + for _, h := range hops { + ip := net.ParseIP(h.IP) + if ip == nil { + return nil, fmt.Errorf("could not parse routing external gw annotation value '%s'", h.IP) + } + gwList = append(gwList, &gatewayInfo{gws: sets.New(ip.String()), bfdEnabled: h.BFDEnabled}) + } + return gwList, nil +} + +func (m *externalPolicyManager) processDynamicHopsGatewayInformation(hops []*adminpolicybasedrouteapi.DynamicHop) (map[ktypes.NamespacedName]*gatewayInfo, error) { + podsInfo := map[ktypes.NamespacedName]*gatewayInfo{} + for _, h := range hops { + podNS, err := m.listNamespacesBySelector(h.NamespaceSelector) + if err != nil { + return nil, err + } + for _, ns := range podNS { + s, err := metav1.LabelSelectorAsSelector(&h.PodSelector) + if err != nil { + return nil, err + } + pods, err := m.podLister.Pods(ns.Name).List(s) + if err != nil { + return nil, err + } + for _, pod := range pods { + foundGws, err := getExGwPodIPs(pod, 
h.NetworkAttachmentName) + if err != nil { + return nil, err + } + // if we found any gateways then we need to update current pods routing in the relevant namespace + if len(foundGws) == 0 { + klog.Warningf("No valid gateway IPs found for requested external gateway pod %s/%s", pod.Namespace, pod.Name) + continue + } + key := ktypes.NamespacedName{Namespace: pod.Namespace, Name: pod.Name} + if _, ok := podsInfo[key]; ok { + klog.Warningf("Found overlapping dynamic hop policy for pod %s, discarding match entry", key) + continue + } + podsInfo[key] = &gatewayInfo{gws: foundGws, bfdEnabled: h.BFDEnabled} + } + } + } + return podsInfo, nil +} + +func (m *externalPolicyManager) processExternalRoutePolicy(policy *adminpolicybasedrouteapi.AdminPolicyBasedExternalRoute) (*routePolicy, error) { + var ( + errors []error + ) + staticGWInfo, err := m.processStaticHopsGatewayInformation(policy.Spec.NextHops.StaticHops) + if err != nil { + errors = append(errors, err) + } + + dynamicGWInfo, err := m.processDynamicHopsGatewayInformation(policy.Spec.NextHops.DynamicHops) + if err != nil { + errors = append(errors, err) + } + if len(errors) > 0 { + return nil, kerrors.NewAggregate(errors) + } + return &routePolicy{ + targetNamespacesSelector: &policy.Spec.From.NamespaceSelector, + staticGateways: staticGWInfo, + dynamicGateways: dynamicGWInfo, + }, nil + +} + +func (m *externalPolicyManager) processExternalRoutePolicies(externalRoutePolicies []*adminpolicybasedrouteapi.AdminPolicyBasedExternalRoute) ([]*routePolicy, error) { + routePolicies := make([]*routePolicy, 0) + for _, erp := range externalRoutePolicies { + processedPolicies, err := m.processExternalRoutePolicy(erp) + if err != nil { + return nil, err + } + routePolicies = append(routePolicies, processedPolicies) + } + return routePolicies, nil +} + +func (m *externalPolicyManager) findMatchingDynamicPolicies(pod *v1.Pod) ([]*adminpolicybasedrouteapi.AdminPolicyBasedExternalRoute, error) { + var routePolicies 
[]*adminpolicybasedrouteapi.AdminPolicyBasedExternalRoute + crs, err := m.routeLister.List(labels.Everything()) + if err != nil { + return nil, err + } + for _, cr := range crs { + policySpec := adminpolicybasedrouteapi.AdminPolicyBasedExternalRouteSpec{ + From: cr.Spec.From, + NextHops: adminpolicybasedrouteapi.ExternalNextHops{DynamicHops: []*adminpolicybasedrouteapi.DynamicHop{}}} + for _, dp := range cr.Spec.NextHops.DynamicHops { + nss, err := m.listNamespacesBySelector(dp.NamespaceSelector) + if err != nil { + return nil, err + } + if !containsNamespaceInSlice(nss, pod.Namespace) { + continue + } + nsPods, err := m.listPodsInNamespaceWithSelector(pod.Namespace, &dp.PodSelector) + if err != nil { + return nil, err + } + if containsPodInSlice(nsPods, pod.Name) { + // add only the hop information that intersects with the pod + policySpec.NextHops.DynamicHops = append(policySpec.NextHops.DynamicHops, dp) + } + } + if len(policySpec.NextHops.DynamicHops) > 0 { + routePolicies = append(routePolicies, &adminpolicybasedrouteapi.AdminPolicyBasedExternalRoute{ + ObjectMeta: metav1.ObjectMeta{ + Name: cr.Name, + }, + Spec: policySpec, + }) + } + + } + return routePolicies, nil +} + +func (m *externalPolicyManager) getPoliciesForNamespace(namespaceName string) (sets.Set[string], error) { + matches := sets.New[string]() + policies, err := m.routeLister.List(labels.Everything()) + if err != nil { + return nil, err + } + + for _, policy := range policies { + targetNamespaces, err := m.listNamespacesBySelector(&policy.Spec.From.NamespaceSelector) + if err != nil { + return nil, err + } + for _, ns := range targetNamespaces { + if namespaceName == ns.Name { + matches = matches.Insert(policy.Name) + } + } + } + + return matches, nil +} + +func (m *externalPolicyManager) aggregateDynamicRouteGatewayInformation(pod *v1.Pod, routePolicy *routePolicy) (map[string]*gatewayInfo, error) { + key := ktypes.NamespacedName{Namespace: pod.Namespace, Name: pod.Name} + gwInfoMap := 
make(map[string]*gatewayInfo) + targetNs, err := m.listNamespacesBySelector(routePolicy.targetNamespacesSelector) + if err != nil { + return nil, err + } + for _, ns := range targetNs { + if _, ok := gwInfoMap[ns.Name]; ok { + return nil, fmt.Errorf("duplicated target namespace '%s ' while processing external policies for pod %s/%s", ns.Name, pod.Namespace, pod.Name) + } + gwInfoMap[ns.Name] = routePolicy.dynamicGateways[key] + } + return gwInfoMap, nil +} + +// calculatePolicyDifferences determines the differences between two policies in terms of namespaces where the policy applies, and the differences in static and dynamic hops. +// The return values are the namespaces, static hops and dynamic hops that are in the first policy but not in the second instance. +func (m *externalPolicyManager) calculatePolicyDifferences(policy1, policy2 *adminpolicybasedrouteapi.AdminPolicyBasedExternalRoute) (sets.Set[string], []*adminpolicybasedrouteapi.StaticHop, []*adminpolicybasedrouteapi.DynamicHop, error) { + mismatchingNamespaces, err := m.calculateNamespaceSelectorDifferences(&policy1.Spec.From.NamespaceSelector, &policy2.Spec.From.NamespaceSelector) + if err != nil { + return nil, nil, nil, err + } + mismatchingStaticHops := m.calculateStaticHopDifferences(policy1.Spec.NextHops.StaticHops, policy2.Spec.NextHops.StaticHops) + mismatchingDynamicHops, err := m.calculateDynamicHopDifferences(policy1.Spec.NextHops.DynamicHops, policy2.Spec.NextHops.DynamicHops) + if err != nil { + return nil, nil, nil, err + } + + return mismatchingNamespaces, mismatchingStaticHops, mismatchingDynamicHops, nil +} + +// calculateNamespaceSelectorDifferences determines the difference between the first and the second selector. The outcome is a set that contains +// those namespace names that are in the first selector but not found in the second selector. 
+func (m *externalPolicyManager) calculateNamespaceSelectorDifferences(nsSelector1, nsSelector2 *metav1.LabelSelector) (sets.Set[string], error) { + unmatchingNamespaces := sets.New[string]() + if !reflect.DeepEqual(nsSelector1, nsSelector2) { + nsList1, err := m.listNamespacesBySelector(nsSelector1) + if err != nil { + return nil, err + } + nsList2, err := m.listNamespacesBySelector(nsSelector2) + if err != nil { + return nil, err + } + for _, ns1 := range nsList1 { + var found bool + for _, ns2 := range nsList2 { + if ns1.Name == ns2.Name { + found = true + break + } + } + if !found { + unmatchingNamespaces.Insert(ns1.Name) + } + } + } + return unmatchingNamespaces, nil +} + +// calculateStaticHopDifferences determines the difference between the first slice and the second staticHops slice. The outcome is a slice +// of static hops that are in the staticHop1 slice but not in the staticHop2 slice. +func (m *externalPolicyManager) calculateStaticHopDifferences(staticHops1, staticHops2 []*adminpolicybasedrouteapi.StaticHop) []*adminpolicybasedrouteapi.StaticHop { + diffStatic := make([]*adminpolicybasedrouteapi.StaticHop, 0) + for _, staticHop1 := range staticHops1 { + var found bool + for _, staticHop2 := range staticHops2 { + if reflect.DeepEqual(staticHop1, staticHop2) { + found = true + break + } + } + if !found { + diffStatic = append(diffStatic, staticHop1) + } + } + return diffStatic +} + +// calculateDynamicHopDifferences determines the difference between the first slice and the second dynamicHop slice. The return value is a slice +// of dynamic hops that are in the first slice but not in the second. 
+func (m *externalPolicyManager) calculateDynamicHopDifferences(dynamicHops1, dynamicHops2 []*adminpolicybasedrouteapi.DynamicHop) ([]*adminpolicybasedrouteapi.DynamicHop, error) { + diffDynamic := make([]*adminpolicybasedrouteapi.DynamicHop, 0) + for _, dynamicHop1 := range dynamicHops1 { + var found bool + for _, dynamicHop2 := range dynamicHops2 { + + if reflect.DeepEqual(dynamicHop1, dynamicHop2) { + found = true + break + } + } + if !found { + diffDynamic = append(diffDynamic, dynamicHop1) + } + } + return diffDynamic, nil +} + +// retrieveDynamicGatewayIPsForPolicies returns all the gateway IPs from the dynamic hops of all the policies in the set. This function is used +// to retrieve the dynamic gateway IPs from all the policies applicable to a specific namespace. +func (m *externalPolicyManager) retrieveDynamicGatewayIPsForPolicies(coexistingPolicies sets.Set[string]) (sets.Set[string], error) { + coexistingDynamicIPs := sets.New[string]() + + for name := range coexistingPolicies { + policy, err := m.routeLister.Get(name) + if err != nil { + klog.Warningf("Unable to find route policy %s:%+v", name, err) + continue + } + pp, err := m.processDynamicHopsGatewayInformation(policy.Spec.NextHops.DynamicHops) + if err != nil { + return nil, err + } + for _, gatewayInfo := range pp { + coexistingDynamicIPs = coexistingDynamicIPs.Union(gatewayInfo.gws) + } + } + return coexistingDynamicIPs, nil +} + +// retrieveStaticGatewayIPsForPolicies returns all the gateway IPs from the static hops of all the policies in the set. This function is used +// to retrieve the static gateway IPs from all the policies applicable to a specific namespace. 
+func (m *externalPolicyManager) retrieveStaticGatewayIPsForPolicies(policies sets.Set[string]) (sets.Set[string], error) { + coexistingStaticIPs := sets.New[string]() + + for name := range policies { + policy, err := m.routeLister.Get(name) + if err != nil { + klog.Warningf("Unable to find route policy %s:%+v", name, err) + continue + } + pp, err := m.processStaticHopsGatewayInformation(policy.Spec.NextHops.StaticHops) + if err != nil { + return nil, err + } + for _, gatewayInfo := range pp { + coexistingStaticIPs = coexistingStaticIPs.Union(gatewayInfo.gws) + } + } + return coexistingStaticIPs, nil +} diff --git a/go-controller/pkg/ovn/controller/apbroute/external_controller_policy_test.go b/go-controller/pkg/ovn/controller/apbroute/external_controller_policy_test.go new file mode 100644 index 0000000000..ce5b56a4e1 --- /dev/null +++ b/go-controller/pkg/ovn/controller/apbroute/external_controller_policy_test.go @@ -0,0 +1,723 @@ +package apbroute + +import ( + "context" + "sort" + "time" + + . "github.com/onsi/ginkgo" + . 
"github.com/onsi/gomega" + v1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + "k8s.io/apimachinery/pkg/util/sets" + + corev1 "k8s.io/api/core/v1" + "k8s.io/client-go/kubernetes/fake" + + libovsdbclient "github.com/ovn-org/libovsdb/client" + adminpolicybasedrouteapi "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/adminpolicybasedroute/v1" + adminpolicybasedrouteclient "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/fake" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/factory" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/nbdb" + addressset "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/address_set" + libovsdbtest "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/testing/libovsdb" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" + "k8s.io/apimachinery/pkg/runtime" +) + +func newPod(podName, namespace, hostIP string, labels map[string]string) *corev1.Pod { + return &corev1.Pod{ + ObjectMeta: v1.ObjectMeta{Name: podName, Namespace: namespace, + Labels: labels}, + Spec: corev1.PodSpec{HostNetwork: true}, + Status: corev1.PodStatus{PodIPs: []corev1.PodIP{{IP: hostIP}}, Phase: corev1.PodRunning}, + } +} + +func listRoutePolicyInCache() []string { + return externalController.mgr.routePolicySyncCache.GetKeys() +} + +var ( + externalController *ExternalGatewayMasterController + iFactory *factory.WatchFactory + stopChan chan (struct{}) + initialDB libovsdbtest.TestSetup + nbClient libovsdbclient.Client + nbsbCleanup *libovsdbtest.Cleanup + fakeRouteClient *adminpolicybasedrouteclient.Clientset + fakeClient *fake.Clientset + mgr *externalPolicyManager + err error +) + +func initController(k8sObjects, routePolicyObjects []runtime.Object) { + stopChan = make(chan struct{}) + fakeClient = fake.NewSimpleClientset(k8sObjects...) + fakeRouteClient = adminpolicybasedrouteclient.NewSimpleClientset(routePolicyObjects...) 
+ iFactory, err = factory.NewMasterWatchFactory(&util.OVNMasterClientset{KubeClient: fakeClient}) + Expect(err).NotTo(HaveOccurred()) + iFactory.Start() + externalController, err = NewExternalMasterController(controllerName, fakeClient, + fakeRouteClient, + stopChan, + iFactory.PodCoreInformer(), + iFactory.NamespaceInformer(), + iFactory.NodeCoreInformer().Lister(), + nbClient, + addressset.NewFakeAddressSetFactory(controllerName)) + Expect(err).NotTo(HaveOccurred()) + mgr = externalController.mgr + go func() { + externalController.Run(5) + }() +} + +var _ = Describe("OVN External Gateway policy", func() { + + var ( + namespaceDefault = &corev1.Namespace{ + ObjectMeta: v1.ObjectMeta{Name: "default", + Labels: map[string]string{"name": "default"}}} + namespaceTest = &corev1.Namespace{ + ObjectMeta: v1.ObjectMeta{Name: "test", + Labels: map[string]string{"name": "test", "match": "test"}}, + } + namespaceTest2 = &corev1.Namespace{ + ObjectMeta: v1.ObjectMeta{Name: "test2", + Labels: map[string]string{"name": "test2", "match": "test"}}, + } + + dynamicPolicy = newPolicy( + "dynamic", + &v1.LabelSelector{MatchLabels: map[string]string{"name": "test"}}, + nil, + &v1.LabelSelector{MatchLabels: map[string]string{"name": "default"}}, + &v1.LabelSelector{MatchLabels: map[string]string{"key": "pod"}}, + false, + ) + + staticPolicy = newPolicy( + "static", + &v1.LabelSelector{MatchLabels: map[string]string{"name": "test"}}, + sets.New(staticHopGWIP), + nil, + nil, + false, + ) + + pod1 = newPod("pod_1", "default", "192.168.10.1", map[string]string{"key": "pod", "name": "pod1", "duplicated": "true"}) + pod2 = newPod("pod_2", "default", "192.168.20.1", map[string]string{"key": "pod", "name": "pod2"}) + pod3 = newPod("pod_3", "default", "192.168.30.1", map[string]string{"key": "pod", "name": "pod3"}) + pod4 = newPod("pod_4", "default", "192.168.40.1", map[string]string{"key": "pod", "name": "pod4"}) + pod5 = newPod("pod_5", "default", "192.168.50.1", map[string]string{"key": 
"pod", "name": "pod5"}) + pod6 = newPod("pod_6", "default", "192.168.60.1", map[string]string{"key": "pod", "name": "pod6"}) + ) + AfterEach(func() { + close(stopChan) + nbsbCleanup.Cleanup() + }) + + BeforeEach(func() { + initialDB = libovsdbtest.TestSetup{ + NBData: []libovsdbtest.TestData{ + &nbdb.LogicalSwitch{ + UUID: "node1", + Name: "node1", + }, + }, + } + nbClient, _, nbsbCleanup, err = libovsdbtest.NewNBSBTestHarness(initialDB) + Expect(err).NotTo(HaveOccurred()) + stopChan = make(chan struct{}) + + }) + + var _ = Context("When adding new policies", func() { + + var ( + namespaceTest3 = &corev1.Namespace{ + ObjectMeta: v1.ObjectMeta{Name: "test3", + Labels: map[string]string{"name": "test3", "match": "test"}}, + } + multipleMatchPolicy = newPolicy( + "multiple", + &v1.LabelSelector{MatchLabels: map[string]string{"match": "test"}}, + sets.New("10.10.10.1"), + &v1.LabelSelector{MatchLabels: map[string]string{"name": "default"}}, + &v1.LabelSelector{MatchLabels: map[string]string{"key": "pod"}}, + false, + ) + ) + It("registers the new policy with multiple namespace matching", func() { + + initController([]runtime.Object{namespaceDefault, namespaceTest, namespaceTest2, namespaceTest3, pod1}, []runtime.Object{multipleMatchPolicy}) + + Eventually(func() []string { return listRoutePolicyInCache() }, 5).Should(HaveLen(1)) + Eventually(func(g Gomega) { + p, found, _ := externalController.mgr.getRoutePolicyFromCache(multipleMatchPolicy.Name) + g.Expect(found).To(BeTrue()) + g.Expect(p.Spec).To(BeEquivalentTo(multipleMatchPolicy.Spec)) + }, 5).Should(Succeed()) + Eventually(listNamespaceInfo(), 5).Should(HaveLen(3)) + Eventually(func() *namespaceInfo { return getNamespaceInfo(namespaceTest.Name) }, 5).Should( + BeEquivalentTo( + &namespaceInfo{ + policies: sets.New(multipleMatchPolicy.Name), + staticGateways: gatewayInfoList{{gws: sets.New(staticHopGWIP)}}, + dynamicGateways: map[types.NamespacedName]*gatewayInfo{ + {Namespace: "default", Name: "pod_1"}: { + gws: 
sets.New("192.168.10.1"), + }, + }})) + + Eventually(func() *namespaceInfo { return getNamespaceInfo(namespaceTest2.Name) }, 5).Should( + BeEquivalentTo( + &namespaceInfo{ + policies: sets.New(multipleMatchPolicy.Name), + staticGateways: gatewayInfoList{{gws: sets.New(staticHopGWIP)}}, + dynamicGateways: map[types.NamespacedName]*gatewayInfo{ + {Namespace: "default", Name: "pod_1"}: { + gws: sets.New("192.168.10.1"), + }, + }})) + + Eventually(func() *namespaceInfo { return getNamespaceInfo(namespaceTest3.Name) }, 5).Should( + BeEquivalentTo( + &namespaceInfo{ + policies: sets.New(multipleMatchPolicy.Name), + staticGateways: gatewayInfoList{{gws: sets.New(staticHopGWIP)}}, + dynamicGateways: map[types.NamespacedName]*gatewayInfo{ + {Namespace: "default", Name: "pod_1"}: { + gws: sets.New("192.168.10.1"), + }, + }})) + }) + + It("registers a new policy with no namespace match", func() { + initController([]runtime.Object{namespaceTest2, namespaceDefault, pod1}, []runtime.Object{dynamicPolicy}) + + Eventually(func() []string { return listRoutePolicyInCache() }, 5).Should(HaveLen(1)) + Eventually(listNamespaceInfo(), 5).Should(HaveLen(0)) + }) + + It("registers a new policy with multiple dynamic and static GWs and bfd enabled on all gateways", func() { + + staticMultiIPPolicy := newPolicy("multiIPPolicy", + &v1.LabelSelector{MatchLabels: map[string]string{"name": "test"}}, + sets.New("10.10.10.1", "10.10.10.2", "10.10.10.3", "10.10.10.3", "10.10.10.4"), + &v1.LabelSelector{MatchLabels: map[string]string{"name": "default"}}, + &v1.LabelSelector{MatchLabels: map[string]string{"key": "pod"}}, + true, + ) + initController([]runtime.Object{namespaceDefault, namespaceTest, pod1, pod2, pod3, pod4, pod5, pod6}, []runtime.Object{staticMultiIPPolicy}) + + Eventually(func() []string { return listRoutePolicyInCache() }, 5).Should(HaveLen(1)) + Eventually(listNamespaceInfo(), 5).Should(HaveLen(1)) + Eventually(func() *namespaceInfo { + f := getNamespaceInfo(namespaceTest.Name) + 
sort.Sort(f.staticGateways) + return f + }, 5). + Should(BeEquivalentTo( + &namespaceInfo{ + policies: sets.New(staticMultiIPPolicy.Name), + staticGateways: gatewayInfoList{ + { + gws: sets.New("10.10.10.1"), + bfdEnabled: true, + }, + { + gws: sets.New("10.10.10.2"), + bfdEnabled: true, + }, + { + gws: sets.New("10.10.10.3"), + bfdEnabled: true, + }, + { + gws: sets.New("10.10.10.4"), + bfdEnabled: true, + }, + }, + dynamicGateways: map[types.NamespacedName]*gatewayInfo{ + + {Namespace: "default", Name: pod3.Name}: { + gws: sets.New(pod3.Status.PodIPs[0].IP), + bfdEnabled: true, + }, + {Namespace: "default", Name: pod4.Name}: { + gws: sets.New(pod4.Status.PodIPs[0].IP), + bfdEnabled: true, + }, + {Namespace: "default", Name: pod5.Name}: { + gws: sets.New(pod5.Status.PodIPs[0].IP), + bfdEnabled: true, + }, + {Namespace: "default", Name: pod6.Name}: { + gws: sets.New(pod6.Status.PodIPs[0].IP), + bfdEnabled: true, + }, + {Namespace: "default", Name: pod1.Name}: { + gws: sets.New(pod1.Status.PodIPs[0].IP), + bfdEnabled: true, + }, + {Namespace: "default", Name: pod2.Name}: { + gws: sets.New(pod2.Status.PodIPs[0].IP), + bfdEnabled: true, + }, + }})) + + }) + + It("registers a second policy with no overlaping IPs", func() { + + initController([]runtime.Object{namespaceDefault, namespaceTest, pod1}, []runtime.Object{staticPolicy, dynamicPolicy}) + Eventually(func() []string { return listRoutePolicyInCache() }, 5). 
+ Should(HaveLen(2)) + Eventually(listNamespaceInfo(), 5).Should(HaveLen(1)) + Eventually(func() *namespaceInfo { return getNamespaceInfo(namespaceTest.Name) }, 5).Should( + BeEquivalentTo( + &namespaceInfo{ + policies: sets.New(staticPolicy.Name, dynamicPolicy.Name), + staticGateways: gatewayInfoList{ + { + gws: sets.New(staticHopGWIP), + }, + }, + dynamicGateways: map[types.NamespacedName]*gatewayInfo{ + + {Namespace: "default", Name: "pod_1"}: { + gws: sets.New("192.168.10.1"), + }, + }})) + }) + It("registers policies with overlaping IPs for static and dynamic hops", func() { + duplicatedStatic := newPolicy("overlappingStatic", + &v1.LabelSelector{MatchLabels: map[string]string{"name": "test"}}, + sets.New(staticHopGWIP, "172.1.1.1"), + nil, + nil, + false) + duplicatedDynamic := newPolicy( + "duplicatedDynamic", + &v1.LabelSelector{MatchLabels: map[string]string{"name": "test"}}, + nil, + &v1.LabelSelector{MatchLabels: map[string]string{"name": "default"}}, + &v1.LabelSelector{MatchLabels: map[string]string{"duplicated": "true"}}, + false, + ) + initController([]runtime.Object{namespaceDefault, namespaceTest, pod1}, []runtime.Object{staticPolicy, duplicatedStatic, dynamicPolicy, duplicatedDynamic}) + + Eventually(func() []string { return listRoutePolicyInCache() }, 5). 
+ Should(HaveLen(4)) + Eventually(listNamespaceInfo(), 5).Should(HaveLen(1)) + Eventually(func() *namespaceInfo { + f := getNamespaceInfo(namespaceTest.Name) + sort.Sort(f.staticGateways) + return f + }, 5).Should( + BeEquivalentTo( + &namespaceInfo{ + policies: sets.New(staticPolicy.Name, dynamicPolicy.Name, duplicatedStatic.Name, duplicatedDynamic.Name), + staticGateways: gatewayInfoList{ + { + gws: sets.New(staticHopGWIP), + }, + { + gws: sets.New("172.1.1.1"), + }, + }, + dynamicGateways: map[types.NamespacedName]*gatewayInfo{ + + {Namespace: "default", Name: "pod_1"}: { + gws: sets.New("192.168.10.1"), + }, + }})) + }) + }) + + var _ = Context("when deleting a policy", func() { + + var ( + duplicatedStatic = newPolicy("duplicatedStatic", + &v1.LabelSelector{MatchLabels: map[string]string{"name": "test"}}, + sets.New(staticHopGWIP, "172.1.1.1"), + nil, + nil, + false) + duplicatedDynamic = newPolicy( + "duplicatedDynamic", + &v1.LabelSelector{MatchLabels: map[string]string{"name": "test"}}, + nil, + &v1.LabelSelector{MatchLabels: map[string]string{"name": "default"}}, + &v1.LabelSelector{MatchLabels: map[string]string{"duplicated": "true"}}, + false, + ) + ) + It("validates that the IPs of the policy are no longer reflected on the targeted namespaces when the policy is deleted an no other policy overlaps", func() { + initController([]runtime.Object{namespaceDefault, namespaceTest, pod1}, []runtime.Object{staticPolicy, dynamicPolicy}) + + Eventually(func() []string { return listRoutePolicyInCache() }, 5).Should(HaveLen(2)) + Eventually(func() []string { return listNamespaceInfo() }, 5).Should(HaveLen(1)) + deletePolicy(staticPolicy.Name, fakeRouteClient) + Eventually(func() []string { return listRoutePolicyInCache() }, 5).Should(HaveLen(1)) + Eventually(func() *namespaceInfo { return getNamespaceInfo(namespaceTest.Name) }, 5).Should( + BeEquivalentTo( + &namespaceInfo{ + policies: sets.New(dynamicPolicy.Name), + staticGateways: gatewayInfoList{}, + 
dynamicGateways: map[types.NamespacedName]*gatewayInfo{ + + {Namespace: "default", Name: "pod_1"}: { + gws: sets.New("192.168.10.1"), + }, + }})) + }) + It("validates that the IPs of a deleted policy won't show up in a non-matching namespace after the policy is deleted", func() { + initController([]runtime.Object{namespaceDefault, namespaceTest2, pod1}, []runtime.Object{staticPolicy, dynamicPolicy}) + + Eventually(func() []string { return listRoutePolicyInCache() }, 5).Should(HaveLen(2)) + Eventually(func() []string { return listNamespaceInfo() }, 5).Should(HaveLen(0)) + deletePolicy(dynamicPolicy.Name, fakeRouteClient) + Eventually(func() []string { return listRoutePolicyInCache() }, 5).Should(HaveLen(1)) + Eventually(func() []string { return listNamespaceInfo() }, 5).Should(HaveLen(0)) + }) + + It("validates that an overlapping IP from another policy will not be deleted when one of the overlaping policies is deleted", func() { + + initController([]runtime.Object{namespaceTest, namespaceDefault, pod1}, []runtime.Object{staticPolicy, duplicatedStatic, dynamicPolicy, duplicatedDynamic}) + + Eventually(func() []string { return listRoutePolicyInCache() }, 5).Should(HaveLen(4)) + deletePolicy(staticPolicy.Name, fakeRouteClient) + deletePolicy(dynamicPolicy.Name, fakeRouteClient) + Eventually(func() []string { return listRoutePolicyInCache() }, 5).Should(HaveLen(2)) + Eventually(func() *namespaceInfo { + f := getNamespaceInfo(namespaceTest.Name) + sort.Sort(f.staticGateways) + return f + }, 5).Should( + BeEquivalentTo( + &namespaceInfo{ + policies: sets.New(duplicatedStatic.Name, duplicatedDynamic.Name), + staticGateways: gatewayInfoList{ + { + gws: sets.New(staticHopGWIP), + }, + { + gws: sets.New("172.1.1.1"), + }, + }, + dynamicGateways: map[types.NamespacedName]*gatewayInfo{ + + {Namespace: "default", Name: "pod_1"}: { + gws: sets.New("192.168.10.1"), + }, + }})) + }) + }) + + var _ = Context("when updating a policy", func() { + + It("validates that changing the 
from selector will retarget the new namespaces", func() { + initController([]runtime.Object{namespaceDefault, namespaceTest, namespaceTest2, pod1}, []runtime.Object{dynamicPolicy}) + + Eventually(func() []string { return listRoutePolicyInCache() }, 5).Should(HaveLen(1)) + Eventually(func() []string { return listNamespaceInfo() }, 5).Should(HaveLen(1)) + + Eventually(func() *namespaceInfo { return getNamespaceInfo(namespaceTest.Name) }, 5).Should( + BeEquivalentTo( + &namespaceInfo{ + policies: sets.New(dynamicPolicy.Name), + staticGateways: gatewayInfoList{}, + dynamicGateways: map[types.NamespacedName]*gatewayInfo{ + + {Namespace: "default", Name: "pod_1"}: { + gws: sets.New("192.168.10.1"), + }, + }})) + + p, err := fakeRouteClient.K8sV1().AdminPolicyBasedExternalRoutes().Get(context.TODO(), dynamicPolicy.Name, v1.GetOptions{}) + Expect(err).NotTo(HaveOccurred()) + p.Spec.From.NamespaceSelector = v1.LabelSelector{MatchLabels: namespaceTest2.Labels} + p.Generation++ + _, err = fakeRouteClient.K8sV1().AdminPolicyBasedExternalRoutes().Update(context.Background(), p, v1.UpdateOptions{}) + Expect(err).NotTo(HaveOccurred()) + + Eventually(func() []string { return listNamespaceInfo() }, 5).Should(HaveLen(1)) + Eventually(func() *namespaceInfo { return getNamespaceInfo(namespaceTest.Name) }, 5).Should(BeNil()) + Eventually(func() *namespaceInfo { return getNamespaceInfo(namespaceTest2.Name) }, 5).Should( + BeEquivalentTo( + &namespaceInfo{ + policies: sets.New(dynamicPolicy.Name), + staticGateways: gatewayInfoList{}, + dynamicGateways: map[types.NamespacedName]*gatewayInfo{ + + {Namespace: "default", Name: "pod_1"}: { + gws: sets.New("192.168.10.1"), + }, + }})) + }) + It("validates that changing a static hop from an existing policy will be applied to the target namespaces", func() { + newStaticIP := "10.30.20.1" + staticPolicy := newPolicy( + "static", + &v1.LabelSelector{MatchLabels: map[string]string{"name": "test"}}, + sets.New(staticHopGWIP, newStaticIP), + nil, + 
nil, + false, + ) + initController([]runtime.Object{namespaceDefault, namespaceTest}, []runtime.Object{staticPolicy}) + + Eventually(func() []string { return listRoutePolicyInCache() }, 5).Should(HaveLen(1)) + Eventually(func() []string { return listNamespaceInfo() }, 5).Should(HaveLen(1)) + Eventually(func() *namespaceInfo { + nsInfo := getNamespaceInfo(namespaceTest.Name) + sort.Sort(nsInfo.staticGateways) + return nsInfo + }, 5).Should( + BeEquivalentTo( + &namespaceInfo{ + policies: sets.New(staticPolicy.Name), + staticGateways: gatewayInfoList{ + { + gws: sets.New(staticHopGWIP), + }, + { + gws: sets.New(newStaticIP), + }, + }, + dynamicGateways: make(map[types.NamespacedName]*gatewayInfo, 0), + })) + + p, err := fakeRouteClient.K8sV1().AdminPolicyBasedExternalRoutes().Get(context.TODO(), staticPolicy.Name, v1.GetOptions{}) + Expect(err).NotTo(HaveOccurred()) + p.Spec.NextHops.StaticHops = []*adminpolicybasedrouteapi.StaticHop{ + {IP: newStaticIP}, + } + p.Generation++ + _, err = fakeRouteClient.K8sV1().AdminPolicyBasedExternalRoutes().Update(context.Background(), p, v1.UpdateOptions{}) + Expect(err).NotTo(HaveOccurred()) + + Eventually(func() *namespaceInfo { return getNamespaceInfo(namespaceTest.Name) }, 5*time.Hour).Should( + BeEquivalentTo( + &namespaceInfo{ + policies: sets.New(staticPolicy.Name), + staticGateways: gatewayInfoList{ + { + gws: sets.New(newStaticIP), + }, + }, + dynamicGateways: make(map[types.NamespacedName]*gatewayInfo, 0), + })) + + }) + It("validates that changes to a dynamic hop from an existing policy will be applied to the target namespaces", func() { + singlePodDynamicPolicy := newPolicy( + "singlePod", + &v1.LabelSelector{MatchLabels: map[string]string{"name": "test"}}, + nil, + &v1.LabelSelector{MatchLabels: map[string]string{"name": "default"}}, + &v1.LabelSelector{MatchLabels: map[string]string{"name": "pod1"}}, + false, + ) + initController([]runtime.Object{namespaceDefault, namespaceTest, pod1, pod2}, 
[]runtime.Object{singlePodDynamicPolicy}) + + Eventually(func() []string { return listRoutePolicyInCache() }, 5).Should(HaveLen(1)) + Eventually(func() []string { return listNamespaceInfo() }, 5).Should(HaveLen(1)) + Eventually(func() *namespaceInfo { return getNamespaceInfo(namespaceTest.Name) }, 5).Should( + BeEquivalentTo( + &namespaceInfo{ + policies: sets.New(singlePodDynamicPolicy.Name), + staticGateways: gatewayInfoList{}, + dynamicGateways: map[types.NamespacedName]*gatewayInfo{ + {Namespace: "default", Name: "pod_1"}: { + gws: sets.New(pod1.Status.PodIPs[0].IP), + }, + }})) + + p, err := fakeRouteClient.K8sV1().AdminPolicyBasedExternalRoutes().Get(context.TODO(), singlePodDynamicPolicy.Name, v1.GetOptions{}) + Expect(err).NotTo(HaveOccurred()) + p.Spec.NextHops.DynamicHops[0].PodSelector = v1.LabelSelector{MatchLabels: map[string]string{"name": "pod2"}} + p.Generation++ + _, err = fakeRouteClient.K8sV1().AdminPolicyBasedExternalRoutes().Update(context.Background(), p, v1.UpdateOptions{}) + Expect(err).NotTo(HaveOccurred()) + Eventually(func() *namespaceInfo { return getNamespaceInfo(namespaceTest.Name) }, 5).Should( + BeEquivalentTo( + &namespaceInfo{ + policies: sets.New(singlePodDynamicPolicy.Name), + staticGateways: gatewayInfoList{}, + dynamicGateways: map[types.NamespacedName]*gatewayInfo{ + {Namespace: "default", Name: "pod_2"}: { + gws: sets.New(pod2.Status.PodIPs[0].IP), + }, + }})) + }) + It("validates that removing one of the static hop IPs will be reflected in the route policy", func() { + + staticMultiIPPolicy := newPolicy("multiIPPolicy", + &v1.LabelSelector{MatchLabels: map[string]string{"name": "test"}}, + sets.New("10.10.10.1", "10.10.10.2", "10.10.10.3", "10.10.10.3", "10.10.10.4"), + nil, nil, + true, + ) + initController([]runtime.Object{namespaceDefault, namespaceTest, pod1, pod2, pod3, pod4, pod5, pod6}, []runtime.Object{staticMultiIPPolicy}) + + Eventually(func() []string { return listRoutePolicyInCache() }, 5).Should(HaveLen(1)) + 
Eventually(listNamespaceInfo(), 5).Should(HaveLen(1)) + Eventually(func() *namespaceInfo { + f := getNamespaceInfo(namespaceTest.Name) + sort.Sort(f.staticGateways) + return f + }, 5). + Should(BeEquivalentTo( + &namespaceInfo{ + policies: sets.New(staticMultiIPPolicy.Name), + staticGateways: gatewayInfoList{ + { + gws: sets.New("10.10.10.1"), + bfdEnabled: true, + }, + { + gws: sets.New("10.10.10.2"), + bfdEnabled: true, + }, + { + gws: sets.New("10.10.10.3"), + bfdEnabled: true, + }, + { + gws: sets.New("10.10.10.4"), + bfdEnabled: true, + }, + }, + dynamicGateways: map[types.NamespacedName]*gatewayInfo{}, + })) + p, err := fakeRouteClient.K8sV1().AdminPolicyBasedExternalRoutes().Get(context.TODO(), staticMultiIPPolicy.Name, v1.GetOptions{}) + Expect(err).NotTo(HaveOccurred()) + p.Spec.NextHops.StaticHops = []*adminpolicybasedrouteapi.StaticHop{ + { + IP: "10.10.10.1", + BFDEnabled: true, + }, + { + IP: "10.10.10.2", + BFDEnabled: true, + }, + { + IP: "10.10.10.3", + BFDEnabled: true, + }, + } + p.Generation++ + _, err = fakeRouteClient.K8sV1().AdminPolicyBasedExternalRoutes().Update(context.Background(), p, v1.UpdateOptions{}) + Expect(err).NotTo(HaveOccurred()) + By("Validating the static refernces don't contain the last element") + Eventually(func() *namespaceInfo { + f := getNamespaceInfo(namespaceTest.Name) + sort.Sort(f.staticGateways) + return f + }, 5). 
+ Should(BeEquivalentTo( + &namespaceInfo{ + policies: sets.New(staticMultiIPPolicy.Name), + staticGateways: gatewayInfoList{ + { + gws: sets.New("10.10.10.1"), + bfdEnabled: true, + }, + { + gws: sets.New("10.10.10.2"), + bfdEnabled: true, + }, + { + gws: sets.New("10.10.10.3"), + bfdEnabled: true, + }, + }, + dynamicGateways: map[types.NamespacedName]*gatewayInfo{}})) + }) + It("validates that removing a duplicated static hop IP from an overlapping policy static hop will keep the static IP in the route policy", func() { + + staticMultiIPPolicy := newPolicy("multiIPPolicy", + &v1.LabelSelector{MatchLabels: map[string]string{"name": "test"}}, + sets.New("20.10.10.1", "20.10.10.2", "20.10.10.3", "20.10.10.4", staticHopGWIP), + nil, nil, + false, + ) + initController([]runtime.Object{namespaceDefault, namespaceTest, pod1, pod2, pod3, pod4, pod5, pod6}, []runtime.Object{staticMultiIPPolicy, staticPolicy}) + + Eventually(func() []string { return listRoutePolicyInCache() }, 5).Should(HaveLen(2)) + Eventually(listNamespaceInfo(), 5).Should(HaveLen(1)) + Eventually(func() gatewayInfoList { + f := getNamespaceInfo(namespaceTest.Name) + sort.Sort(f.staticGateways) + return f.staticGateways + }, 5). 
+ Should(BeEquivalentTo( + gatewayInfoList{ + { + gws: sets.New(staticHopGWIP), + }, + { + gws: sets.New("20.10.10.1"), + }, + { + gws: sets.New("20.10.10.2"), + }, + { + gws: sets.New("20.10.10.3"), + }, + { + gws: sets.New("20.10.10.4"), + }, + })) + Eventually(getNamespaceInfo(namespaceTest.Name).policies).Should(BeEquivalentTo(sets.New(staticMultiIPPolicy.Name, staticPolicy.Name))) + p, err := fakeRouteClient.K8sV1().AdminPolicyBasedExternalRoutes().Get(context.TODO(), staticMultiIPPolicy.Name, v1.GetOptions{}) + Expect(err).NotTo(HaveOccurred()) + p.Spec.NextHops.StaticHops = []*adminpolicybasedrouteapi.StaticHop{ + { + IP: "20.10.10.2", + }, + { + IP: "20.10.10.3", + }, + { + IP: "20.10.10.4", + }, + { + IP: "20.10.20.1", + }, + } + p.Generation++ + _, err = fakeRouteClient.K8sV1().AdminPolicyBasedExternalRoutes().Update(context.Background(), p, v1.UpdateOptions{}) + Expect(err).NotTo(HaveOccurred()) + By("Validating the static refernces don't contain the last element") + Eventually(func() gatewayInfoList { + f := getNamespaceInfo(namespaceTest.Name) + sort.Sort(f.staticGateways) + return f.staticGateways + }, 5). 
+ Should(BeEquivalentTo( + gatewayInfoList{ + { + gws: sets.New(staticHopGWIP), + }, + { + gws: sets.New("20.10.10.1"), + }, + { + gws: sets.New("20.10.10.2"), + }, + { + gws: sets.New("20.10.10.3"), + }, + { + gws: sets.New("20.10.10.4"), + }, + })) + Eventually(getNamespaceInfo(namespaceTest.Name).policies).Should(BeEquivalentTo(sets.New(staticMultiIPPolicy.Name, staticPolicy.Name))) + }) + }) +}) diff --git a/go-controller/pkg/ovn/controller/apbroute/master_controller.go b/go-controller/pkg/ovn/controller/apbroute/master_controller.go new file mode 100644 index 0000000000..0148483589 --- /dev/null +++ b/go-controller/pkg/ovn/controller/apbroute/master_controller.go @@ -0,0 +1,568 @@ +package apbroute + +import ( + "context" + "fmt" + "reflect" + "strings" + "sync" + "time" + + nettypes "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/apis/k8s.cni.cncf.io/v1" + libovsdbclient "github.com/ovn-org/libovsdb/client" + v1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + ktypes "k8s.io/apimachinery/pkg/types" + utilruntime "k8s.io/apimachinery/pkg/util/runtime" + "k8s.io/apimachinery/pkg/util/sets" + "k8s.io/apimachinery/pkg/util/wait" + coreinformers "k8s.io/client-go/informers/core/v1" + "k8s.io/client-go/kubernetes" + corev1listers "k8s.io/client-go/listers/core/v1" + "k8s.io/client-go/tools/cache" + "k8s.io/client-go/util/workqueue" + "k8s.io/klog/v2" + + adminpolicybasedrouteapi "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/adminpolicybasedroute/v1" + adminpolicybasedrouteclient "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned" + adminpolicybasedrouteinformer "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/informers/externalversions" + adminpolicybasedroutelisters 
"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/listers/adminpolicybasedroute/v1" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/factory" + addressset "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/address_set" +) + +const ( + resyncInterval = 0 + maxRetries = 15 +) + +var ( + controllerName string +) + +// Admin Policy Based Route services + +type ExternalGatewayMasterController struct { + client kubernetes.Interface + apbRoutePolicyClient adminpolicybasedrouteclient.Interface + stopCh <-chan struct{} + + // route policies + + // routerInformer v1apbinformer.AdminPolicyBasedExternalRouteInformer + routeLister adminpolicybasedroutelisters.AdminPolicyBasedExternalRouteLister + routeSynced cache.InformerSynced + routeQueue workqueue.RateLimitingInterface + + // Pods + podLister corev1listers.PodLister + podSynced cache.InformerSynced + podQueue workqueue.RateLimitingInterface + + // Namespaces + namespaceQueue workqueue.RateLimitingInterface + namespaceLister corev1listers.NamespaceLister + namespaceSynced cache.InformerSynced + + // External gateway caches + // Make them public so that they can be used by the annotation logic to lock on namespaces and share the same external route information + ExternalGWCache map[ktypes.NamespacedName]*ExternalRouteInfo + ExGWCacheMutex *sync.RWMutex + + routePolicyInformer adminpolicybasedrouteinformer.SharedInformerFactory + + mgr *externalPolicyManager + nbClient *northBoundClient +} + +func NewExternalMasterController( + parentControllerName string, + client kubernetes.Interface, + apbRoutePolicyClient adminpolicybasedrouteclient.Interface, + stopCh <-chan struct{}, + podInformer coreinformers.PodInformer, + namespaceInformer coreinformers.NamespaceInformer, + nodeLister corev1listers.NodeLister, + nbClient libovsdbclient.Client, + addressSetFactory addressset.AddressSetFactory, +) (*ExternalGatewayMasterController, error) { + + controllerName = parentControllerName + 
routePolicyInformer := adminpolicybasedrouteinformer.NewSharedInformerFactory(apbRoutePolicyClient, resyncInterval) + externalRouteInformer := routePolicyInformer.K8s().V1().AdminPolicyBasedExternalRoutes() + externalGWCache := make(map[ktypes.NamespacedName]*ExternalRouteInfo) + exGWCacheMutex := &sync.RWMutex{} + nbCli := &northBoundClient{ + routeLister: externalRouteInformer.Lister(), + nodeLister: nodeLister, + nbClient: nbClient, + addressSetFactory: addressSetFactory, + externalGWCache: externalGWCache, + exGWCacheMutex: exGWCacheMutex, + } + + c := &ExternalGatewayMasterController{ + client: client, + apbRoutePolicyClient: apbRoutePolicyClient, + stopCh: stopCh, + routeLister: externalRouteInformer.Lister(), + routeSynced: externalRouteInformer.Informer().HasSynced, + routeQueue: workqueue.NewNamedRateLimitingQueue( + workqueue.NewItemFastSlowRateLimiter(time.Second, 5*time.Second, 5), + "adminpolicybasedexternalroutes", + ), + podLister: podInformer.Lister(), + podSynced: podInformer.Informer().HasSynced, + podQueue: workqueue.NewNamedRateLimitingQueue( + workqueue.NewItemFastSlowRateLimiter(time.Second, 5*time.Second, 5), + "apbexternalroutepods", + ), + namespaceLister: namespaceInformer.Lister(), + namespaceSynced: namespaceInformer.Informer().HasSynced, + namespaceQueue: workqueue.NewNamedRateLimitingQueue( + workqueue.NewItemFastSlowRateLimiter(time.Second, 5*time.Second, 5), + "apbexternalroutenamespaces", + ), + ExternalGWCache: externalGWCache, + ExGWCacheMutex: exGWCacheMutex, + routePolicyInformer: routePolicyInformer, + nbClient: nbCli, + mgr: newExternalPolicyManager( + stopCh, + podInformer.Lister(), + namespaceInformer.Lister(), + routePolicyInformer.K8s().V1().AdminPolicyBasedExternalRoutes().Lister(), + nbCli), + } + + _, err := namespaceInformer.Informer().AddEventHandler( + factory.WithUpdateHandlingForObjReplace(cache.ResourceEventHandlerFuncs{ + AddFunc: c.onNamespaceAdd, + UpdateFunc: c.onNamespaceUpdate, + DeleteFunc: 
c.onNamespaceDelete, + })) + if err != nil { + return nil, err + } + + _, err = podInformer.Informer().AddEventHandler( + factory.WithUpdateHandlingForObjReplace(cache.ResourceEventHandlerFuncs{ + AddFunc: c.onPodAdd, + UpdateFunc: c.onPodUpdate, + DeleteFunc: c.onPodDelete, + })) + if err != nil { + return nil, err + } + _, err = externalRouteInformer.Informer().AddEventHandler( + factory.WithUpdateHandlingForObjReplace(cache.ResourceEventHandlerFuncs{ + AddFunc: c.onPolicyAdd, + UpdateFunc: c.onPolicyUpdate, + DeleteFunc: c.onPolicyDelete, + })) + if err != nil { + return nil, err + } + + return c, nil + +} + +func (c *ExternalGatewayMasterController) Run(threadiness int) { + defer utilruntime.HandleCrash() + klog.Infof("Starting Admin Policy Based Route Controller") + + c.routePolicyInformer.Start(c.stopCh) + + if !cache.WaitForNamedCacheSync("apbexternalroutenamespaces", c.stopCh, c.namespaceSynced) { + utilruntime.HandleError(fmt.Errorf("timed out waiting for caches to sync")) + klog.Infof("Synchronization failed") + return + } + + if !cache.WaitForNamedCacheSync("apbexternalroutepods", c.stopCh, c.podSynced) { + utilruntime.HandleError(fmt.Errorf("timed out waiting for caches to sync")) + klog.Infof("Synchronization failed") + return + } + + if !cache.WaitForNamedCacheSync("adminpolicybasedexternalroutes", c.stopCh, c.routeSynced) { + utilruntime.HandleError(fmt.Errorf("timed out waiting for caches to sync")) + klog.Infof("Synchronization failed") + return + } + + klog.Infof("Repairing Admin Policy Based External Route Services") + c.repair() + + wg := &sync.WaitGroup{} + for i := 0; i < threadiness; i++ { + wg.Add(1) + go func() { + defer wg.Done() + wait.Until(func() { + // processes route policies + c.runPolicyWorker(wg) + }, time.Second, c.stopCh) + }() + } + + for i := 0; i < threadiness; i++ { + wg.Add(1) + go func() { + defer wg.Done() + wait.Until(func() { + // detects gateway pod changes and updates the pod's IP and MAC in the northbound DB + 
c.runPodWorker(wg) + }, time.Second, c.stopCh) + }() + } + + for i := 0; i < threadiness; i++ { + wg.Add(1) + go func() { + defer wg.Done() + wait.Until(func() { + // detects namespace changes and applies polices that match the namespace selector in the `From` policy field + c.runNamespaceWorker(wg) + }, time.Second, c.stopCh) + }() + } + + // wait until we're told to stop + <-c.stopCh + + c.podQueue.ShutDown() + c.routeQueue.ShutDown() + c.namespaceQueue.ShutDown() + + wg.Wait() + +} + +func (c *ExternalGatewayMasterController) runPolicyWorker(wg *sync.WaitGroup) { + for c.processNextPolicyWorkItem(wg) { + } +} + +func (c *ExternalGatewayMasterController) processNextPolicyWorkItem(wg *sync.WaitGroup) bool { + wg.Add(1) + defer wg.Done() + + obj, shutdown := c.routeQueue.Get() + + if shutdown { + return false + } + + defer c.routeQueue.Done(obj) + + item := obj.(*adminpolicybasedrouteapi.AdminPolicyBasedExternalRoute) + klog.Infof("Processing policy %s", item.Name) + err := c.syncRoutePolicy(item) + if err != nil { + if c.routeQueue.NumRequeues(item) < maxRetries { + klog.V(2).InfoS("Error found while processing policy: %w", err) + c.routeQueue.AddRateLimited(item) + return true + } + klog.Warningf("Dropping policy %q out of the queue: %w", item.Name, err) + utilruntime.HandleError(err) + } + c.routeQueue.Forget(obj) + return true +} + +func (c *ExternalGatewayMasterController) syncRoutePolicy(routePolicy *adminpolicybasedrouteapi.AdminPolicyBasedExternalRoute) error { + _, err := c.routeLister.Get(routePolicy.Name) + if err != nil && !apierrors.IsNotFound(err) { + return err + } + if apierrors.IsNotFound(err) { + // DELETE use case + klog.Infof("Deleting policy %s", routePolicy.Name) + err = c.mgr.processDeletePolicy(routePolicy.Name) + if err != nil { + return fmt.Errorf("failed to delete Admin Policy Based External Route %s:%w", routePolicy.Name, err) + } + klog.Infof("Policy %s deleted", routePolicy.Name) + return nil + } + currentPolicy, found, 
markedForDeletion := c.mgr.getRoutePolicyFromCache(routePolicy.Name) + if markedForDeletion { + klog.Warningf("Attempting to add or update route policy %s when it has been marked for deletion. Skipping...", routePolicy.Name) + return nil + } + if !found { + // ADD use case + klog.Infof("Adding policy %s", routePolicy.Name) + pp, err := c.mgr.processAddPolicy(routePolicy) + newErr := c.updateStatusAPBExternalRoute(routePolicy.Name, pp, err) + if err != nil { + return fmt.Errorf("failed to create Admin Policy Based External Route %s:%w", routePolicy.Name, err) + } + if newErr != nil { + return fmt.Errorf("failed to update status in Admin Policy Based External Route %s:%w", routePolicy.Name, newErr) + } + return nil + } + // UPDATE use case + klog.Infof("Updating policy %s", routePolicy.Name) + pp, err := c.mgr.processUpdatePolicy(¤tPolicy, routePolicy) + newErr := c.updateStatusAPBExternalRoute(routePolicy.Name, pp, err) + if err != nil { + return fmt.Errorf("failed to update Admin Policy Based External Route %s:%w", routePolicy.Name, err) + } + if newErr != nil { + return fmt.Errorf("failed to update status in Admin Policy Based External Route %s:%w", routePolicy.Name, newErr) + } + return nil +} + +func (c *ExternalGatewayMasterController) onPolicyAdd(obj interface{}) { + c.routeQueue.Add(obj) +} + +func (c *ExternalGatewayMasterController) onPolicyUpdate(oldObj, newObj interface{}) { + oldRoutePolicy := oldObj.(*adminpolicybasedrouteapi.AdminPolicyBasedExternalRoute) + newRoutePolicy := newObj.(*adminpolicybasedrouteapi.AdminPolicyBasedExternalRoute) + + if oldRoutePolicy.Generation == newRoutePolicy.Generation || + !newRoutePolicy.GetDeletionTimestamp().IsZero() { + return + } + + c.routeQueue.Add(newObj) +} + +func (c *ExternalGatewayMasterController) onPolicyDelete(obj interface{}) { + c.routeQueue.Add(obj) +} + +func (c *ExternalGatewayMasterController) onNamespaceAdd(obj interface{}) { + c.namespaceQueue.Add(obj) +} + +func (c 
*ExternalGatewayMasterController) onNamespaceUpdate(oldObj, newObj interface{}) { + oldNamespace := oldObj.(*v1.Namespace) + newNamespace := newObj.(*v1.Namespace) + + if oldNamespace.ResourceVersion == newNamespace.ResourceVersion || !newNamespace.GetDeletionTimestamp().IsZero() { + return + } + c.namespaceQueue.Add(newObj) +} + +func (c *ExternalGatewayMasterController) onNamespaceDelete(obj interface{}) { + c.namespaceQueue.Add(obj) +} + +func (c *ExternalGatewayMasterController) runNamespaceWorker(wg *sync.WaitGroup) { + for c.processNextNamespaceWorkItem(wg) { + + } +} + +func (c *ExternalGatewayMasterController) processNextNamespaceWorkItem(wg *sync.WaitGroup) bool { + wg.Add(1) + defer wg.Done() + + obj, shutdown := c.namespaceQueue.Get() + + if shutdown { + return false + } + + defer c.namespaceQueue.Done(obj) + + err := c.syncNamespace(obj.(*v1.Namespace)) + if err != nil { + if c.namespaceQueue.NumRequeues(obj) < maxRetries { + klog.V(2).InfoS("Error found while processing namespace %s:%w", obj.(*v1.Namespace), err) + c.namespaceQueue.AddRateLimited(obj) + return true + } + klog.Warningf("Dropping namespace %q out of the queue: %v", obj.(*v1.Namespace).Name, err) + utilruntime.HandleError(err) + } + c.namespaceQueue.Forget(obj) + return true +} + +func (c *ExternalGatewayMasterController) syncNamespace(namespace *v1.Namespace) error { + _, err := c.namespaceLister.Get(namespace.Name) + if err != nil && !apierrors.IsNotFound(err) { + return err + } + if apierrors.IsNotFound(err) { + // DELETE use case + klog.Infof("Deleting namespace reference %s", namespace.Name) + _, found := c.mgr.getNamespaceInfoFromCache(namespace.Name) + if !found { + // namespace is not a recipient for policies + return nil + } + c.mgr.deleteNamespaceInfoInCache(namespace.Name) + c.mgr.unlockNamespaceInfoCache(namespace.Name) + return nil + } + matches, err := c.mgr.getPoliciesForNamespace(namespace.Name) + if err != nil { + return err + } + cacheInfo, found := 
c.mgr.getNamespaceInfoFromCache(namespace.Name) + if !found && len(matches) == 0 { + // it's not a namespace being cached already and it is not a target for policies, nothing to do + return nil + } + if !found { + // ADD use case + // new namespace or namespace updated its labels and now match a routing policy + defer c.mgr.unlockNamespaceInfoCache(namespace.Name) + cacheInfo = c.mgr.newNamespaceInfoInCache(namespace.Name) + cacheInfo.policies = matches + return c.mgr.processAddNamespace(namespace, cacheInfo) + } + + if !cacheInfo.policies.Equal(matches) { + // UPDATE use case + // policies differ, need to reconcile them + defer c.mgr.unlockNamespaceInfoCache(namespace.Name) + err = c.mgr.processUpdateNamespace(namespace.Name, cacheInfo.policies, matches, cacheInfo) + if err != nil { + return err + } + if cacheInfo.policies.Len() == 0 { + c.mgr.deleteNamespaceInfoInCache(namespace.Name) + } + return nil + } + c.mgr.unlockNamespaceInfoCache(namespace.Name) + return nil + +} + +func (c *ExternalGatewayMasterController) onPodAdd(obj interface{}) { + c.podQueue.Add(obj) +} + +func (c *ExternalGatewayMasterController) onPodUpdate(oldObj, newObj interface{}) { + o := oldObj.(*v1.Pod) + n := newObj.(*v1.Pod) + // if labels AND assigned Pod IPs AND networkStatus annotations are the same, skip processing changes to the pod. 
+ if reflect.DeepEqual(o.Labels, n.Labels) && + reflect.DeepEqual(o.Status.PodIPs, n.Status.PodIPs) && + reflect.DeepEqual(o.Annotations[nettypes.NetworkStatusAnnot], n.Annotations[nettypes.NetworkStatusAnnot]) { + return + } + c.podQueue.Add(newObj) +} + +func (c *ExternalGatewayMasterController) onPodDelete(obj interface{}) { + c.podQueue.Add(obj) +} + +func (c *ExternalGatewayMasterController) runPodWorker(wg *sync.WaitGroup) { + for c.processNextPodWorkItem(wg) { + } +} + +func (c *ExternalGatewayMasterController) processNextPodWorkItem(wg *sync.WaitGroup) bool { + wg.Add(1) + defer wg.Done() + + obj, shutdown := c.podQueue.Get() + + if shutdown { + return false + } + + defer c.podQueue.Done(obj) + + p := obj.(*v1.Pod) + err := c.syncPod(p) + if err != nil { + if c.podQueue.NumRequeues(obj) < maxRetries { + klog.V(2).InfoS("Error found while processing pod %s/%s:%w", p.Namespace, p.Name, err) + c.podQueue.AddRateLimited(obj) + return true + } + klog.Warningf("Dropping pod %s/%s out of the queue: %s", p.Namespace, p.Name, err) + utilruntime.HandleError(err) + } + + c.podQueue.Forget(obj) + return true +} + +func (c *ExternalGatewayMasterController) syncPod(pod *v1.Pod) error { + + _, err := c.podLister.Pods(pod.Namespace).Get(pod.Name) + if err != nil && !apierrors.IsNotFound(err) { + return err + } + namespaces := c.mgr.filterNamespacesUsingPodGateway(ktypes.NamespacedName{Namespace: pod.Namespace, Name: pod.Name}) + klog.Infof("Processing pod %s/%s", pod.Namespace, pod.Name) + if apierrors.IsNotFound(err) || !pod.DeletionTimestamp.IsZero() { + // DELETE case + if namespaces.Len() == 0 { + // nothing to do, this pod is not a gateway pod + return nil + } + klog.Infof("Deleting pod gateway %s/%s", pod.Namespace, pod.Name) + return c.mgr.processDeletePod(pod, namespaces) + } + if namespaces.Len() == 0 { + // ADD case: new pod or existing pod that is not a gateway pod and could now be one. 
+ klog.Infof("Adding pod %s/%s", pod.Namespace, pod.Name) + return c.mgr.processAddPod(pod) + } + // UPDATE case + klog.Infof("Updating pod gateway %s/%s", pod.Namespace, pod.Name) + return c.mgr.processUpdatePod(pod, namespaces) +} + +func (c *ExternalGatewayMasterController) updateStatusAPBExternalRoute(routeName string, processedPolicy *routePolicy, processedError error) error { + + routePolicy, err := c.apbRoutePolicyClient.K8sV1().AdminPolicyBasedExternalRoutes().Get(context.TODO(), routeName, metav1.GetOptions{}) + if apierrors.IsNotFound(err) { + return err + } + + gwIPs := sets.New[string]() + if processedError == nil { + for _, static := range processedPolicy.staticGateways { + gwIPs = gwIPs.Union(static.gws) + } + for _, dynamic := range processedPolicy.dynamicGateways { + gwIPs = gwIPs.Union(dynamic.gws) + } + } + updateStatus(routePolicy, strings.Join(sets.List(gwIPs), ","), processedError) + _, err = c.apbRoutePolicyClient.K8sV1().AdminPolicyBasedExternalRoutes().UpdateStatus(context.TODO(), routePolicy, metav1.UpdateOptions{}) + if !apierrors.IsNotFound(err) { + return err + } + return nil +} + +func (c *ExternalGatewayMasterController) GetDynamicGatewayIPsForTargetNamespace(namespaceName string) (sets.Set[string], error) { + return c.mgr.getDynamicGatewayIPsForTargetNamespace(namespaceName) +} + +func (c *ExternalGatewayMasterController) GetStaticGatewayIPsForTargetNamespace(namespaceName string) (sets.Set[string], error) { + return c.mgr.getStaticGatewayIPsForTargetNamespace(namespaceName) +} + +func updateStatus(route *adminpolicybasedrouteapi.AdminPolicyBasedExternalRoute, gwIPs string, err error) { + if err != nil { + route.Status.Status = adminpolicybasedrouteapi.FailStatus + route.Status.Messages = append(route.Status.Messages, "Failed to apply policy:%w", err.Error()) + return + } + route.Status.LastTransitionTime = metav1.Time{Time: time.Now()} + route.Status.Status = adminpolicybasedrouteapi.SuccessStatus + route.Status.Messages = 
append(route.Status.Messages, fmt.Sprintf("Configured external gateway IPs: %s", gwIPs)) + klog.Infof("Updating Admin Policy Based External Route %s with Status: %s, Message: %s", route.Name, route.Status.Status, route.Status.Messages[len(route.Status.Messages)-1]) +} diff --git a/go-controller/pkg/ovn/controller/apbroute/network_client.go b/go-controller/pkg/ovn/controller/apbroute/network_client.go new file mode 100644 index 0000000000..f1e1df3769 --- /dev/null +++ b/go-controller/pkg/ovn/controller/apbroute/network_client.go @@ -0,0 +1,744 @@ +package apbroute + +import ( + "fmt" + "net" + "regexp" + "strings" + "sync" + + "github.com/pkg/errors" + "github.com/vishvananda/netlink" + v1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/labels" + ktypes "k8s.io/apimachinery/pkg/types" + kerrors "k8s.io/apimachinery/pkg/util/errors" + "k8s.io/apimachinery/pkg/util/sets" + corev1listers "k8s.io/client-go/listers/core/v1" + "k8s.io/klog/v2" + utilnet "k8s.io/utils/net" + + libovsdbclient "github.com/ovn-org/libovsdb/client" + "github.com/ovn-org/libovsdb/ovsdb" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config" + adminpolicybasedroutelisters "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/listers/adminpolicybasedroute/v1" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/libovsdbops" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/nbdb" + addressset "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/address_set" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" +) + +type networkClient interface { + deleteGatewayIPs(namespaceName string, toBeDeletedGWIPs, toBeKept sets.Set[string]) error + addGatewayIPs(pod *v1.Pod, egress gatewayInfoList) error +} + +type northBoundClient struct { + routeLister adminpolicybasedroutelisters.AdminPolicyBasedExternalRouteLister + nodeLister corev1listers.NodeLister + // NorthBound client interface 
+ nbClient libovsdbclient.Client + + // An address set factory that creates address sets + addressSetFactory addressset.AddressSetFactory + externalGWCache map[ktypes.NamespacedName]*ExternalRouteInfo + exGWCacheMutex *sync.RWMutex +} + +type conntrackClient struct { + podLister corev1listers.PodLister +} + +func (nb *northBoundClient) findLogicalRouterStaticRoutesWithPredicate(p func(item *nbdb.LogicalRouterStaticRoute) bool) ([]*nbdb.LogicalRouterStaticRoute, error) { + return libovsdbops.FindLogicalRouterStaticRoutesWithPredicate(nb.nbClient, p) +} +func (nb *northBoundClient) deleteLogicalRouterStaticRoutes(routerName string, lrsrs ...*nbdb.LogicalRouterStaticRoute) error { + return libovsdbops.DeleteLogicalRouterStaticRoutes(nb.nbClient, routerName, lrsrs...) +} + +func (nb *northBoundClient) findLogicalRoutersWithPredicate(p func(item *nbdb.LogicalRouter) bool) ([]*nbdb.LogicalRouter, error) { + return libovsdbops.FindLogicalRoutersWithPredicate(nb.nbClient, p) +} + +// delAllHybridRoutePolicies deletes all the 501 hybrid-route-policies that +// force pod egress traffic to be rerouted to a gateway router for local gateway mode. +// Called when migrating to SGW from LGW. +func (nb *northBoundClient) delAllHybridRoutePolicies() error { + // nuke all the policies + policyPred := func(item *nbdb.LogicalRouterPolicy) bool { + return item.Priority == types.HybridOverlayReroutePriority + } + err := libovsdbops.DeleteLogicalRouterPoliciesWithPredicate(nb.nbClient, types.OVNClusterRouter, policyPred) + if err != nil { + return fmt.Errorf("error deleting hybrid route policies on %s: %v", types.OVNClusterRouter, err) + } + + // nuke all the address-sets. + // if we fail to remove LRP's above, we don't attempt to remove ASes due to dependency constraints. 
+ predicateIDs := libovsdbops.NewDbObjectIDs(libovsdbops.AddressSetHybridNodeRoute, controllerName, nil) + asPred := libovsdbops.GetPredicate[*nbdb.AddressSet](predicateIDs, nil) + err = libovsdbops.DeleteAddressSetsWithPredicate(nb.nbClient, asPred) + if err != nil { + return fmt.Errorf("failed to remove hybrid route address sets: %v", err) + } + + return nil +} + +// delAllLegacyHybridRoutePolicies deletes all the 501 hybrid-route-policies that +// force pod egress traffic to be rerouted to a gateway router for local gateway mode. +// New hybrid route matches on address set, while legacy matches just on pod IP +func (nb *northBoundClient) delAllLegacyHybridRoutePolicies() error { + // nuke all the policies + p := func(item *nbdb.LogicalRouterPolicy) bool { + if item.Priority != types.HybridOverlayReroutePriority { + return false + } + if isNewVer, err := regexp.MatchString(`src\s*==\s*\$`, item.Match); err == nil && isNewVer { + return false + } + return true + } + err := libovsdbops.DeleteLogicalRouterPoliciesWithPredicate(nb.nbClient, types.OVNClusterRouter, p) + if err != nil { + return fmt.Errorf("error deleting legacy hybrid route policies on %s: %v", types.OVNClusterRouter, err) + } + return nil +} + +// deleteGatewayIPs handles deleting static routes for pods on a specific GR. +// If a set of gateways is given, only routes for that gateway are deleted. If no gateways +// are given, all routes for the namespace are deleted. 
func (nb *northBoundClient) deleteGatewayIPs(namespace string, toBeDeletedGWIPs, _ sets.Set[string]) error {
	for _, routeInfo := range nb.getRouteInfosForNamespace(namespace) {
		routeInfo.Lock()
		if routeInfo.Deleted {
			routeInfo.Unlock()
			continue
		}
		for podIP, routes := range routeInfo.PodExternalRoutes {
			for gw, gr := range routes {
				if toBeDeletedGWIPs.Has(gw) {
					// we cannot delete an external gateway IP from the north bound if it's also being provided by an external gateway annotation or if it is also
					// defined by a coexisting policy in the same namespace
					if err := nb.deletePodGWRoute(routeInfo, podIP, gw, gr); err != nil {
						// if we encounter error while deleting routes for one pod; we return and don't try subsequent pods
						routeInfo.Unlock()
						return fmt.Errorf("delete pod GW route failed: %w", err)
					}
					delete(routes, gw)
				}
			}
		}
		routeInfo.Unlock()
	}
	return nil
}

// getRouteInfosForNamespace returns all routeInfos for a specific namespace
func (nb *northBoundClient) getRouteInfosForNamespace(namespace string) []*ExternalRouteInfo {
	nb.exGWCacheMutex.RLock()
	defer nb.exGWCacheMutex.RUnlock()

	routes := make([]*ExternalRouteInfo, 0)
	for namespacedName, routeInfo := range nb.externalGWCache {
		if namespacedName.Namespace == namespace {
			routes = append(routes, routeInfo)
		}
	}

	return routes
}

// addGatewayIPs programs ECMP static routes for every IP of the given pod
// towards each gateway in egress. Completed and host-network pods are skipped.
func (nb *northBoundClient) addGatewayIPs(pod *v1.Pod, egress gatewayInfoList) error {
	if util.PodCompleted(pod) || util.PodWantsHostNetwork(pod) {
		return nil
	}
	// Build full-mask CIDRs (/32 or /128) from the pod's status IPs.
	podIPs := make([]*net.IPNet, 0)
	for _, podIP := range pod.Status.PodIPs {
		podIPStr := utilnet.ParseIPSloppy(podIP.IP).String()
		cidr := podIPStr + util.GetIPFullMask(podIPStr)
		_, ipNet, err := net.ParseCIDR(cidr)
		if err != nil {
			return fmt.Errorf("failed to parse CIDR: %s, error: %v", cidr, err)
		}
		podIPs = append(podIPs, ipNet)
	}
	if len(podIPs) == 0 {
		klog.Warningf("Will not add gateway routes pod %s/%s. IPs not found!", pod.Namespace, pod.Name)
		return nil
	}
	if config.Gateway.DisableSNATMultipleGWs {
		// delete all perPodSNATs (if this pod was controlled by egressIP controller, it will stop working since
		// a pod cannot be used for multiple-external-gateways and egressIPs at the same time)
		// NOTE(review): failure here is deliberately logged and swallowed so route programming still proceeds.
		if err := nb.deletePodSNAT(pod.Spec.NodeName, []*net.IPNet{}, podIPs); err != nil {
			klog.Error(err.Error())
		}
	}
	podNsName := ktypes.NamespacedName{Namespace: pod.Namespace, Name: pod.Name}
	return nb.addGWRoutesForPod(egress, podIPs, podNsName, pod.Spec.NodeName)
}

// deletePodSNAT removes per pod SNAT rules towards the nodeIP that are applied to the GR where the pod resides
// if allSNATs flag is set, then all the SNATs (including against egressIPs if any) for that pod will be deleted
// used when disableSNATMultipleGWs=true
func (nb *northBoundClient) deletePodSNAT(nodeName string, extIPs, podIPNets []*net.IPNet) error {
	nats, err := buildPodSNAT(extIPs, podIPNets)
	if err != nil {
		return err
	}
	logicalRouter := nbdb.LogicalRouter{
		Name: types.GWRouterPrefix + nodeName,
	}
	err = libovsdbops.DeleteNATs(nb.nbClient, &logicalRouter, nats...)
	if err != nil {
		return fmt.Errorf("failed to delete SNAT rule for pod on gateway router %s: %v", logicalRouter.Name, err)
	}
	return nil
}

// addEgressGwRoutesForPod handles adding all routes to gateways for a pod on a specific GR
func (nb *northBoundClient) addGWRoutesForPod(gateways []*gatewayInfo, podIfAddrs []*net.IPNet, podNsName ktypes.NamespacedName, node string) error {
	gr := util.GetGatewayRouterFromNode(node)

	routesAdded := 0
	portPrefix, err := nb.extSwitchPrefix(node)
	if err != nil {
		klog.Infof("Failed to find ext switch prefix for %s %v", node, err)
		return err
	}

	port := portPrefix + types.GWRouterToExtSwitchPrefix + gr
	// Returns with routeInfo locked; unlocked on exit.
	routeInfo, err := nb.ensureRouteInfoLocked(podNsName)
	if err != nil {
		return fmt.Errorf("failed to ensure routeInfo for %s, error: %v", podNsName, err)
	}
	defer routeInfo.Unlock()
	for _, podIPNet := range podIfAddrs {
		for _, gateway := range gateways {
			// TODO (trozet): use the go bindings here and batch commands
			// validate the ip and gateway belong to the same address family
			gws, err := util.MatchAllIPStringFamily(utilnet.IsIPv6(podIPNet.IP), gateway.gws.UnsortedList())
			if err != nil {
				klog.Warningf("Address families for the pod address %s and gateway %s did not match", podIPNet.IP.String(), gateway.gws)
				continue
			}
			podIP := podIPNet.IP.String()
			for _, gw := range gws {
				// if route was already programmed, skip it
				if foundGR, ok := routeInfo.PodExternalRoutes[podIP][gw]; ok && foundGR == gr {
					routesAdded++
					continue
				}
				mask := util.GetIPFullMask(podIP)
				if err := nb.createOrUpdateBFDStaticRoute(gateway.bfdEnabled, gw, podIP, gr, port, mask); err != nil {
					return err
				}
				if routeInfo.PodExternalRoutes[podIP] == nil {
					routeInfo.PodExternalRoutes[podIP] = make(map[string]string)
				}
				routeInfo.PodExternalRoutes[podIP][gw] = gr
				routesAdded++
				// First gateway programmed for this pod IP: also install the
				// hybrid route policy (local gateway mode only, see callee).
				if len(routeInfo.PodExternalRoutes[podIP]) == 1 {
					if err := nb.addHybridRoutePolicyForPod(podIPNet.IP, node); err != nil {
						return err
					}
				}
			}
		}
	}
	// if no routes are added return an error
	if routesAdded < 1 {
		return fmt.Errorf("gateway specified for namespace %s with gateway addresses %v but no valid routes exist for pod: %s",
			podNsName.Namespace, podIfAddrs, podNsName.Name)
	}
	return nil
}

// AddHybridRoutePolicyForPod handles adding a higher priority allow policy to allow traffic to be routed normally
// by ecmp routes
func (nb *northBoundClient) addHybridRoutePolicyForPod(podIP net.IP, node string) error {
	if config.Gateway.Mode == config.GatewayModeLocal {
		// Add podIP to the node's address_set.
		asIndex := getHybridRouteAddrSetDbIDs(node, controllerName)
		as, err := nb.addressSetFactory.EnsureAddressSet(asIndex)
		if err != nil {
			return fmt.Errorf("cannot ensure that addressSet for node %s exists %v", node, err)
		}
		err = as.AddIPs([]net.IP{(podIP)})
		if err != nil {
			return fmt.Errorf("unable to add PodIP %s: to the address set %s, err: %v", podIP.String(), node, err)
		}

		// add allow policy to bypass lr-policy in GR
		ipv4HashedAS, ipv6HashedAS := as.GetASHashNames()
		var l3Prefix string
		var matchSrcAS string
		isIPv6 := utilnet.IsIPv6(podIP)
		if isIPv6 {
			l3Prefix = "ip6"
			matchSrcAS = ipv6HashedAS
		} else {
			l3Prefix = "ip4"
			matchSrcAS = ipv4HashedAS
		}

		// get the GR to join switch ip address
		grJoinIfAddrs, err := util.GetLRPAddrs(nb.nbClient, types.GWRouterToJoinSwitchPrefix+types.GWRouterPrefix+node)
		if err != nil {
			return fmt.Errorf("unable to find IP address for node: %s, %s port, err: %v", node, types.GWRouterToJoinSwitchPrefix, err)
		}
		grJoinIfAddr, err := util.MatchFirstIPNetFamily(utilnet.IsIPv6(podIP), grJoinIfAddrs)
		if err != nil {
			return fmt.Errorf("failed to match gateway router join interface IPs: %v, err: %v", grJoinIfAddr, err)
		}

		// Exclude cluster-internal destinations (same address family only).
		var matchDst string
		var clusterL3Prefix string
		for _, clusterSubnet := range config.Default.ClusterSubnets {
			if utilnet.IsIPv6CIDR(clusterSubnet.CIDR) {
				clusterL3Prefix = "ip6"
			} else {
				clusterL3Prefix = "ip4"
			}
			if l3Prefix != clusterL3Prefix {
				continue
			}
			matchDst += fmt.Sprintf(" && %s.dst != %s", clusterL3Prefix, clusterSubnet.CIDR)
		}

		// traffic destined outside of cluster subnet go to GR
		matchStr := fmt.Sprintf(`inport == "%s%s" && %s.src == $%s`, types.RouterToSwitchPrefix, node, l3Prefix, matchSrcAS)
		matchStr += matchDst

		logicalRouterPolicy := nbdb.LogicalRouterPolicy{
			Priority: types.HybridOverlayReroutePriority,
			Action:   nbdb.LogicalRouterPolicyActionReroute,
			Nexthops: []string{grJoinIfAddr.IP.String()},
			Match:    matchStr,
		}
		p := func(item *nbdb.LogicalRouterPolicy) bool {
			return item.Priority == logicalRouterPolicy.Priority && strings.Contains(item.Match, matchSrcAS)
		}
		err = libovsdbops.CreateOrUpdateLogicalRouterPolicyWithPredicate(nb.nbClient, types.OVNClusterRouter,
			&logicalRouterPolicy, p, &logicalRouterPolicy.Nexthops, &logicalRouterPolicy.Match, &logicalRouterPolicy.Action)
		if err != nil {
			return fmt.Errorf("failed to add policy route %+v to %s: %v", logicalRouterPolicy, types.OVNClusterRouter, err)
		}
	}
	return nil
}

// createOrUpdateBFDStaticRoute transacts a src-ip ECMP static route (podIP+mask
// via gw out of port) on router gr, optionally creating/attaching a BFD session.
func (nb *northBoundClient) createOrUpdateBFDStaticRoute(bfdEnabled bool, gw string, podIP, gr, port, mask string) error {
	lrsr := nbdb.LogicalRouterStaticRoute{
		Policy: &nbdb.LogicalRouterStaticRoutePolicySrcIP,
		Options: map[string]string{
			"ecmp_symmetric_reply": "true",
		},
		Nexthop:    gw,
		IPPrefix:   podIP + mask,
		OutputPort: &port,
	}

	ops := []ovsdb.Operation{}
	var err error
	if bfdEnabled {
		bfd := nbdb.BFD{
			DstIP:       gw,
			LogicalPort: port,
		}
		ops, err = libovsdbops.CreateOrUpdateBFDOps(nb.nbClient, ops, &bfd)
		if err != nil {
			return fmt.Errorf("error creating or updating BFD %+v: %v", bfd, err)
		}
		lrsr.BFD = &bfd.UUID
	}

	p := func(item *nbdb.LogicalRouterStaticRoute) bool {
		return item.IPPrefix == lrsr.IPPrefix &&
			item.Nexthop == lrsr.Nexthop &&
			item.OutputPort != nil &&
			*item.OutputPort == *lrsr.OutputPort &&
			item.Policy == lrsr.Policy
	}
	ops, err = libovsdbops.CreateOrUpdateLogicalRouterStaticRoutesWithPredicateOps(nb.nbClient, ops, gr, &lrsr, p,
		&lrsr.Options)
	if err != nil {
		return fmt.Errorf("error creating or updating static route %+v on router %s: %v", lrsr, gr, err)
	}

	_, err = libovsdbops.TransactAndCheck(nb.nbClient, ops)
	if err != nil {
		return fmt.Errorf("error transacting static route: %v", err)
	}

	return nil
}

// updateExternalGWInfoCacheForPodIPWithGatewayIP records podIP->gwIP->GR in the
// external GW cache, refreshing the BFD static route when BFD is enabled, or
// garbage-collecting a now-unreferenced BFD entry when it is not.
func (nb *northBoundClient) updateExternalGWInfoCacheForPodIPWithGatewayIP(podIP, gwIP, nodeName string, bfdEnabled bool, namespacedName ktypes.NamespacedName) error {
	gr := util.GetGatewayRouterFromNode(nodeName)
	routeInfo, err := nb.ensureRouteInfoLocked(namespacedName)
	if err != nil {
		return fmt.Errorf("failed to ensure routeInfo for %s, error: %v", namespacedName.Name, err)
	}
	defer routeInfo.Unlock()
	// if route was already programmed, skip it
	if foundGR, ok := routeInfo.PodExternalRoutes[podIP][gwIP]; ok && foundGR == gr {
		return nil
	}
	mask := util.GetIPFullMask(podIP)

	portPrefix, err := nb.extSwitchPrefix(nodeName)
	if err != nil {
		klog.Infof("Failed to find ext switch prefix for %s %v", nodeName, err)
		return err
	}
	if bfdEnabled {
		port := portPrefix + types.GWRouterToExtSwitchPrefix + gr
		// update the BFD static route just in case it has changed
		if err := nb.createOrUpdateBFDStaticRoute(bfdEnabled, gwIP, podIP, gr, port, mask); err != nil {
			return err
		}
	} else {
		// BFD disabled: if no BFD entry can be found, attempt cleanup of any
		// dangling one for this gateway/router pair.
		_, err := nb.lookupBFDEntry(gwIP, gr, portPrefix)
		if err != nil {
			err = nb.cleanUpBFDEntry(gwIP, gr, portPrefix)
			if err != nil {
				return err
			}
		}
	}

	if routeInfo.PodExternalRoutes[podIP] == nil {
		routeInfo.PodExternalRoutes[podIP] = make(map[string]string)
	}
	routeInfo.PodExternalRoutes[podIP][gwIP] = gr

	return nil
}

// ensureRouteInfoLocked either gets the current routeInfo in the cache with a lock, or creates+locks a new one if missing
func (nb *northBoundClient) ensureRouteInfoLocked(podName ktypes.NamespacedName) (*ExternalRouteInfo, error) {
	// We don't want to hold the cache lock while we try to lock the routeInfo (unless we are creating it, then we know
	// no one else is using it). This could lead to dead lock. Therefore the steps here are:
	// 1. Get the cache lock, try to find the routeInfo
	// 2. If routeInfo existed, release the cache lock
	// 3. If routeInfo did not exist, safe to hold the cache lock while we create the new routeInfo
	nb.exGWCacheMutex.Lock()
	routeInfo, ok := nb.externalGWCache[podName]
	if !ok {
		routeInfo = &ExternalRouteInfo{
			PodExternalRoutes: make(map[string]map[string]string),
			PodName:           podName,
		}
		// we are creating routeInfo and going to set it in podExternalRoutes map
		// so safe to hold the lock while we create and add it
		defer nb.exGWCacheMutex.Unlock()
		nb.externalGWCache[podName] = routeInfo
	} else {
		// if we found an existing routeInfo, do not hold the cache lock
		// while waiting for routeInfo to Lock
		nb.exGWCacheMutex.Unlock()
	}

	// 4. Now lock the routeInfo
	routeInfo.Lock()

	// 5. If routeInfo was deleted between releasing the cache lock and grabbing
	// the routeInfo lock, return an error so the caller doesn't use it and
	// retries the operation later
	if routeInfo.Deleted {
		routeInfo.Unlock()
		return nil, fmt.Errorf("routeInfo for pod %s, was altered during ensure route info", podName)
	}

	return routeInfo, nil
}

// deletePodGWRoute removes the ECMP static route for podIP via gw on router gr
// (skipping mismatched address families), drops the hybrid route policy when
// this was the pod's last gateway, and cleans up any dangling BFD entry.
// Caller must hold routeInfo's lock.
func (nb *northBoundClient) deletePodGWRoute(routeInfo *ExternalRouteInfo, podIP, gw, gr string) error {
	if utilnet.IsIPv6String(gw) != utilnet.IsIPv6String(podIP) {
		return nil
	}

	mask := util.GetIPFullMask(podIP)
	if err := nb.deleteLogicalRouterStaticRoute(podIP, mask, gw, gr); err != nil {
		return fmt.Errorf("unable to delete pod %s ECMP route to GR %s, GW: %s: %w",
			routeInfo.PodName, gr, gw, err)
	}

	node := util.GetWorkerFromGatewayRouter(gr)
	// The gw is deleted from the routes cache after this func is called, length 1
	// means it is the last gw for the pod and the hybrid route policy should be deleted.
	if entry := routeInfo.PodExternalRoutes[podIP]; len(entry) == 1 {
		if err := nb.delHybridRoutePolicyForPod(net.ParseIP(podIP), node); err != nil {
			return fmt.Errorf("unable to delete hybrid route policy for pod %s: err: %v", routeInfo.PodName, err)
		}
	}

	portPrefix, err := nb.extSwitchPrefix(node)
	if err != nil {
		return err
	}
	return nb.cleanUpBFDEntry(gw, gr, portPrefix)
}

// cleanUpBFDEntry checks if the BFD table entry related to the associated
// gw router / port / gateway ip is referenced by other routing rules, and if
// not removes the entry to avoid having dangling BFD entries.
+func (nb *northBoundClient) cleanUpBFDEntry(gatewayIP, gatewayRouter, prefix string) error { + portName := prefix + types.GWRouterToExtSwitchPrefix + gatewayRouter + p := func(item *nbdb.LogicalRouterStaticRoute) bool { + if item.OutputPort != nil && *item.OutputPort == portName && item.Nexthop == gatewayIP && item.BFD != nil && *item.BFD != "" { + return true + } + return false + } + logicalRouterStaticRoutes, err := libovsdbops.FindLogicalRouterStaticRoutesWithPredicate(nb.nbClient, p) + if err != nil { + return fmt.Errorf("cleanUpBFDEntry failed to list routes for %s: %w", portName, err) + } + if len(logicalRouterStaticRoutes) > 0 { + return nil + } + + bfd := nbdb.BFD{ + LogicalPort: portName, + DstIP: gatewayIP, + } + err = libovsdbops.DeleteBFDs(nb.nbClient, &bfd) + if err != nil { + return fmt.Errorf("error deleting BFD %+v: %v", bfd, err) + } + + return nil +} + +func (nb *northBoundClient) deleteLogicalRouterStaticRoute(podIP, mask, gw, gr string) error { + p := func(item *nbdb.LogicalRouterStaticRoute) bool { + return item.Policy != nil && + *item.Policy == nbdb.LogicalRouterStaticRoutePolicySrcIP && + item.IPPrefix == podIP+mask && + item.Nexthop == gw + } + err := libovsdbops.DeleteLogicalRouterStaticRoutesWithPredicate(nb.nbClient, gr, p) + if err != nil { + return fmt.Errorf("error deleting static route from router %s: %v", gr, err) + } + + return nil +} + +// DelHybridRoutePolicyForPod handles deleting a logical route policy that +// forces pod egress traffic to be rerouted to a gateway router for local gateway mode. +func (nb *northBoundClient) delHybridRoutePolicyForPod(podIP net.IP, node string) error { + if config.Gateway.Mode == config.GatewayModeLocal { + // Delete podIP from the node's address_set. 
+ asIndex := getHybridRouteAddrSetDbIDs(node, controllerName) + as, err := nb.addressSetFactory.EnsureAddressSet(asIndex) + if err != nil { + return fmt.Errorf("cannot Ensure that addressSet for node %s exists %v", node, err) + } + err = as.DeleteIPs([]net.IP{(podIP)}) + if err != nil { + return fmt.Errorf("unable to remove PodIP %s: to the address set %s, err: %v", podIP.String(), node, err) + } + + // delete hybrid policy to bypass lr-policy in GR, only if there are zero pods on this node. + ipv4HashedAS, ipv6HashedAS := as.GetASHashNames() + ipv4PodIPs, ipv6PodIPs := as.GetIPs() + deletePolicy := false + var l3Prefix string + var matchSrcAS string + if utilnet.IsIPv6(podIP) { + l3Prefix = "ip6" + if len(ipv6PodIPs) == 0 { + deletePolicy = true + } + matchSrcAS = ipv6HashedAS + } else { + l3Prefix = "ip4" + if len(ipv4PodIPs) == 0 { + deletePolicy = true + } + matchSrcAS = ipv4HashedAS + } + if deletePolicy { + var matchDst string + var clusterL3Prefix string + for _, clusterSubnet := range config.Default.ClusterSubnets { + if utilnet.IsIPv6CIDR(clusterSubnet.CIDR) { + clusterL3Prefix = "ip6" + } else { + clusterL3Prefix = "ip4" + } + if l3Prefix != clusterL3Prefix { + continue + } + matchDst += fmt.Sprintf(" && %s.dst != %s", l3Prefix, clusterSubnet.CIDR) + } + matchStr := fmt.Sprintf(`inport == "%s%s" && %s.src == $%s`, types.RouterToSwitchPrefix, node, l3Prefix, matchSrcAS) + matchStr += matchDst + + p := func(item *nbdb.LogicalRouterPolicy) bool { + return item.Priority == types.HybridOverlayReroutePriority && item.Match == matchStr + } + err := libovsdbops.DeleteLogicalRouterPoliciesWithPredicate(nb.nbClient, types.OVNClusterRouter, p) + if err != nil { + return fmt.Errorf("error deleting policy %s on router %s: %v", matchStr, types.OVNClusterRouter, err) + } + } + if len(ipv4PodIPs) == 0 && len(ipv6PodIPs) == 0 { + // delete address set. 
+ err := as.Destroy() + if err != nil { + return fmt.Errorf("failed to remove address set: %s, on: %s, err: %v", + as.GetName(), node, err) + } + } + } + return nil +} + +// extSwitchPrefix returns the prefix of the external switch to use for +// external gateway routes. In case no second bridge is configured, we +// use the default one and the prefix is empty. +func (nb *northBoundClient) extSwitchPrefix(nodeName string) (string, error) { + node, err := nb.nodeLister.Get(nodeName) + if err != nil { + return "", errors.Wrapf(err, "extSwitchPrefix: failed to find node %s", nodeName) + } + l3GatewayConfig, err := util.ParseNodeL3GatewayAnnotation(node) + if err != nil { + return "", errors.Wrapf(err, "extSwitchPrefix: failed to parse l3 gateway annotation for node %s", nodeName) + } + + if l3GatewayConfig.EgressGWInterfaceID != "" { + return types.EgressGWSwitchPrefix, nil + } + return "", nil +} + +func (nb *northBoundClient) lookupBFDEntry(gatewayIP, gatewayRouter, prefix string) (*nbdb.BFD, error) { + portName := prefix + types.GWRouterToExtSwitchPrefix + gatewayRouter + bfd := nbdb.BFD{ + LogicalPort: portName, + DstIP: gatewayIP, + } + found, err := libovsdbops.LookupBFD(nb.nbClient, &bfd) + if err != nil { + klog.Warningf("Failed to lookup BFD for gateway IP %s, gateway router %s and prefix %s", gatewayIP, gatewayRouter, prefix) + return nil, err + } + + return found, nil +} + +// buildPodSNAT builds per pod SNAT rules towards the nodeIP that are applied to the GR where the pod resides +// if allSNATs flag is set, then all the SNATs (including against egressIPs if any) for that pod will be returned +func buildPodSNAT(extIPs, podIPNets []*net.IPNet) ([]*nbdb.NAT, error) { + nats := make([]*nbdb.NAT, 0, len(extIPs)*len(podIPNets)) + var nat *nbdb.NAT + + for _, podIPNet := range podIPNets { + podIP := podIPNet.IP.String() + mask := util.GetIPFullMask(podIP) + _, fullMaskPodNet, err := net.ParseCIDR(podIP + mask) + if err != nil { + return nil, fmt.Errorf("invalid 
IP: %s and mask: %s combination, error: %v", podIP, mask, err) + } + if len(extIPs) == 0 { + nat = libovsdbops.BuildSNAT(nil, fullMaskPodNet, "", nil) + } else { + for _, gwIPNet := range extIPs { + gwIP := gwIPNet.IP.String() + if utilnet.IsIPv6String(gwIP) != utilnet.IsIPv6String(podIP) { + continue + } + nat = libovsdbops.BuildSNAT(&gwIPNet.IP, fullMaskPodNet, "", nil) + } + } + nats = append(nats, nat) + } + return nats, nil +} + +func getHybridRouteAddrSetDbIDs(nodeName, controller string) *libovsdbops.DbObjectIDs { + return libovsdbops.NewDbObjectIDs(libovsdbops.AddressSetHybridNodeRoute, controller, + map[libovsdbops.ExternalIDKey]string{ + // there is only 1 address set of this type per node + libovsdbops.ObjectNameKey: nodeName, + }) +} + +func (c *conntrackClient) deleteGatewayIPs(namespaceName string, _, toBeKept sets.Set[string]) error { + // loop through all the IPs on the annotations; ARP for their MACs and form an allowlist + var wg sync.WaitGroup + wg.Add(len(toBeKept)) + validMACs := sync.Map{} + klog.Infof("Keeping conntrack entries in namespace %s with gateway IPs %s", namespaceName, strings.Join(sets.List(toBeKept), ",")) + for gwIP := range toBeKept { + go func(gwIP string) { + defer wg.Done() + if len(gwIP) > 0 && !utilnet.IsIPv6String(gwIP) { + // TODO: Add support for IPv6 external gateways + if hwAddr, err := util.GetMACAddressFromARP(net.ParseIP(gwIP)); err != nil { + klog.Errorf("Failed to lookup hardware address for gatewayIP %s: %v", gwIP, err) + } else if len(hwAddr) > 0 { + // we need to reverse the mac before passing it to the conntrack filter since OVN saves the MAC in the following format + // +------------------------------------------------------------ + + // | 128 ... 112 ... 96 ... 80 ... 64 ... 48 ... 32 ... 16 ... 
0| + // +------------------+-------+--------------------+-------------| + // | | UNUSED| MAC ADDRESS | UNUSED | + // +------------------+-------+--------------------+-------------+ + for i, j := 0, len(hwAddr)-1; i < j; i, j = i+1, j-1 { + hwAddr[i], hwAddr[j] = hwAddr[j], hwAddr[i] + } + validMACs.Store(gwIP, []byte(hwAddr)) + } + } + }(gwIP) + } + wg.Wait() + + validNextHopMACs := [][]byte{} + validMACs.Range(func(key interface{}, value interface{}) bool { + validNextHopMACs = append(validNextHopMACs, value.([]byte)) + return true + }) + // Handle corner case where there are 0 IPs on the annotations OR none of the ARPs were successful; i.e allowMACList={empty}. + // This means we *need to* pass a label > 128 bits that will not match on any conntrack entry labels for these pods. + // That way any remaining entries with labels having MACs set will get purged. + if len(validNextHopMACs) == 0 { + validNextHopMACs = append(validNextHopMACs, []byte("does-not-contain-anything")) + } + + pods, err := c.podLister.List(labels.Everything()) + if err != nil { + return fmt.Errorf("unable to get pods from informer: %v", err) + } + + var errors []error + for _, pod := range pods { + pod := pod + podIPs, err := util.GetPodIPsOfNetwork(pod, &util.DefaultNetInfo{}) + if err != nil { + errors = append(errors, fmt.Errorf("unable to fetch IP for pod %s/%s: %v", pod.Namespace, pod.Name, err)) + } + for _, podIP := range podIPs { // flush conntrack only for UDP + // for this pod, we check if the conntrack entry has a label that is not in the provided allowlist of MACs + // only caveat here is we assume egressGW served pods shouldn't have conntrack entries with other labels set + err := util.DeleteConntrack(podIP.String(), 0, v1.ProtocolUDP, netlink.ConntrackOrigDstIP, validNextHopMACs) + if err != nil { + errors = append(errors, fmt.Errorf("failed to delete conntrack entry for pod with IP %s: %v", podIP.String(), err)) + continue + } + } + } + return kerrors.NewAggregate(errors) +} + 
+// addGatewayIPs is a NOP (no operation) in the conntrack client as it does not add any entry to the conntrack table. +func (c *conntrackClient) addGatewayIPs(pod *v1.Pod, egress gatewayInfoList) error { + return nil +} diff --git a/go-controller/pkg/ovn/controller/apbroute/node_controller.go b/go-controller/pkg/ovn/controller/apbroute/node_controller.go new file mode 100644 index 0000000000..8f0d9fd101 --- /dev/null +++ b/go-controller/pkg/ovn/controller/apbroute/node_controller.go @@ -0,0 +1,496 @@ +package apbroute + +import ( + "fmt" + "reflect" + "sync" + "time" + + v1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + ktypes "k8s.io/apimachinery/pkg/types" + utilruntime "k8s.io/apimachinery/pkg/util/runtime" + "k8s.io/apimachinery/pkg/util/sets" + "k8s.io/apimachinery/pkg/util/wait" + coreinformers "k8s.io/client-go/informers/core/v1" + corev1listers "k8s.io/client-go/listers/core/v1" + "k8s.io/client-go/tools/cache" + "k8s.io/client-go/util/workqueue" + "k8s.io/klog/v2" + + nettypes "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/apis/k8s.cni.cncf.io/v1" + adminpolicybasedrouteapi "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/adminpolicybasedroute/v1" + adminpolicybasedrouteclient "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned" + adminpolicybasedrouteinformer "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/informers/externalversions" + + adminpolicybasedroutelisters "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/listers/adminpolicybasedroute/v1" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/factory" +) + +// Admin Policy Based Route Node controller + +type ExternalGatewayNodeController struct { + stopCh <-chan struct{} + + // route policies + + // routerInformer v1apbinformer.AdminPolicyBasedExternalRouteInformer + routeLister 
adminpolicybasedroutelisters.AdminPolicyBasedExternalRouteLister + routeSynced cache.InformerSynced + routeQueue workqueue.RateLimitingInterface + + // Pods + podLister corev1listers.PodLister + podSynced cache.InformerSynced + podQueue workqueue.RateLimitingInterface + + // Namespaces + namespaceQueue workqueue.RateLimitingInterface + namespaceLister corev1listers.NamespaceLister + namespaceSynced cache.InformerSynced + + //external gateway caches + //make them public so that they can be used by the annotation logic to lock on namespaces and share the same external route information + ExternalGWCache map[ktypes.NamespacedName]*ExternalRouteInfo + ExGWCacheMutex *sync.RWMutex + + routePolicyInformer adminpolicybasedrouteinformer.SharedInformerFactory + + mgr *externalPolicyManager +} + +func NewExternalNodeController( + apbRoutePolicyClient adminpolicybasedrouteclient.Interface, + podInformer coreinformers.PodInformer, + namespaceInformer coreinformers.NamespaceInformer, + stopCh <-chan struct{}, +) (*ExternalGatewayNodeController, error) { + + namespaceLister := namespaceInformer.Lister() + routePolicyInformer := adminpolicybasedrouteinformer.NewSharedInformerFactory(apbRoutePolicyClient, resyncInterval) + externalRouteInformer := routePolicyInformer.K8s().V1().AdminPolicyBasedExternalRoutes() + + c := &ExternalGatewayNodeController{ + stopCh: stopCh, + routePolicyInformer: routePolicyInformer, + routeLister: routePolicyInformer.K8s().V1().AdminPolicyBasedExternalRoutes().Lister(), + routeSynced: routePolicyInformer.K8s().V1().AdminPolicyBasedExternalRoutes().Informer().HasSynced, + routeQueue: workqueue.NewNamedRateLimitingQueue( + workqueue.NewItemFastSlowRateLimiter(1*time.Second, 5*time.Second, 5), + "apbexternalroutes", + ), + podLister: podInformer.Lister(), + podSynced: podInformer.Informer().HasSynced, + podQueue: workqueue.NewNamedRateLimitingQueue( + workqueue.NewItemFastSlowRateLimiter(1*time.Second, 5*time.Second, 5), + "apbexternalroutepods", + ), + 
namespaceLister: namespaceLister, + namespaceSynced: namespaceInformer.Informer().HasSynced, + namespaceQueue: workqueue.NewNamedRateLimitingQueue( + workqueue.NewItemFastSlowRateLimiter(1*time.Second, 5*time.Second, 5), + "apbexternalroutenamespaces", + ), + mgr: newExternalPolicyManager( + stopCh, + podInformer.Lister(), + namespaceInformer.Lister(), + routePolicyInformer.K8s().V1().AdminPolicyBasedExternalRoutes().Lister(), + &conntrackClient{podLister: podInformer.Lister()}), + } + + _, err := namespaceInformer.Informer().AddEventHandler( + factory.WithUpdateHandlingForObjReplace(cache.ResourceEventHandlerFuncs{ + AddFunc: c.onNamespaceAdd, + UpdateFunc: c.onNamespaceUpdate, + DeleteFunc: c.onNamespaceDelete, + })) + if err != nil { + return nil, err + } + + _, err = podInformer.Informer().AddEventHandler( + factory.WithUpdateHandlingForObjReplace(cache.ResourceEventHandlerFuncs{ + AddFunc: c.onPodAdd, + UpdateFunc: c.onPodUpdate, + DeleteFunc: c.onPodDelete, + })) + if err != nil { + return nil, err + } + _, err = externalRouteInformer.Informer().AddEventHandler( + factory.WithUpdateHandlingForObjReplace(cache.ResourceEventHandlerFuncs{ + AddFunc: c.onPolicyAdd, + UpdateFunc: c.onPolicyUpdate, + DeleteFunc: c.onPolicyDelete, + })) + if err != nil { + return nil, err + } + + return c, nil + +} + +func (c *ExternalGatewayNodeController) Run(threadiness int) { + defer utilruntime.HandleCrash() + klog.Infof("Starting Admin Policy Based Route Node Controller") + + c.routePolicyInformer.Start(c.stopCh) + + if !cache.WaitForNamedCacheSync("apbexternalroutenamespaces", c.stopCh, c.namespaceSynced) { + utilruntime.HandleError(fmt.Errorf("timed out waiting for caches to sync")) + klog.Infof("Synchronization failed") + return + } + + if !cache.WaitForNamedCacheSync("apbexternalroutepods", c.stopCh, c.podSynced) { + utilruntime.HandleError(fmt.Errorf("timed out waiting for caches to sync")) + klog.Infof("Synchronization failed") + return + } + + if 
!cache.WaitForNamedCacheSync("adminpolicybasedexternalroutes", c.stopCh, c.routeSynced) { + utilruntime.HandleError(fmt.Errorf("timed out waiting for caches to sync")) + klog.Infof("Synchronization failed") + return + } + + wg := &sync.WaitGroup{} + for i := 0; i < threadiness; i++ { + wg.Add(1) + go func() { + defer wg.Done() + wait.Until(func() { + // processes route policies + c.runPolicyWorker(wg) + }, time.Second, c.stopCh) + }() + } + + for i := 0; i < threadiness; i++ { + wg.Add(1) + go func() { + defer wg.Done() + wait.Until(func() { + // detects gateway pod changes and updates the pod's IP and MAC in the northbound DB + c.runPodWorker(wg) + }, time.Second, c.stopCh) + }() + } + + for i := 0; i < threadiness; i++ { + wg.Add(1) + go func() { + defer wg.Done() + wait.Until(func() { + // detects namespace changes and applies polices that match the namespace selector in the `From` policy field + c.runNamespaceWorker(wg) + }, time.Second, c.stopCh) + }() + } + + // wait until we're told to stop + <-c.stopCh + + c.podQueue.ShutDown() + c.routeQueue.ShutDown() + c.namespaceQueue.ShutDown() + + wg.Wait() + +} + +func (c *ExternalGatewayNodeController) onNamespaceAdd(obj interface{}) { + c.namespaceQueue.Add(obj) +} + +func (c *ExternalGatewayNodeController) onNamespaceUpdate(oldObj, newObj interface{}) { + oldNamespace := oldObj.(*v1.Namespace) + newNamespace := newObj.(*v1.Namespace) + + if oldNamespace.ResourceVersion == newNamespace.ResourceVersion || !newNamespace.GetDeletionTimestamp().IsZero() { + return + } + c.namespaceQueue.Add(newObj) +} + +func (c *ExternalGatewayNodeController) onNamespaceDelete(obj interface{}) { + c.namespaceQueue.Add(obj) +} + +func (c *ExternalGatewayNodeController) runPolicyWorker(wg *sync.WaitGroup) { + for c.processNextPolicyWorkItem(wg) { + } +} + +func (c *ExternalGatewayNodeController) processNextPolicyWorkItem(wg *sync.WaitGroup) bool { + wg.Add(1) + defer wg.Done() + + obj, shutdown := c.routeQueue.Get() + + if shutdown { + 
return false + } + + defer c.routeQueue.Done(obj) + + item := obj.(*adminpolicybasedrouteapi.AdminPolicyBasedExternalRoute) + klog.Infof("Processing policy %s", item.Name) + err := c.syncRoutePolicy(item) + if err != nil { + if c.routeQueue.NumRequeues(item) < maxRetries { + klog.V(2).InfoS("Error found while processing policy: %v", err.Error()) + c.routeQueue.AddRateLimited(item) + return true + } + klog.Warningf("Dropping policy %q out of the queue: %v", item.Name, err) + utilruntime.HandleError(err) + } + c.routeQueue.Forget(obj) + return true +} + +func (c *ExternalGatewayNodeController) syncRoutePolicy(routePolicy *adminpolicybasedrouteapi.AdminPolicyBasedExternalRoute) error { + _, err := c.routeLister.Get(routePolicy.Name) + if err != nil && !apierrors.IsNotFound(err) { + return err + } + if apierrors.IsNotFound(err) { + // DELETE use case + klog.Infof("Deleting policy %s", routePolicy.Name) + err := c.mgr.processDeletePolicy(routePolicy.Name) + if err != nil { + return fmt.Errorf("failed to delete Admin Policy Based External Route %s:%w", routePolicy.Name, err) + } + klog.Infof("Policy %s deleted", routePolicy.Name) + return nil + } + currentPolicy, found, markedForDeletion := c.mgr.getRoutePolicyFromCache(routePolicy.Name) + if markedForDeletion { + klog.Warningf("Attempting to add or update route policy %s when it has been marked for deletion. 
Skipping...", routePolicy.Name) + return nil + } + if !found { + // ADD use case + klog.Infof("Adding policy %s", routePolicy.Name) + _, err := c.mgr.processAddPolicy(routePolicy) + if err != nil { + return fmt.Errorf("failed to create Admin Policy Based External Route %s:%w", routePolicy.Name, err) + } + return nil + } + // UPDATE use case + klog.Infof("Updating policy %s", routePolicy.Name) + _, err = c.mgr.processUpdatePolicy(&currentPolicy, routePolicy) + if err != nil { + return fmt.Errorf("failed to update Admin Policy Based External Route %s:%w", routePolicy.Name, err) + } + return nil +} + +func (c *ExternalGatewayNodeController) onPolicyAdd(obj interface{}) { + c.routeQueue.Add(obj) +} + +func (c *ExternalGatewayNodeController) onPolicyUpdate(oldObj, newObj interface{}) { + oldRoutePolicy := oldObj.(*adminpolicybasedrouteapi.AdminPolicyBasedExternalRoute) + newRoutePolicy := newObj.(*adminpolicybasedrouteapi.AdminPolicyBasedExternalRoute) + + if oldRoutePolicy.Generation == newRoutePolicy.Generation || + !newRoutePolicy.GetDeletionTimestamp().IsZero() { + return + } + + c.routeQueue.Add(newObj) +} + +func (c *ExternalGatewayNodeController) onPolicyDelete(obj interface{}) { + c.routeQueue.Add(obj) +} + +func (c *ExternalGatewayNodeController) runNamespaceWorker(wg *sync.WaitGroup) { + for c.processNextNamespaceWorkItem(wg) { + + } +} + +func (c *ExternalGatewayNodeController) processNextNamespaceWorkItem(wg *sync.WaitGroup) bool { + wg.Add(1) + defer wg.Done() + + obj, shutdown := c.namespaceQueue.Get() + + if shutdown { + return false + } + + defer c.namespaceQueue.Done(obj) + + err := c.syncNamespace(obj.(*v1.Namespace)) + if err != nil { + if c.namespaceQueue.NumRequeues(obj) < maxRetries { + klog.V(2).InfoS("Error found while processing namespace %s:%w", obj.(*v1.Namespace), err) + c.namespaceQueue.AddRateLimited(obj) + return true + } + klog.Warningf("Dropping namespace %q out of the queue: %v", obj.(*v1.Namespace).Name, err) + utilruntime.HandleError(err) + 
} + c.namespaceQueue.Forget(obj) + return true +} + +func (c *ExternalGatewayNodeController) syncNamespace(namespace *v1.Namespace) error { + _, err := c.namespaceLister.Get(namespace.Name) + if err != nil && !apierrors.IsNotFound(err) { + return err + } + if apierrors.IsNotFound(err) || !namespace.DeletionTimestamp.IsZero() { + // DELETE use case + + klog.Infof("Deleting namespace reference %s", namespace.Name) + _, found := c.mgr.getNamespaceInfoFromCache(namespace.Name) + if !found { + // namespace is not a recipient for policies + return nil + } + c.mgr.deleteNamespaceInfoInCache(namespace.Name) + c.mgr.unlockNamespaceInfoCache(namespace.Name) + return nil + } + matches, err := c.mgr.getPoliciesForNamespace(namespace.Name) + if err != nil { + return err + } + cacheInfo, found := c.mgr.getNamespaceInfoFromCache(namespace.Name) + if !found && len(matches) == 0 { + // it's not a namespace being cached already and it is not a target for policies, nothing to do + return nil + } + if !found { + // ADD use case + // new namespace or namespace updated its labels and now match a routing policy + defer c.mgr.unlockNamespaceInfoCache(namespace.Name) + cacheInfo = c.mgr.newNamespaceInfoInCache(namespace.Name) + cacheInfo.policies = matches + return c.mgr.processAddNamespace(namespace, cacheInfo) + } + + if !cacheInfo.policies.Equal(matches) { + // UPDATE use case + // policies differ, need to reconcile them + defer c.mgr.unlockNamespaceInfoCache(namespace.Name) + err = c.mgr.processUpdateNamespace(namespace.Name, cacheInfo.policies, matches, cacheInfo) + if err != nil { + return err + } + if cacheInfo.policies.Len() == 0 { + c.mgr.deleteNamespaceInfoInCache(namespace.Name) + } + return nil + } + c.mgr.unlockNamespaceInfoCache(namespace.Name) + return nil + +} + +func (c *ExternalGatewayNodeController) onPodAdd(obj interface{}) { + o := obj.(*v1.Pod) + // if the pod does not have IPs AND there are no multus network status annotations found, skip it + if len(o.Status.PodIPs) 
== 0 && len(o.Annotations[nettypes.NetworkStatusAnnot]) == 0 { + return + } + c.podQueue.Add(obj) +} + +func (c *ExternalGatewayNodeController) onPodUpdate(oldObj, newObj interface{}) { + o := oldObj.(*v1.Pod) + n := newObj.(*v1.Pod) + + // if labels AND assigned Pod IPs AND the multus network status annotations are the same, skip processing changes to the pod. + if reflect.DeepEqual(o.Labels, n.Labels) && + reflect.DeepEqual(o.Status.PodIPs, n.Status.PodIPs) && + reflect.DeepEqual(o.Annotations[nettypes.NetworkStatusAnnot], n.Annotations[nettypes.NetworkStatusAnnot]) { + return + } + c.podQueue.Add(newObj) +} + +func (c *ExternalGatewayNodeController) onPodDelete(obj interface{}) { + c.podQueue.Add(obj) +} + +func (c *ExternalGatewayNodeController) runPodWorker(wg *sync.WaitGroup) { + for c.processNextPodWorkItem(wg) { + } +} + +func (c *ExternalGatewayNodeController) processNextPodWorkItem(wg *sync.WaitGroup) bool { + wg.Add(1) + defer wg.Done() + + obj, shutdown := c.podQueue.Get() + + if shutdown { + return false + } + + defer c.podQueue.Done(obj) + + p := obj.(*v1.Pod) + err := c.syncPod(p) + if err != nil { + if c.podQueue.NumRequeues(obj) < maxRetries { + klog.V(2).InfoS("Error found while processing pod %s/%s:%w", p.Namespace, p.Name, err) + c.podQueue.AddRateLimited(obj) + return true + } + klog.Warningf("Dropping pod %s/%s out of the queue: %s", p.Namespace, p.Name, err) + utilruntime.HandleError(err) + } + + c.podQueue.Forget(obj) + return true +} + +func (c *ExternalGatewayNodeController) syncPod(pod *v1.Pod) error { + + _, err := c.podLister.Pods(pod.Namespace).Get(pod.Name) + if err != nil && !apierrors.IsNotFound(err) { + return err + } + namespaces := c.mgr.filterNamespacesUsingPodGateway(ktypes.NamespacedName{Namespace: pod.Namespace, Name: pod.Name}) + klog.Infof("Processing pod reference %s/%s", pod.Namespace, pod.Name) + if apierrors.IsNotFound(err) || !pod.DeletionTimestamp.IsZero() { + // DELETE case + if namespaces.Len() == 0 { + // nothing 
to do, this pod is not a gateway pod + return nil + } + klog.Infof("Deleting pod gateway %s/%s", pod.Namespace, pod.Name) + return c.mgr.processDeletePod(pod, namespaces) + } + if namespaces.Len() == 0 { + // ADD case: new pod or existing pod that is not a gateway pod and could now be one. + klog.Infof("Adding pod reference %s/%s", pod.Namespace, pod.Name) + return c.mgr.processAddPod(pod) + } + // UPDATE case + klog.Infof("Updating pod gateway %s/%s", pod.Namespace, pod.Name) + return c.mgr.processUpdatePod(pod, namespaces) +} + +func (c *ExternalGatewayNodeController) GetAdminPolicyBasedExternalRouteIPsForTargetNamespace(namespaceName string) (sets.Set[string], error) { + gwIPs, err := c.mgr.getDynamicGatewayIPsForTargetNamespace(namespaceName) + if err != nil { + return nil, err + } + tmpIPs, err := c.mgr.getStaticGatewayIPsForTargetNamespace(namespaceName) + if err != nil { + return nil, err + } + + return gwIPs.Union(tmpIPs), nil +} diff --git a/go-controller/pkg/ovn/controller/apbroute/repair.go b/go-controller/pkg/ovn/controller/apbroute/repair.go new file mode 100644 index 0000000000..e1db71fd15 --- /dev/null +++ b/go-controller/pkg/ovn/controller/apbroute/repair.go @@ -0,0 +1,367 @@ +package apbroute + +import ( + "net" + "strings" + "time" + + v1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/labels" + ktypes "k8s.io/apimachinery/pkg/types" + "k8s.io/apimachinery/pkg/util/sets" + "k8s.io/klog/v2" + utilnet "k8s.io/utils/net" + + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/nbdb" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" +) + +type managedGWIPs struct { + namespacedName ktypes.NamespacedName + nodeName string + gwList gatewayInfoList +} + +func (c *ExternalGatewayMasterController) repair() { + start := time.Now() + defer func() { + klog.Infof("Syncing exgw routes took %v", time.Since(start)) + }() + + // 
migration from LGW to SGW mode + // for shared gateway mode, these LRPs shouldn't exist, so delete them all + if config.Gateway.Mode == config.GatewayModeShared { + if err := c.nbClient.delAllHybridRoutePolicies(); err != nil { + klog.Errorf("Error while removing hybrid policies on moving to SGW mode, error: %v", err) + } + } else if config.Gateway.Mode == config.GatewayModeLocal { + // remove all legacy hybrid route policies + if err := c.nbClient.delAllLegacyHybridRoutePolicies(); err != nil { + klog.Errorf("Error while removing legacy hybrid policies, error: %v", err) + } + } + + // Get all ECMP routes in OVN and build cache + ovnRouteCache := c.buildOVNECMPCache() + + if len(ovnRouteCache) == 0 { + // Even if no ECMP routes exist, we should ensure no 501 LRPs exist either + if err := c.nbClient.delAllHybridRoutePolicies(); err != nil { + klog.Errorf("Error while removing hybrid policies, error: %v", err) + } + // nothing in OVN, so no reason to search for stale routes + return + } + + // Build cache of expected routes in the cluster + // map[podIP]set[podNamespacedName,nodeName,expectedGWIPs] + policyGWIPsMap, err := c.buildExternalIPGatewaysFromPolicyRules() + if err != nil { + klog.Errorf("Error while aggregating the external policy routes: %v", err) + } + + annotatedGWIPsMap, err := c.buildExternalIPGatewaysFromAnnotations() + if err != nil { + klog.Errorf("Cannot retrieve the annotated gateway IPs:%w", err) + } + + // compare caches and see if OVN routes are stale + for podIP, ovnRoutes := range ovnRouteCache { + // pod IP does not exist in the cluster + // remove route and any hybrid policy + expectedNextHopsPolicy, okPolicy := policyGWIPsMap[podIP] + expectedNextHopsAnnotation, okAnnotation := annotatedGWIPsMap[podIP] + if !okPolicy && !okAnnotation { + // No external gateways found for this Pod IP + continue + } + + for _, ovnRoute := range ovnRoutes { + // if length of the output port is 0, this is a legacy route (we now always specify output interface) 
+ if len(ovnRoute.outport) == 0 { + continue + } + + node := util.GetWorkerFromGatewayRouter(ovnRoute.router) + // prefix will signify secondary exgw bridge, or empty if normal setup + // have to determine if a node changed while master was down and if the route swapped from + // the default bridge to a new secondary bridge (or vice versa) + prefix, err := c.nbClient.extSwitchPrefix(node) + if err != nil { + // we shouldn't continue in this case, because we cant be sure this is a route we want to remove + klog.Errorf("Cannot sync exgw route: %+v, unable to determine exgw switch prefix: %v", + ovnRoute, err) + } else if (prefix != "" && !strings.Contains(ovnRoute.outport, prefix)) || + (prefix == "" && strings.Contains(ovnRoute.outport, types.EgressGWSwitchPrefix)) { + continue + } + if expectedNextHopsPolicy != nil { + ovnRoute.shouldExist = c.processOVNRoute(ovnRoute, expectedNextHopsPolicy.gwList, podIP, expectedNextHopsPolicy) + if ovnRoute.shouldExist { + continue + } + } + if expectedNextHopsAnnotation != nil { + ovnRoute.shouldExist = c.processOVNRoute(ovnRoute, expectedNextHopsAnnotation.gwList, podIP, expectedNextHopsAnnotation) + } + } + } + + klog.Infof("OVN ECMP route cache is: %+v", ovnRouteCache) + klog.Infof("Cluster ECMP route cache is: %+v", policyGWIPsMap) + + // iterate through ovn routes and remove any stale entries + for podIP, ovnRoutes := range ovnRouteCache { + podHasAnyECMPRoutes := false + for _, ovnRoute := range ovnRoutes { + if !ovnRoute.shouldExist { + klog.Infof("Found stale exgw ecmp route, podIP: %s, nexthop: %s, router: %s", + podIP, ovnRoute.nextHop, ovnRoute.router) + lrsr := nbdb.LogicalRouterStaticRoute{UUID: ovnRoute.uuid} + err := c.nbClient.deleteLogicalRouterStaticRoutes(ovnRoute.router, &lrsr) + // err := + if err != nil { + klog.Errorf("Error deleting static route %s from router %s: %v", ovnRoute.uuid, ovnRoute.router, err) + } + + // check to see if we should also clean up bfd + node := 
util.GetWorkerFromGatewayRouter(ovnRoute.router) + // prefix will signify secondary exgw bridge, or empty if normal setup + // have to determine if a node changed while master was down and if the route swapped from + // the default bridge to a new secondary bridge (or vice versa) + prefix, err := c.nbClient.extSwitchPrefix(node) + if err != nil { + // we shouldn't continue in this case, because we cant be sure this is a route we want to remove + klog.Errorf("Cannot sync exgw bfd: %+v, unable to determine exgw switch prefix: %v", + ovnRoute, err) + } else { + if err := c.nbClient.cleanUpBFDEntry(ovnRoute.nextHop, ovnRoute.router, prefix); err != nil { + klog.Errorf("Cannot clean up BFD entry: %w", err) + } + } + + } else { + podHasAnyECMPRoutes = true + } + } + + // if pod had no ECMP routes we need to make sure we remove logical route policy for local gw mode + if !podHasAnyECMPRoutes { + for _, ovnRoute := range ovnRoutes { + gr := strings.TrimPrefix(ovnRoute.router, types.GWRouterPrefix) + if err := c.nbClient.delHybridRoutePolicyForPod(net.ParseIP(podIP), gr); err != nil { + klog.Errorf("Error while removing hybrid policy for pod IP: %s, on node: %s, error: %v", + podIP, gr, err) + } + } + } + } +} + +func (c *ExternalGatewayMasterController) buildExternalIPGatewaysFromPolicyRules() (map[string]*managedGWIPs, error) { + + clusterRouteCache := make(map[string]*managedGWIPs) + externalRoutePolicies, err := c.routeLister.List(labels.Everything()) + if err != nil { + return nil, err + } + + for _, policy := range externalRoutePolicies { + p, err := c.mgr.processExternalRoutePolicy(policy) + if err != nil { + return nil, err + } + // store the policy manifest in the routePolicy cache to avoid hitting the informer every time the annotation logic recalls all the gw IPs from the CRs. 
+ err = c.mgr.storeRoutePolicyInCache(policy) + if err != nil { + return nil, err + } + nsList, err := c.mgr.listNamespacesBySelector(p.targetNamespacesSelector) + if err != nil { + return nil, err + } + allGWIPs := make(gatewayInfoList, 0) + allGWIPs = append(allGWIPs, p.staticGateways...) + for _, gw := range p.dynamicGateways { + allGWIPs = append(allGWIPs, gw) + } + for _, ns := range nsList { + nsPods, err := c.podLister.Pods(ns.Name).List(labels.Everything()) + if err != nil { + return nil, err + } + for _, nsPod := range nsPods { + // ignore completed pods, host networked pods, pods not scheduled + if util.PodWantsHostNetwork(nsPod) || util.PodCompleted(nsPod) || !util.PodScheduled(nsPod) { + continue + } + for _, podIP := range nsPod.Status.PodIPs { + podIPStr := utilnet.ParseIPSloppy(podIP.IP).String() + clusterRouteCache[podIPStr] = &managedGWIPs{namespacedName: ktypes.NamespacedName{Namespace: nsPod.Namespace, Name: nsPod.Name}, nodeName: nsPod.Spec.NodeName, gwList: make(gatewayInfoList, 0)} + for _, gwInfo := range allGWIPs { + for gw := range gwInfo.gws { + if utilnet.IsIPv6String(gw) != utilnet.IsIPv6String(podIPStr) { + continue + } + clusterRouteCache[podIPStr].gwList = append(clusterRouteCache[podIPStr].gwList, gwInfo) + } + } + } + } + } + + } + // flag the route policy cache as populated so that the logic to retrieve the dynamic and static gw IPs from the annotation side can use the cache instead of hitting the informer. + c.mgr.setRoutePolicyCacheAsPopulated() + return clusterRouteCache, nil +} + +func (c *ExternalGatewayMasterController) processOVNRoute(ovnRoute *ovnRoute, gwList gatewayInfoList, podIP string, managedIPGWInfo *managedGWIPs) bool { + // podIP exists, check if route matches + for _, gwInfo := range gwList { + for clusterNextHop := range gwInfo.gws { + if ovnRoute.nextHop == clusterNextHop { + // populate the externalGWInfo cache with this pair podIP->next Hop IP. 
+ err := c.nbClient.updateExternalGWInfoCacheForPodIPWithGatewayIP(podIP, ovnRoute.nextHop, managedIPGWInfo.nodeName, gwInfo.bfdEnabled, managedIPGWInfo.namespacedName) + if err == nil { + return true + } + klog.Errorf("Failed to add cache routeInfo for %s, error: %v", managedIPGWInfo.namespacedName.Name, err) + } + } + } + return false +} + +func (c *ExternalGatewayMasterController) buildExternalIPGatewaysFromAnnotations() (map[string]*managedGWIPs, error) { + clusterRouteCache := make(map[string]*managedGWIPs, 0) + + nsList, err := c.namespaceLister.List(labels.Everything()) + if err != nil { + return nil, err + } + for _, ns := range nsList { + if nsGWIPs, ok := ns.Annotations[util.RoutingExternalGWsAnnotation]; ok && nsGWIPs != "" { + gwInfo := &gatewayInfo{gws: sets.New[string]()} + for _, ip := range strings.Split(nsGWIPs, ",") { + podIPStr := utilnet.ParseIPSloppy(ip).String() + gwInfo.gws.Insert(podIPStr) + } + if _, ok := ns.Annotations[util.BfdAnnotation]; ok { + gwInfo.bfdEnabled = true + } + nsPodList, err := c.podLister.Pods(ns.Name).List(labels.Everything()) + if err != nil { + return nil, err + } + // iterate through all the pods in the namespace and associate the gw ips to those that correspond + populateManagedGWIPsCacheInNamespace(ns.Name, gwInfo, clusterRouteCache, nsPodList) + } + } + + podList, err := c.podLister.List(labels.Everything()) + if err != nil { + return nil, err + } + for _, pod := range podList { + networkName, ok := pod.Annotations[util.RoutingNetworkAnnotation] + if !ok { + continue + } + targetNamespaces, ok := pod.Annotations[util.RoutingNamespaceAnnotation] + if !ok { + continue + } + foundGws, err := getExGwPodIPs(pod, networkName) + if err != nil { + klog.Errorf("Error getting exgw IPs for pod: %s, error: %v", pod.Name, err) + return nil, err + } + if foundGws.Len() == 0 { + klog.Errorf("No pod IPs found for pod %s/%s", pod.Namespace, pod.Name) + continue + } + gwInfo := &gatewayInfo{gws: foundGws} + if _, ok := 
pod.Annotations[util.BfdAnnotation]; ok { + gwInfo.bfdEnabled = true + } + for _, targetNs := range strings.Split(targetNamespaces, ",") { + // iterate through all pods and associate the gw ips to those that correspond + populateManagedGWIPsCacheInNamespace(targetNs, gwInfo, clusterRouteCache, podList) + } + } + return clusterRouteCache, nil +} + +func populateManagedGWIPsCacheInNamespace(targetNamespace string, gwInfo *gatewayInfo, cache map[string]*managedGWIPs, podList []*v1.Pod) { + for gwIP := range gwInfo.gws { + for _, pod := range podList { + // ignore completed pods, host networked pods, pods not scheduled + if util.PodWantsHostNetwork(pod) || util.PodCompleted(pod) || !util.PodScheduled(pod) { + continue + } + for _, podIP := range pod.Status.PodIPs { + podIPStr := utilnet.ParseIPSloppy(podIP.IP).String() + if utilnet.IsIPv6String(gwIP) != utilnet.IsIPv6String(podIPStr) { + continue + } + if _, ok := cache[podIPStr]; !ok { + cache[podIPStr] = &managedGWIPs{ + namespacedName: ktypes.NamespacedName{Namespace: pod.Namespace, Name: pod.Name}, + nodeName: pod.Spec.NodeName, + } + } + cache[podIPStr].gwList = append(cache[podIPStr].gwList, &gatewayInfo{gws: sets.New(gwIP), bfdEnabled: gwInfo.bfdEnabled}) + } + } + } +} + +// Build cache of routes in OVN +// map[podIP][]ovnRoute +type ovnRoute struct { + nextHop string + uuid string + router string + outport string + shouldExist bool +} + +func (c *ExternalGatewayMasterController) buildOVNECMPCache() map[string][]*ovnRoute { + p := func(item *nbdb.LogicalRouterStaticRoute) bool { + return item.Options["ecmp_symmetric_reply"] == "true" + } + logicalRouterStaticRoutes, err := c.nbClient.findLogicalRouterStaticRoutesWithPredicate(p) + if err != nil { + klog.Errorf("CleanECMPRoutes: failed to list ecmp routes: %v", err) + return nil + } + + ovnRouteCache := make(map[string][]*ovnRoute) + for _, logicalRouterStaticRoute := range logicalRouterStaticRoutes { + p := func(item *nbdb.LogicalRouter) bool { + return 
util.SliceHasStringItem(item.StaticRoutes, logicalRouterStaticRoute.UUID) + } + logicalRouters, err := c.nbClient.findLogicalRoutersWithPredicate(p) + if err != nil { + klog.Errorf("CleanECMPRoutes: failed to find logical router for %s, err: %v", logicalRouterStaticRoute.UUID, err) + continue + } + + route := &ovnRoute{ + nextHop: logicalRouterStaticRoute.Nexthop, + uuid: logicalRouterStaticRoute.UUID, + router: logicalRouters[0].Name, + outport: *logicalRouterStaticRoute.OutputPort, + } + podIP, _, _ := net.ParseCIDR(logicalRouterStaticRoute.IPPrefix) + if _, ok := ovnRouteCache[podIP.String()]; !ok { + ovnRouteCache[podIP.String()] = []*ovnRoute{route} + } else { + ovnRouteCache[podIP.String()] = append(ovnRouteCache[podIP.String()], route) + } + } + return ovnRouteCache +} diff --git a/go-controller/pkg/ovn/default_network_controller.go b/go-controller/pkg/ovn/default_network_controller.go index 6018f5c75f..af825651a1 100644 --- a/go-controller/pkg/ovn/default_network_controller.go +++ b/go-controller/pkg/ovn/default_network_controller.go @@ -17,6 +17,7 @@ import ( "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/metrics" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/nbdb" addressset "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/address_set" + apbroutecontroller "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/controller/apbroute" egresssvc "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/controller/egress_services" svccontroller "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/controller/services" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/controller/unidling" @@ -51,8 +52,8 @@ type DefaultNetworkController struct { // cluster's east-west traffic. 
loadbalancerClusterCache map[kapi.Protocol]string - externalGWCache map[ktypes.NamespacedName]*externalRouteInfo - exGWCacheMutex sync.RWMutex + externalGWCache map[ktypes.NamespacedName]*apbroutecontroller.ExternalRouteInfo + exGWCacheMutex *sync.RWMutex // egressFirewalls is a map of namespaces and the egressFirewall attached to it egressFirewalls sync.Map @@ -93,6 +94,9 @@ type DefaultNetworkController struct { svcController *svccontroller.Controller // Controller used to handle egress services egressSvcController *egresssvc.Controller + + // Controller used to handle the admin policy based external route resources + apbExternalRouteController *apbroutecontroller.ExternalGatewayMasterController // svcFactory used to handle service related events svcFactory informers.SharedInformerFactory @@ -163,6 +167,20 @@ func newDefaultNetworkControllerCommon(cnci *CommonNetworkControllerInfo, zoneICHandler = zoneic.NewZoneInterconnectHandler(&util.DefaultNetInfo{}, cnci.nbClient, cnci.sbClient) zoneChassisHandler = zoneic.NewZoneChassisHandler(cnci.sbClient) } + apbExternalRouteController, err := apbroutecontroller.NewExternalMasterController( + DefaultNetworkControllerName, + cnci.client, + cnci.kube.APBRouteClient, + defaultStopChan, + cnci.watchFactory.PodCoreInformer(), + cnci.watchFactory.NamespaceInformer(), + cnci.watchFactory.NodeCoreInformer().Lister(), + cnci.nbClient, + addressSetFactory, + ) + if err != nil { + return nil, fmt.Errorf("unable to create new admin policy based external route controller while creating new default network controller :%w", err) + } oc := &DefaultNetworkController{ BaseNetworkController: BaseNetworkController{ @@ -181,8 +199,8 @@ func newDefaultNetworkControllerCommon(cnci *CommonNetworkControllerInfo, wg: defaultWg, localZoneNodes: &sync.Map{}, }, - externalGWCache: make(map[ktypes.NamespacedName]*externalRouteInfo), - exGWCacheMutex: sync.RWMutex{}, + externalGWCache: apbExternalRouteController.ExternalGWCache, + exGWCacheMutex: 
apbExternalRouteController.ExGWCacheMutex, eIPC: egressIPZoneController{ nodeIPUpdateMutex: &sync.Mutex{}, podAssignmentMutex: &sync.Mutex{}, @@ -199,6 +217,7 @@ func newDefaultNetworkControllerCommon(cnci *CommonNetworkControllerInfo, svcFactory: svcFactory, zoneICHandler: zoneICHandler, zoneChassisHandler: zoneChassisHandler, + apbExternalRouteController: apbExternalRouteController, } // Allocate IPs for logical router port "GwRouterToJoinSwitchPrefix + OVNClusterRouter". This should always @@ -383,10 +402,6 @@ func (oc *DefaultNetworkController) Run(ctx context.Context) error { klog.Infof("Starting all the Watchers...") start := time.Now() - // Sync external gateway routes. External gateway may be set in namespaces - // or via pods. So execute an individual sync method at startup - WithSyncDurationMetricNoError("external gateway routes", oc.cleanExGwECMPRoutes) - // WatchNamespaces() should be started first because it has no other // dependencies, and WatchNodes() depends on it if err := WithSyncDurationMetric("namespace", oc.WatchNamespaces); err != nil { @@ -497,6 +512,12 @@ func (oc *DefaultNetworkController) Run(ctx context.Context) error { }() } + oc.wg.Add(1) + go func() { + defer oc.wg.Done() + oc.apbExternalRouteController.Run(1) + }() + end := time.Since(start) klog.Infof("Completing all the Watchers took %v", end) metrics.MetricMasterSyncDuration.WithLabelValues("all watchers").Set(end.Seconds()) diff --git a/go-controller/pkg/ovn/egressgw.go b/go-controller/pkg/ovn/egressgw.go index c63735d56b..360143c71a 100644 --- a/go-controller/pkg/ovn/egressgw.go +++ b/go-controller/pkg/ovn/egressgw.go @@ -6,8 +6,6 @@ import ( "net" "regexp" "strings" - "sync" - "time" utilnet "k8s.io/utils/net" @@ -17,6 +15,7 @@ import ( "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/factory" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/libovsdbops" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/nbdb" + apbroutecontroller 
"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/controller/apbroute" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" "github.com/pkg/errors" @@ -34,26 +33,6 @@ type gatewayInfo struct { bfdEnabled bool } -// Build cache of routes in OVN -// map[podIP][]ovnRoute -type ovnRoute struct { - nextHop string - uuid string - router string - outport string - shouldExist bool -} - -type externalRouteInfo struct { - sync.Mutex - deleted bool - podName ktypes.NamespacedName - // podExternalRoutes is a cache keeping the LR routes added to the GRs when - // external gateways are used. The first map key is the podIP (src-ip of the route), - // the second the GW IP (next hop), and the third the GR name - podExternalRoutes map[string]map[string]string -} - func getHybridRouteAddrSetDbIDs(nodeName, controller string) *libovsdbops.DbObjectIDs { return libovsdbops.NewDbObjectIDs(libovsdbops.AddressSetHybridNodeRoute, controller, map[libovsdbops.ExternalIDKey]string{ @@ -63,7 +42,7 @@ func getHybridRouteAddrSetDbIDs(nodeName, controller string) *libovsdbops.DbObje } // ensureRouteInfoLocked either gets the current routeInfo in the cache with a lock, or creates+locks a new one if missing -func (oc *DefaultNetworkController) ensureRouteInfoLocked(podName ktypes.NamespacedName) (*externalRouteInfo, error) { +func (oc *DefaultNetworkController) ensureRouteInfoLocked(podName ktypes.NamespacedName) (*apbroutecontroller.ExternalRouteInfo, error) { // We don't want to hold the cache lock while we try to lock the routeInfo (unless we are creating it, then we know // no one else is using it). This could lead to dead lock. Therefore the steps here are: // 1. 
Get the cache lock, try to find the routeInfo @@ -72,9 +51,9 @@ func (oc *DefaultNetworkController) ensureRouteInfoLocked(podName ktypes.Namespa oc.exGWCacheMutex.Lock() routeInfo, ok := oc.externalGWCache[podName] if !ok { - routeInfo = &externalRouteInfo{ - podExternalRoutes: make(map[string]map[string]string), - podName: podName, + routeInfo = &apbroutecontroller.ExternalRouteInfo{ + PodExternalRoutes: make(map[string]map[string]string), + PodName: podName, } // we are creating routeInfo and going to set it in podExternalRoutes map // so safe to hold the lock while we create and add it @@ -92,7 +71,7 @@ func (oc *DefaultNetworkController) ensureRouteInfoLocked(podName ktypes.Namespa // 5. If routeInfo was deleted between releasing the cache lock and grabbing // the routeInfo lock, return an error so the caller doesn't use it and // retries the operation later - if routeInfo.deleted { + if routeInfo.Deleted { routeInfo.Unlock() return nil, fmt.Errorf("routeInfo for pod %s, was altered during ensure route info", podName) } @@ -101,11 +80,11 @@ func (oc *DefaultNetworkController) ensureRouteInfoLocked(podName ktypes.Namespa } // getRouteInfosForNamespace returns all routeInfos for a specific namespace -func (oc *DefaultNetworkController) getRouteInfosForNamespace(namespace string) []*externalRouteInfo { +func (oc *DefaultNetworkController) getRouteInfosForNamespace(namespace string) []*apbroutecontroller.ExternalRouteInfo { oc.exGWCacheMutex.RLock() defer oc.exGWCacheMutex.RUnlock() - routes := make([]*externalRouteInfo, 0) + routes := make([]*apbroutecontroller.ExternalRouteInfo, 0) for namespacedName, routeInfo := range oc.externalGWCache { if namespacedName.Namespace == namespace { routes = append(routes, routeInfo) @@ -116,7 +95,7 @@ func (oc *DefaultNetworkController) getRouteInfosForNamespace(namespace string) } // deleteRouteInfoLocked removes a routeInfo from the cache, and returns it locked -func (oc *DefaultNetworkController) deleteRouteInfoLocked(name 
ktypes.NamespacedName) *externalRouteInfo { +func (oc *DefaultNetworkController) deleteRouteInfoLocked(name ktypes.NamespacedName) *apbroutecontroller.ExternalRouteInfo { // Attempt to find the routeInfo in the cache, release the cache lock while // we try to lock the routeInfo to avoid any deadlock oc.exGWCacheMutex.RLock() @@ -128,12 +107,12 @@ func (oc *DefaultNetworkController) deleteRouteInfoLocked(name ktypes.Namespaced } routeInfo.Lock() - if routeInfo.deleted { + if routeInfo.Deleted { routeInfo.Unlock() return nil } - routeInfo.deleted = true + routeInfo.Deleted = true go func() { oc.exGWCacheMutex.Lock() @@ -327,7 +306,7 @@ func (oc *DefaultNetworkController) deleteLogicalRouterStaticRoute(podIP, mask, // deletePodGWRoute deletes all associated gateway routing resources for one // pod gateway route -func (oc *DefaultNetworkController) deletePodGWRoute(routeInfo *externalRouteInfo, podIP, gw, gr string) error { +func (oc *DefaultNetworkController) deletePodGWRoute(routeInfo *apbroutecontroller.ExternalRouteInfo, podIP, gw, gr string) error { if utilnet.IsIPv6String(gw) != utilnet.IsIPv6String(podIP) { return nil } @@ -335,18 +314,18 @@ func (oc *DefaultNetworkController) deletePodGWRoute(routeInfo *externalRouteInf mask := util.GetIPFullMask(podIP) if err := oc.deleteLogicalRouterStaticRoute(podIP, mask, gw, gr); err != nil { return fmt.Errorf("unable to delete pod %s ECMP route to GR %s, GW: %s: %w", - routeInfo.podName, gr, gw, err) + routeInfo.PodName, gr, gw, err) } klog.V(5).Infof("ECMP route deleted for pod: %s, on gr: %s, to gw: %s", - routeInfo.podName, gr, gw) + routeInfo.PodName, gr, gw) node := util.GetWorkerFromGatewayRouter(gr) // The gw is deleted from the routes cache after this func is called, length 1 // means it is the last gw for the pod and the hybrid route policy should be deleted. 
- if entry := routeInfo.podExternalRoutes[podIP]; len(entry) == 1 { + if entry := routeInfo.PodExternalRoutes[podIP]; len(entry) == 1 { if err := oc.delHybridRoutePolicyForPod(net.ParseIP(podIP), node); err != nil { - return fmt.Errorf("unable to delete hybrid route policy for pod %s: err: %v", routeInfo.podName, err) + return fmt.Errorf("unable to delete hybrid route policy for pod %s: err: %v", routeInfo.PodName, err) } } @@ -420,15 +399,25 @@ func (oc *DefaultNetworkController) deletePodGWRoutesForNamespace(pod *kapi.Pod, // are given, all routes for the namespace are deleted. func (oc *DefaultNetworkController) deleteGWRoutesForNamespace(namespace string, matchGWs sets.Set[string]) error { deleteAll := (matchGWs == nil || matchGWs.Len() == 0) + + policyGWIPs, err := oc.apbExternalRouteController.GetDynamicGatewayIPsForTargetNamespace(namespace) + if err != nil { + return err + } + policyStaticGWIPs, err := oc.apbExternalRouteController.GetStaticGatewayIPsForTargetNamespace(namespace) + if err != nil { + return err + } + policyGWIPs.Union(policyStaticGWIPs) for _, routeInfo := range oc.getRouteInfosForNamespace(namespace) { routeInfo.Lock() - if routeInfo.deleted { + if routeInfo.Deleted { routeInfo.Unlock() continue } - for podIP, routes := range routeInfo.podExternalRoutes { + for podIP, routes := range routeInfo.PodExternalRoutes { for gw, gr := range routes { - if deleteAll || matchGWs.Has(gw) { + if (deleteAll || matchGWs.Has(gw)) && !policyGWIPs.Has(gw) { if err := oc.deletePodGWRoute(routeInfo, podIP, gw, gr); err != nil { // if we encounter error while deleting routes for one pod; we return and don't try subsequent pods routeInfo.Unlock() @@ -453,12 +442,12 @@ func (oc *DefaultNetworkController) deleteGWRoutesForPod(name ktypes.NamespacedN for _, podIPNet := range podIPNets { podIP := podIPNet.IP.String() - routes, ok := routeInfo.podExternalRoutes[podIP] + routes, ok := routeInfo.PodExternalRoutes[podIP] if !ok { continue } if len(routes) == 0 { - 
delete(routeInfo.podExternalRoutes, podIP) + delete(routeInfo.PodExternalRoutes, podIP) continue } for gw, gr := range routes { @@ -498,7 +487,7 @@ func (oc *DefaultNetworkController) addGWRoutesForPod(gateways []*gatewayInfo, p podIP := podIPNet.IP.String() for _, gw := range gws { // if route was already programmed, skip it - if foundGR, ok := routeInfo.podExternalRoutes[podIP][gw]; ok && foundGR == gr { + if foundGR, ok := routeInfo.PodExternalRoutes[podIP][gw]; ok && foundGR == gr { routesAdded++ continue } @@ -507,12 +496,12 @@ func (oc *DefaultNetworkController) addGWRoutesForPod(gateways []*gatewayInfo, p if err := oc.createBFDStaticRoute(gateway.bfdEnabled, gw, podIP, gr, port, mask); err != nil { return err } - if routeInfo.podExternalRoutes[podIP] == nil { - routeInfo.podExternalRoutes[podIP] = make(map[string]string) + if routeInfo.PodExternalRoutes[podIP] == nil { + routeInfo.PodExternalRoutes[podIP] = make(map[string]string) } - routeInfo.podExternalRoutes[podIP][gw] = gr + routeInfo.PodExternalRoutes[podIP][gw] = gr routesAdded++ - if len(routeInfo.podExternalRoutes[podIP]) == 1 { + if len(routeInfo.PodExternalRoutes[podIP]) == 1 { if err := oc.addHybridRoutePolicyForPod(podIPNet.IP, node); err != nil { return err } @@ -878,135 +867,6 @@ func (oc *DefaultNetworkController) extSwitchPrefix(nodeName string) (string, er return "", nil } -func (oc *DefaultNetworkController) cleanExGwECMPRoutes() { - start := time.Now() - defer func() { - klog.Infof("Syncing exgw routes took %v", time.Since(start)) - }() - - // migration from LGW to SGW mode - // for shared gateway mode, these LRPs shouldn't exist, so delete them all - if config.Gateway.Mode == config.GatewayModeShared { - if err := oc.delAllHybridRoutePolicies(); err != nil { - klog.Errorf("Error while removing hybrid policies on moving to SGW mode, error: %v", err) - } - } else if config.Gateway.Mode == config.GatewayModeLocal { - // remove all legacy hybrid route policies - if err := 
oc.delAllLegacyHybridRoutePolicies(); err != nil { - klog.Errorf("Error while removing legacy hybrid policies, error: %v", err) - } - } - - // Get all ECMP routes in OVN and build cache - ovnRouteCache := oc.buildOVNECMPCache() - - if len(ovnRouteCache) == 0 { - // Even if no ECMP routes exist, we should ensure no 501 LRPs exist either - if err := oc.delAllHybridRoutePolicies(); err != nil { - klog.Errorf("Error while removing hybrid policies, error: %v", err) - } - // nothing in OVN, so no reason to search for stale routes - return - } - - // Build cache of expected routes in the cluster - // map[podIP][]nextHops - clusterRouteCache := make(map[string][]string) - - // Find all pods serving as exgw - oc.buildClusterECMPCacheFromPods(clusterRouteCache) - - // Get all namespaces with exgw routes specified - oc.buildClusterECMPCacheFromNamespaces(clusterRouteCache) - - // compare caches and see if OVN routes are stale - for podIP, ovnRoutes := range ovnRouteCache { - // pod IP does not exist in the cluster - // remove route and any hybrid policy - if _, ok := clusterRouteCache[podIP]; !ok { - continue - } - - // podIP exists, check if route matches - expectedNexthops := clusterRouteCache[podIP] - for _, ovnRoute := range ovnRoutes { - // if length of the output port is 0, this is a legacy route (we now always specify output interface) - if len(ovnRoute.outport) == 0 { - continue - } - - node := util.GetWorkerFromGatewayRouter(ovnRoute.router) - // prefix will signify secondary exgw bridge, or empty if normal setup - // have to determine if a node changed while master was down and if the route swapped from - // the default bridge to a new secondary bridge (or vice versa) - prefix, err := oc.extSwitchPrefix(node) - if err != nil { - // we shouldn't continue in this case, because we cant be sure this is a route we want to remove - klog.Errorf("Cannot sync exgw route: %+v, unable to determine exgw switch prefix: %v", - ovnRoute, err) - } else if (prefix != "" && 
!strings.Contains(ovnRoute.outport, prefix)) || - (prefix == "" && strings.Contains(ovnRoute.outport, types.EgressGWSwitchPrefix)) { - continue - } - - for _, clusterNexthop := range expectedNexthops { - if ovnRoute.nextHop == clusterNexthop { - ovnRoute.shouldExist = true - } - } - } - } - - klog.Infof("OVN ECMP route cache is: %+v", ovnRouteCache) - klog.Infof("Cluster ECMP route cache is: %+v", clusterRouteCache) - - // iterate through ovn routes and remove any stale entries - for podIP, ovnRoutes := range ovnRouteCache { - podHasAnyECMPRoutes := false - for _, ovnRoute := range ovnRoutes { - if !ovnRoute.shouldExist { - klog.Infof("Found stale exgw ecmp route, podIP: %s, nexthop: %s, router: %s", - podIP, ovnRoute.nextHop, ovnRoute.router) - lrsr := nbdb.LogicalRouterStaticRoute{UUID: ovnRoute.uuid} - err := libovsdbops.DeleteLogicalRouterStaticRoutes(oc.nbClient, ovnRoute.router, &lrsr) - if err != nil { - klog.Errorf("Error deleting static route %s from router %s: %v", ovnRoute.uuid, ovnRoute.router, err) - } - - // check to see if we should also clean up bfd - node := util.GetWorkerFromGatewayRouter(ovnRoute.router) - // prefix will signify secondary exgw bridge, or empty if normal setup - // have to determine if a node changed while master was down and if the route swapped from - // the default bridge to a new secondary bridge (or vice versa) - prefix, err := oc.extSwitchPrefix(node) - if err != nil { - // we shouldn't continue in this case, because we cant be sure this is a route we want to remove - klog.Errorf("Cannot sync exgw bfd: %+v, unable to determine exgw switch prefix: %v", - ovnRoute, err) - } else { - if err := oc.cleanUpBFDEntry(ovnRoute.nextHop, ovnRoute.router, prefix); err != nil { - klog.Errorf("Cannot clean up BFD entry: %w", err) - } - } - - } else { - podHasAnyECMPRoutes = true - } - } - - // if pod had no ECMP routes we need to make sure we remove logical route policy for local gw mode - if !podHasAnyECMPRoutes { - for _, ovnRoute := 
range ovnRoutes { - gr := strings.TrimPrefix(ovnRoute.router, types.GWRouterPrefix) - if err := oc.delHybridRoutePolicyForPod(net.ParseIP(podIP), gr); err != nil { - klog.Errorf("Error while removing hybrid policy for pod IP: %s, on node: %s, error: %v", - podIP, gr, err) - } - } - } - } -} - func getExGwPodIPs(gatewayPod *kapi.Pod) (sets.Set[string], error) { foundGws := sets.New[string]() if gatewayPod.Annotations[util.RoutingNetworkAnnotation] != "" { @@ -1041,142 +901,6 @@ func getExGwPodIPs(gatewayPod *kapi.Pod) (sets.Set[string], error) { return foundGws, nil } -func (oc *DefaultNetworkController) buildClusterECMPCacheFromNamespaces(clusterRouteCache map[string][]string) { - namespaces, err := oc.watchFactory.GetNamespaces() - if err != nil { - klog.Errorf("Error getting all namespaces for exgw ecmp route sync: %v", err) - return - } - for _, namespace := range namespaces { - if _, ok := namespace.Annotations[util.RoutingExternalGWsAnnotation]; !ok { - continue - } - // namespace has exgw routes, build cache - gwIPs, err := util.ParseRoutingExternalGWAnnotation(namespace.Annotations[util.RoutingExternalGWsAnnotation]) - if err != nil { - klog.Errorf("Unable to clean ExGw ECMP routes for namespace: %s, %v", namespace.Name, err) - continue - } - // get all pods in the namespace - nsPods, err := oc.watchFactory.GetPods(namespace.Name) - if err != nil { - klog.Errorf("Unable to clean ExGw ECMP routes for namespace: %s, %v", - namespace, err) - continue - } - for _, gwIP := range gwIPs.UnsortedList() { - for _, nsPod := range nsPods { - // ignore completed pods, host networked pods, pods not scheduled - if util.PodWantsHostNetwork(nsPod) || util.PodCompleted(nsPod) || !util.PodScheduled(nsPod) { - continue - } - for _, podIP := range nsPod.Status.PodIPs { - podIPStr := utilnet.ParseIPSloppy(podIP.IP).String() - if utilnet.IsIPv6String(gwIP) != utilnet.IsIPv6String(podIPStr) { - continue - } - if val, ok := clusterRouteCache[podIPStr]; ok { - // add gwIP to cache 
only if buildClusterECMPCacheFromPods hasn't already added it - gwIPexists := false - for _, existingGwIP := range val { - if existingGwIP == gwIP { - gwIPexists = true - break - } - } - if !gwIPexists { - clusterRouteCache[podIPStr] = append(clusterRouteCache[podIPStr], gwIP) - } - } else { - clusterRouteCache[podIPStr] = []string{gwIP} - } - } - } - } - } -} - -func (oc *DefaultNetworkController) buildClusterECMPCacheFromPods(clusterRouteCache map[string][]string) { - // Get all Pods serving as exgws - pods, err := oc.watchFactory.GetAllPods() - if err != nil { - klog.Error("Error getting all pods for exgw ecmp route sync: %v", err) - return - } - for _, pod := range pods { - podRoutingNamespaceAnno := pod.Annotations[util.RoutingNamespaceAnnotation] - if podRoutingNamespaceAnno == "" { - continue - } - // get all pods in the namespace - nsPods, err := oc.watchFactory.GetPods(podRoutingNamespaceAnno) - if err != nil { - klog.Errorf("Unable to clean ExGw ECMP routes for exgw: %s, serving namespace: %s, %v", - pod.Name, podRoutingNamespaceAnno, err) - continue - } - - // pod is serving as exgw, build cache - gwIPs, err := getExGwPodIPs(pod) - if err != nil { - klog.Errorf("Error getting exgw IPs for pod: %s, error: %v", pod.Name, err) - continue - } - for _, gwIP := range gwIPs.UnsortedList() { - for _, nsPod := range nsPods { - // ignore completed pods, host networked pods, pods not scheduled - if util.PodWantsHostNetwork(nsPod) || util.PodCompleted(nsPod) || !util.PodScheduled(nsPod) { - continue - } - for _, podIP := range nsPod.Status.PodIPs { - podIPStr := utilnet.ParseIPSloppy(podIP.IP).String() - if utilnet.IsIPv6String(gwIP) != utilnet.IsIPv6String(podIPStr) { - continue - } - clusterRouteCache[podIPStr] = append(clusterRouteCache[podIPStr], gwIP) - } - } - } - } -} - -func (oc *DefaultNetworkController) buildOVNECMPCache() map[string][]*ovnRoute { - p := func(item *nbdb.LogicalRouterStaticRoute) bool { - return item.Options["ecmp_symmetric_reply"] == 
"true" - } - logicalRouterStaticRoutes, err := libovsdbops.FindLogicalRouterStaticRoutesWithPredicate(oc.nbClient, p) - if err != nil { - klog.Errorf("CleanECMPRoutes: failed to list ecmp routes: %v", err) - return nil - } - - ovnRouteCache := make(map[string][]*ovnRoute) - for _, logicalRouterStaticRoute := range logicalRouterStaticRoutes { - p := func(item *nbdb.LogicalRouter) bool { - return util.SliceHasStringItem(item.StaticRoutes, logicalRouterStaticRoute.UUID) - } - logicalRouters, err := libovsdbops.FindLogicalRoutersWithPredicate(oc.nbClient, p) - if err != nil { - klog.Errorf("CleanECMPRoutes: failed to find logical router for %s, err: %v", logicalRouterStaticRoute.UUID, err) - continue - } - - route := &ovnRoute{ - nextHop: logicalRouterStaticRoute.Nexthop, - uuid: logicalRouterStaticRoute.UUID, - router: logicalRouters[0].Name, - outport: *logicalRouterStaticRoute.OutputPort, - } - podIP, _, _ := net.ParseCIDR(logicalRouterStaticRoute.IPPrefix) - if _, ok := ovnRouteCache[podIP.String()]; !ok { - ovnRouteCache[podIP.String()] = []*ovnRoute{route} - } else { - ovnRouteCache[podIP.String()] = append(ovnRouteCache[podIP.String()], route) - } - } - return ovnRouteCache -} - func makePodGWKey(pod *kapi.Pod) string { return fmt.Sprintf("%s_%s", pod.Namespace, pod.Name) } diff --git a/go-controller/pkg/ovn/external_gateway_test.go b/go-controller/pkg/ovn/external_gateway_test.go new file mode 100644 index 0000000000..fd938c27f5 --- /dev/null +++ b/go-controller/pkg/ovn/external_gateway_test.go @@ -0,0 +1,2891 @@ +package ovn + +import ( + "context" + "encoding/json" + "fmt" + "net" + "sync" + "time" + + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config" + adminpolicybasedrouteapi "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/adminpolicybasedroute/v1" + adminpolicybasedrouteclientset "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned" + 
"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" + + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/nbdb" + addressset "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/address_set" + libovsdbtest "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/testing/libovsdb" + + ovntypes "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" + + v1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/sets" + "k8s.io/client-go/kubernetes" + "k8s.io/klog/v2" + + nettypes "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/apis/k8s.cni.cncf.io/v1" + "github.com/onsi/ginkgo" + "github.com/onsi/ginkgo/extensions/table" + "github.com/onsi/gomega" + "github.com/urfave/cli/v2" +) + +var _ = ginkgo.Describe("OVN Egress Gateway Operations", func() { + const ( + namespaceName = "namespace1" + ) + var ( + app *cli.App + fakeOvn *FakeOVN + + bfd1NamedUUID = "bfd-1-UUID" + bfd2NamedUUID = "bfd-2-UUID" + logicalRouterPort = "rtoe-GR_node1" + ) + + ginkgo.BeforeEach(func() { + // Restore global default values before each testcase + gomega.Expect(config.PrepareTestConfig()).To(gomega.Succeed()) + + app = cli.NewApp() + app.Name = "test" + app.Flags = config.Flags + + fakeOvn = NewFakeOVN(true) + }) + + ginkgo.AfterEach(func() { + fakeOvn.shutdown() + }) + + ginkgo.Context("on setting namespace gateway static hop", func() { + + table.DescribeTable("reconciles an new pod with namespace single exgw static GW already set", func(bfd bool, finalNB []libovsdbtest.TestData) { + app.Action = func(ctx *cli.Context) error { + + namespaceT := *newNamespace(namespaceName) + + t := newTPod( + "node1", + "10.128.1.0/24", + "10.128.1.2", + "10.128.1.1", + "myPod", + "10.128.1.3", + "0a:58:0a:80:01:03", + namespaceT.Name, + ) + + fakeOvn.startWithDBSetup( + libovsdbtest.TestSetup{ + NBData: []libovsdbtest.TestData{ + &nbdb.LogicalSwitch{ + UUID: 
"node1", + Name: "node1", + }, + &nbdb.LogicalRouter{ + UUID: "GR_node1-UUID", + Name: "GR_node1", + }, + }, + }, + &v1.NamespaceList{ + Items: []v1.Namespace{ + namespaceT, + }, + }, + &v1.PodList{ + Items: []v1.Pod{ + *newPod(t.namespace, t.podName, t.nodeName, t.podIP), + }, + }, + &adminpolicybasedrouteapi.AdminPolicyBasedExternalRouteList{ + Items: []adminpolicybasedrouteapi.AdminPolicyBasedExternalRoute{ + newPolicy("policy", &metav1.LabelSelector{MatchLabels: map[string]string{"name": namespaceName}}, sets.NewString("9.0.0.1"), bfd, nil, nil, bfd, ""), + }, + }, + ) + + t.populateLogicalSwitchCache(fakeOvn, getLogicalSwitchUUID(fakeOvn.controller.nbClient, "node1")) + + injectNode(fakeOvn) + err := fakeOvn.controller.WatchNamespaces() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchPods() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + fakeOvn.RunAPBExternalPolicyController() + + gomega.Eventually(func() string { return getPodAnnotations(fakeOvn.fakeClient.KubeClient, t.namespace, t.podName) }, 2).Should(gomega.MatchJSON(`{"default": {"ip_addresses":["` + t.podIP + `/24"], "mac_address":"` + t.podMAC + `", "gateway_ips": ["` + t.nodeGWIP + `"], "ip_address":"` + t.podIP + `/24", "gateway_ip": "` + t.nodeGWIP + `"}}`)) + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(finalNB)) + return nil + } + + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }, table.Entry("No BFD", false, []libovsdbtest.TestData{ + &nbdb.LogicalSwitchPort{ + UUID: "lsp1", + Addresses: []string{"0a:58:0a:80:01:03 10.128.1.3"}, + ExternalIDs: map[string]string{ + "pod": "true", + "namespace": namespaceName, + }, + Name: "namespace1_myPod", + Options: map[string]string{ + "iface-id-ver": "myPod", + "requested-chassis": "node1", + }, + PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, + }, + &nbdb.LogicalSwitch{ + UUID: "node1", + Name: "node1", + Ports: []string{"lsp1"}, + }, + 
&nbdb.LogicalRouterStaticRoute{ + UUID: "static-route-1-UUID", + IPPrefix: "10.128.1.3/32", + Nexthop: "9.0.0.1", + Policy: &nbdb.LogicalRouterStaticRoutePolicySrcIP, + OutputPort: &logicalRouterPort, + Options: map[string]string{ + "ecmp_symmetric_reply": "true", + }, + }, + &nbdb.LogicalRouter{ + UUID: "GR_node1-UUID", + Name: "GR_node1", + StaticRoutes: []string{"static-route-1-UUID"}, + }, + }), + table.Entry("BFD Enabled", true, []libovsdbtest.TestData{ + &nbdb.LogicalSwitchPort{ + UUID: "lsp1", + Addresses: []string{"0a:58:0a:80:01:03 10.128.1.3"}, + ExternalIDs: map[string]string{ + "pod": "true", + "namespace": namespaceName, + }, + Name: "namespace1_myPod", + Options: map[string]string{ + "iface-id-ver": "myPod", + "requested-chassis": "node1", + }, + PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, + }, + &nbdb.LogicalSwitch{ + UUID: "node1", + Name: "node1", + Ports: []string{"lsp1"}, + }, + &nbdb.BFD{ + UUID: bfd1NamedUUID, + DstIP: "9.0.0.1", + LogicalPort: "rtoe-GR_node1", + }, + &nbdb.LogicalRouterStaticRoute{ + UUID: "static-route-1-UUID", + IPPrefix: "10.128.1.3/32", + Nexthop: "9.0.0.1", + BFD: &bfd1NamedUUID, + Policy: &nbdb.LogicalRouterStaticRoutePolicySrcIP, + OutputPort: &logicalRouterPort, + Options: map[string]string{ + "ecmp_symmetric_reply": "true", + }, + }, + &nbdb.LogicalRouter{ + UUID: "GR_node1-UUID", + Name: "GR_node1", + StaticRoutes: []string{"static-route-1-UUID"}, + }, + })) + + table.DescribeTable("reconciles an new pod with namespace single exgw static gateway already set with pod event first", func(bfd bool, finalNB []libovsdbtest.TestData) { + app.Action = func(ctx *cli.Context) error { + + namespaceT := *newNamespace(namespaceName) + + t := newTPod( + "node1", + "10.128.1.0/24", + "10.128.1.2", + "10.128.1.1", + "myPod", + "10.128.1.3", + "0a:58:0a:80:01:03", + namespaceT.Name, + ) + + fakeOvn.startWithDBSetup( + libovsdbtest.TestSetup{ + NBData: []libovsdbtest.TestData{ + &nbdb.LogicalSwitch{ + UUID: "node1", + 
Name: "node1", + }, + &nbdb.LogicalRouter{ + UUID: "GR_node1-UUID", + Name: "GR_node1", + }, + }, + }, + &v1.PodList{ + Items: []v1.Pod{ + *newPod(t.namespace, t.podName, t.nodeName, t.podIP), + }, + }, + &adminpolicybasedrouteapi.AdminPolicyBasedExternalRouteList{ + Items: []adminpolicybasedrouteapi.AdminPolicyBasedExternalRoute{ + newPolicy("policy", &metav1.LabelSelector{MatchLabels: map[string]string{"name": namespaceName}}, sets.NewString("9.0.0.1"), bfd, nil, nil, bfd, ""), + }, + }, + ) + t.populateLogicalSwitchCache(fakeOvn, getLogicalSwitchUUID(fakeOvn.controller.nbClient, "node1")) + + injectNode(fakeOvn) + err := fakeOvn.controller.WatchNamespaces() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchPods() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + fakeOvn.RunAPBExternalPolicyController() + + _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Namespaces().Create(context.TODO(), &namespaceT, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + gomega.Eventually(func() string { return getPodAnnotations(fakeOvn.fakeClient.KubeClient, t.namespace, t.podName) }, 2).Should(gomega.MatchJSON(`{"default": {"ip_addresses":["` + t.podIP + `/24"], "mac_address":"` + t.podMAC + `", "gateway_ips": ["` + t.nodeGWIP + `"], "ip_address":"` + t.podIP + `/24", "gateway_ip": "` + t.nodeGWIP + `"}}`)) + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(finalNB)) + return nil + } + + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }, table.Entry("No BFD", false, []libovsdbtest.TestData{ + &nbdb.LogicalSwitchPort{ + UUID: "lsp1", + Addresses: []string{"0a:58:0a:80:01:03 10.128.1.3"}, + ExternalIDs: map[string]string{ + "pod": "true", + "namespace": namespaceName, + }, + Name: "namespace1_myPod", + Options: map[string]string{ + "iface-id-ver": "myPod", + "requested-chassis": "node1", + }, + PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, + }, + 
&nbdb.LogicalSwitch{ + UUID: "node1", + Name: "node1", + Ports: []string{"lsp1"}, + }, + &nbdb.LogicalRouterStaticRoute{ + UUID: "static-route-1-UUID", + IPPrefix: "10.128.1.3/32", + Nexthop: "9.0.0.1", + Policy: &nbdb.LogicalRouterStaticRoutePolicySrcIP, + OutputPort: &logicalRouterPort, + Options: map[string]string{ + "ecmp_symmetric_reply": "true", + }, + }, + &nbdb.LogicalRouter{ + UUID: "GR_node1-UUID", + Name: "GR_node1", + StaticRoutes: []string{"static-route-1-UUID"}, + }, + }), + table.Entry("BFD Enabled", true, []libovsdbtest.TestData{ + &nbdb.LogicalSwitchPort{ + UUID: "lsp1", + Addresses: []string{"0a:58:0a:80:01:03 10.128.1.3"}, + ExternalIDs: map[string]string{ + "pod": "true", + "namespace": namespaceName, + }, + Name: "namespace1_myPod", + Options: map[string]string{ + "iface-id-ver": "myPod", + "requested-chassis": "node1", + }, + PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, + }, + &nbdb.LogicalSwitch{ + UUID: "node1", + Name: "node1", + Ports: []string{"lsp1"}, + }, + &nbdb.BFD{ + UUID: bfd1NamedUUID, + DstIP: "9.0.0.1", + LogicalPort: "rtoe-GR_node1", + }, + &nbdb.LogicalRouterStaticRoute{ + UUID: "static-route-1-UUID", + IPPrefix: "10.128.1.3/32", + Nexthop: "9.0.0.1", + BFD: &bfd1NamedUUID, + Policy: &nbdb.LogicalRouterStaticRoutePolicySrcIP, + OutputPort: &logicalRouterPort, + Options: map[string]string{ + "ecmp_symmetric_reply": "true", + }, + }, + &nbdb.LogicalRouter{ + UUID: "GR_node1-UUID", + Name: "GR_node1", + StaticRoutes: []string{"static-route-1-UUID"}, + }, + })) + + table.DescribeTable("reconciles an new pod with namespace double exgw static gateways already set", func(bfd bool, finalNB []libovsdbtest.TestData) { + + app.Action = func(ctx *cli.Context) error { + + namespaceT := *newNamespace(namespaceName) + + t := newTPod( + "node1", + "10.128.1.0/24", + "10.128.1.2", + "10.128.1.1", + "myPod", + "10.128.1.3", + "0a:58:0a:80:01:03", + namespaceT.Name, + ) + + fakeOvn.startWithDBSetup( + libovsdbtest.TestSetup{ + NBData: 
[]libovsdbtest.TestData{ + &nbdb.LogicalSwitch{ + UUID: "node1", + Name: "node1", + }, + &nbdb.LogicalRouter{ + UUID: "GR_node1-UUID", + Name: "GR_node1", + }, + }, + }, + &v1.NamespaceList{ + Items: []v1.Namespace{ + namespaceT, + }, + }, + &v1.PodList{ + Items: []v1.Pod{ + *newPod(t.namespace, t.podName, t.nodeName, t.podIP), + }, + }, + &adminpolicybasedrouteapi.AdminPolicyBasedExternalRouteList{ + Items: []adminpolicybasedrouteapi.AdminPolicyBasedExternalRoute{ + newPolicy("policy", &metav1.LabelSelector{MatchLabels: map[string]string{"name": namespaceName}}, sets.NewString("9.0.0.1", "9.0.0.2"), bfd, nil, nil, bfd, ""), + }, + }, + ) + t.populateLogicalSwitchCache(fakeOvn, getLogicalSwitchUUID(fakeOvn.controller.nbClient, "node1")) + + injectNode(fakeOvn) + err := fakeOvn.controller.WatchNamespaces() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchPods() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + fakeOvn.RunAPBExternalPolicyController() + + gomega.Eventually(func() string { return getPodAnnotations(fakeOvn.fakeClient.KubeClient, t.namespace, t.podName) }, 2).Should(gomega.MatchJSON(`{"default": {"ip_addresses":["` + t.podIP + `/24"], "mac_address":"` + t.podMAC + `", "gateway_ips": ["` + t.nodeGWIP + `"], "ip_address":"` + t.podIP + `/24", "gateway_ip": "` + t.nodeGWIP + `"}}`)) + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(finalNB)) + return nil + } + + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }, + table.Entry("No BFD", false, []libovsdbtest.TestData{ + &nbdb.LogicalSwitchPort{ + UUID: "lsp1", + Addresses: []string{"0a:58:0a:80:01:03 10.128.1.3"}, + ExternalIDs: map[string]string{ + "pod": "true", + "namespace": namespaceName, + }, + Name: "namespace1_myPod", + Options: map[string]string{ + "iface-id-ver": "myPod", + "requested-chassis": "node1", + }, + PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, + }, + &nbdb.LogicalSwitch{ + UUID: 
"node1", + Name: "node1", + Ports: []string{"lsp1"}, + }, + &nbdb.LogicalRouterStaticRoute{ + UUID: "static-route-1-UUID", + IPPrefix: "10.128.1.3/32", + Nexthop: "9.0.0.1", + Policy: &nbdb.LogicalRouterStaticRoutePolicySrcIP, + OutputPort: &logicalRouterPort, + Options: map[string]string{ + "ecmp_symmetric_reply": "true", + }, + }, + &nbdb.LogicalRouterStaticRoute{ + UUID: "static-route-2-UUID", + IPPrefix: "10.128.1.3/32", + Nexthop: "9.0.0.2", + Policy: &nbdb.LogicalRouterStaticRoutePolicySrcIP, + OutputPort: &logicalRouterPort, + Options: map[string]string{ + "ecmp_symmetric_reply": "true", + }, + }, + &nbdb.LogicalRouter{ + UUID: "GR_node1-UUID", + Name: "GR_node1", + StaticRoutes: []string{"static-route-1-UUID", "static-route-2-UUID"}, + }, + }), + table.Entry("BFD Enabled", true, []libovsdbtest.TestData{ + &nbdb.LogicalSwitchPort{ + UUID: "lsp1", + Addresses: []string{"0a:58:0a:80:01:03 10.128.1.3"}, + ExternalIDs: map[string]string{ + "pod": "true", + "namespace": namespaceName, + }, + Name: "namespace1_myPod", + Options: map[string]string{ + "iface-id-ver": "myPod", + "requested-chassis": "node1", + }, + PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, + }, + &nbdb.LogicalSwitch{ + UUID: "node1", + Name: "node1", + Ports: []string{"lsp1"}, + }, + &nbdb.BFD{ + UUID: bfd1NamedUUID, + DstIP: "9.0.0.1", + LogicalPort: "rtoe-GR_node1", + }, + &nbdb.BFD{ + UUID: bfd2NamedUUID, + DstIP: "9.0.0.2", + LogicalPort: "rtoe-GR_node1", + }, + &nbdb.LogicalRouterStaticRoute{ + UUID: "static-route-1-UUID", + IPPrefix: "10.128.1.3/32", + Nexthop: "9.0.0.1", + BFD: &bfd1NamedUUID, + Policy: &nbdb.LogicalRouterStaticRoutePolicySrcIP, + OutputPort: &logicalRouterPort, + Options: map[string]string{ + "ecmp_symmetric_reply": "true", + }, + }, + &nbdb.LogicalRouterStaticRoute{ + UUID: "static-route-2-UUID", + IPPrefix: "10.128.1.3/32", + Nexthop: "9.0.0.2", + Policy: &nbdb.LogicalRouterStaticRoutePolicySrcIP, + BFD: &bfd2NamedUUID, + OutputPort: &logicalRouterPort, + 
Options: map[string]string{ + "ecmp_symmetric_reply": "true", + }, + }, + &nbdb.LogicalRouter{ + UUID: "GR_node1-UUID", + Name: "GR_node1", + StaticRoutes: []string{"static-route-1-UUID", "static-route-2-UUID"}, + }, + }), + ) + + table.DescribeTable("reconciles deleting a pod with namespace double exgw static gateway already set", + func(bfd bool, + initNB []libovsdbtest.TestData, + syncNB []libovsdbtest.TestData, + finalNB []libovsdbtest.TestData, + ) { + app.Action = func(ctx *cli.Context) error { + + namespaceT := *newNamespace(namespaceName) + + t := newTPod( + "node1", + "10.128.1.0/24", + "10.128.1.2", + "10.128.1.1", + "myPod", + "10.128.1.3", + "0a:58:0a:80:01:03", + namespaceT.Name, + ) + + fakeOvn.startWithDBSetup( + libovsdbtest.TestSetup{ + NBData: initNB, + }, + &v1.NamespaceList{ + Items: []v1.Namespace{ + namespaceT, + }, + }, + &v1.PodList{ + Items: []v1.Pod{ + *newPod(t.namespace, t.podName, t.nodeName, t.podIP), + }, + }, + ) + t.populateLogicalSwitchCache(fakeOvn, getLogicalSwitchUUID(fakeOvn.controller.nbClient, "node1")) + + injectNode(fakeOvn) + err := fakeOvn.controller.WatchNamespaces() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchPods() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + fakeOvn.RunAPBExternalPolicyController() + + gomega.Eventually(fakeOvn.nbClient, 5).Should(libovsdbtest.HaveData(syncNB)) + gomega.Eventually(func() string { return getPodAnnotations(fakeOvn.fakeClient.KubeClient, t.namespace, t.podName) }, 2).Should(gomega.MatchJSON(`{"default": {"ip_addresses":["` + t.podIP + `/24"], "mac_address":"` + t.podMAC + `", "gateway_ips": ["` + t.nodeGWIP + `"], "ip_address":"` + t.podIP + `/24", "gateway_ip": "` + t.nodeGWIP + `"}}`)) + p := newPolicy("policy", &metav1.LabelSelector{MatchLabels: map[string]string{"name": namespaceName}}, sets.NewString("9.0.0.1", "9.0.0.2"), bfd, nil, nil, bfd, "") + _, err = 
fakeOvn.fakeClient.AdminPolicyRouteClient.K8sV1().AdminPolicyBasedExternalRoutes().Create(context.Background(), &p, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + deletePod(t.namespace, t.podName, fakeOvn.fakeClient.KubeClient) + + gomega.Eventually(fakeOvn.nbClient, 5).Should(libovsdbtest.HaveData(finalNB)) + return nil + } + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }, + table.Entry("No BFD", false, + []libovsdbtest.TestData{ + &nbdb.LogicalSwitch{ + UUID: "node1", + Name: "node1", + }, + &nbdb.LogicalRouterStaticRoute{ + UUID: "static-route-1-UUID", + IPPrefix: "10.128.1.3/32", + Nexthop: "9.0.0.1", + Policy: &nbdb.LogicalRouterStaticRoutePolicySrcIP, + OutputPort: &logicalRouterPort, + Options: map[string]string{ + "ecmp_symmetric_reply": "true", + }, + }, + &nbdb.LogicalRouterStaticRoute{ + UUID: "static-route-2-UUID", + IPPrefix: "10.128.1.3/32", + Nexthop: "9.0.0.2", + Policy: &nbdb.LogicalRouterStaticRoutePolicySrcIP, + OutputPort: &logicalRouterPort, + Options: map[string]string{ + "ecmp_symmetric_reply": "true", + }, + }, + &nbdb.LogicalRouter{ + UUID: "GR_node1-UUID", + Name: "GR_node1", + StaticRoutes: []string{"static-route-1-UUID", "static-route-2-UUID"}, + }, + }, + []libovsdbtest.TestData{ + &nbdb.LogicalSwitch{ + UUID: "node1", + Name: "node1", + Ports: []string{"lsp1"}, + }, + &nbdb.LogicalSwitchPort{ + UUID: "lsp1", + Addresses: []string{"0a:58:0a:80:01:03 10.128.1.3"}, + ExternalIDs: map[string]string{ + "pod": "true", + "namespace": namespaceName, + }, + Name: "namespace1_myPod", + Options: map[string]string{ + "iface-id-ver": "myPod", + "requested-chassis": "node1", + }, + PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, + }, + &nbdb.LogicalRouter{ + UUID: "GR_node1-UUID", + Name: "GR_node1", + StaticRoutes: []string{}, + }, + }, + []libovsdbtest.TestData{ + &nbdb.LogicalSwitch{ + UUID: "node1", + Name: "node1", + }, + &nbdb.LogicalRouter{ + UUID: "GR_node1-UUID", 
+ Name: "GR_node1", + StaticRoutes: []string{}, + }, + }, + ), + table.Entry("BFD", true, + []libovsdbtest.TestData{ + &nbdb.LogicalSwitch{ + UUID: "node1", + Name: "node1", + }, + &nbdb.BFD{ + UUID: bfd1NamedUUID, + DstIP: "9.0.0.1", + LogicalPort: "rtoe-GR_node1", + }, + &nbdb.BFD{ + UUID: bfd2NamedUUID, + DstIP: "9.0.0.2", + LogicalPort: "rtoe-GR_node1", + }, + &nbdb.LogicalRouterStaticRoute{ + UUID: "static-route-1-UUID", + IPPrefix: "10.128.1.3/32", + Nexthop: "9.0.0.1", + Policy: &nbdb.LogicalRouterStaticRoutePolicySrcIP, + BFD: &bfd1NamedUUID, + OutputPort: &logicalRouterPort, + Options: map[string]string{ + "ecmp_symmetric_reply": "true", + }, + }, + &nbdb.LogicalRouterStaticRoute{ + UUID: "static-route-2-UUID", + IPPrefix: "10.128.1.3/32", + Nexthop: "9.0.0.2", + Policy: &nbdb.LogicalRouterStaticRoutePolicySrcIP, + BFD: &bfd2NamedUUID, + OutputPort: &logicalRouterPort, + Options: map[string]string{ + "ecmp_symmetric_reply": "true", + }, + }, + &nbdb.LogicalRouter{ + UUID: "GR_node1-UUID", + Name: "GR_node1", + StaticRoutes: []string{"static-route-1-UUID", "static-route-2-UUID"}, + }, + }, + []libovsdbtest.TestData{ + &nbdb.LogicalSwitch{ + UUID: "node1", + Name: "node1", + Ports: []string{"lsp1"}, + }, + &nbdb.LogicalSwitchPort{ + UUID: "lsp1", + Addresses: []string{"0a:58:0a:80:01:03 10.128.1.3"}, + ExternalIDs: map[string]string{ + "pod": "true", + "namespace": namespaceName, + }, + Name: "namespace1_myPod", + Options: map[string]string{ + "iface-id-ver": "myPod", + "requested-chassis": "node1", + }, + PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, + }, + &nbdb.LogicalRouter{ + UUID: "GR_node1-UUID", + Name: "GR_node1", + StaticRoutes: []string{}, + }, + }, + []libovsdbtest.TestData{ + &nbdb.LogicalSwitch{ + UUID: "node1", + Name: "node1", + }, + &nbdb.LogicalRouter{ + UUID: "GR_node1-UUID", + Name: "GR_node1", + StaticRoutes: []string{}, + }, + }, + ), + ) + + table.DescribeTable("reconciles deleting a pod with namespace double exgw static 
gateway already set IPV6", + func(bfd bool, + initNB, syncNB, finalNB []libovsdbtest.TestData) { + app.Action = func(ctx *cli.Context) error { + namespaceT := *newNamespace(namespaceName) + + t := newTPod( + "node1", + "fd00:10:244:2::0/64", + "fd00:10:244:2::2", + "fd00:10:244:2::1", + "myPod", + "fd00:10:244:2::3", + "0a:58:49:a1:93:cb", + namespaceT.Name, + ) + + fakeOvn.startWithDBSetup( + libovsdbtest.TestSetup{ + NBData: initNB, + }, + &v1.NamespaceList{ + Items: []v1.Namespace{ + namespaceT, + }, + }, + &v1.PodList{ + Items: []v1.Pod{ + *newPod(t.namespace, t.podName, t.nodeName, t.podIP), + }, + }, + ) + config.IPv6Mode = true + t.populateLogicalSwitchCache(fakeOvn, getLogicalSwitchUUID(fakeOvn.controller.nbClient, "node1")) + injectNode(fakeOvn) + err := fakeOvn.controller.WatchNamespaces() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchPods() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + fakeOvn.RunAPBExternalPolicyController() + + gomega.Eventually(fakeOvn.nbClient, 5).Should(libovsdbtest.HaveData(syncNB)) + gomega.Eventually(func() string { return getPodAnnotations(fakeOvn.fakeClient.KubeClient, t.namespace, t.podName) }, 2).Should(gomega.MatchJSON(`{"default": {"ip_addresses":["` + t.podIP + `/64"], "mac_address":"` + t.podMAC + `", "gateway_ips": ["` + t.nodeGWIP + `"], "ip_address":"` + t.podIP + `/64", "gateway_ip": "` + t.nodeGWIP + `"}}`)) + p := newPolicy("policy", &metav1.LabelSelector{MatchLabels: map[string]string{"name": namespaceName}}, sets.NewString("fd2e:6f44:5dd8::89", "fd2e:6f44:5dd8::76"), bfd, nil, nil, bfd, "") + _, err = fakeOvn.fakeClient.AdminPolicyRouteClient.K8sV1().AdminPolicyBasedExternalRoutes().Create(context.Background(), &p, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + deletePod(t.namespace, t.podName, fakeOvn.fakeClient.KubeClient) + gomega.Eventually(fakeOvn.nbClient, 5).Should(libovsdbtest.HaveData(finalNB)) + return nil + } + err := 
app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }, + table.Entry("BFD IPV6", true, []libovsdbtest.TestData{ + &nbdb.LogicalSwitch{ + UUID: "node1", + Name: "node1", + }, + &nbdb.LogicalRouterStaticRoute{ + UUID: "static-route-1-UUID", + IPPrefix: "fd00:10:244:2::3/128", + BFD: &bfd1NamedUUID, + OutputPort: &logicalRouterPort, + Nexthop: "fd2e:6f44:5dd8::89", + Policy: &nbdb.LogicalRouterStaticRoutePolicySrcIP, + Options: map[string]string{ + "ecmp_symmetric_reply": "true", + }, + }, + &nbdb.LogicalRouterStaticRoute{ + UUID: "static-route-2-UUID", + IPPrefix: "fd00:10:244:2::3/128", + BFD: &bfd1NamedUUID, + OutputPort: &logicalRouterPort, + Nexthop: "fd2e:6f44:5dd8::76", + Policy: &nbdb.LogicalRouterStaticRoutePolicySrcIP, + Options: map[string]string{ + "ecmp_symmetric_reply": "true", + }, + }, + &nbdb.BFD{ + UUID: bfd2NamedUUID, + DstIP: "fd2e:6f44:5dd8::76", + LogicalPort: "rtoe-GR_node1", + }, + &nbdb.BFD{ + UUID: bfd1NamedUUID, + DstIP: "fd2e:6f44:5dd8::89", + LogicalPort: "rtoe-GR_node1", + }, + &nbdb.LogicalRouter{ + UUID: "GR_node1-UUID", + Name: "GR_node1", + StaticRoutes: []string{"static-route-1-UUID", "static-route-2-UUID"}, + }, + }, + []libovsdbtest.TestData{ + &nbdb.LogicalSwitch{ + UUID: "node1", + Name: "node1", + Ports: []string{"lsp1"}, + }, + &nbdb.LogicalSwitchPort{ + UUID: "lsp1", + Addresses: []string{"0a:58:49:a1:93:cb fd00:10:244:2::3"}, + ExternalIDs: map[string]string{ + "pod": "true", + "namespace": namespaceName, + }, + Name: "namespace1_myPod", + Options: map[string]string{ + "iface-id-ver": "myPod", + "requested-chassis": "node1", + }, + PortSecurity: []string{"0a:58:49:a1:93:cb fd00:10:244:2::3"}, + }, + &nbdb.LogicalRouter{ + UUID: "GR_node1-UUID", + Name: "GR_node1", + StaticRoutes: []string{}, + }, + }, + []libovsdbtest.TestData{ + &nbdb.LogicalSwitch{ + UUID: "node1", + Name: "node1", + }, + &nbdb.LogicalRouter{ + UUID: "GR_node1-UUID", + Name: "GR_node1", + }, + }, + ), + ) + + 
table.DescribeTable("reconciles deleting a exgw namespace with active pod", + func(bfd bool, + initNB []libovsdbtest.TestData, + finalNB []libovsdbtest.TestData, + ) { + app.Action = func(ctx *cli.Context) error { + + namespaceT := *newNamespace(namespaceName) + + t := newTPod( + "node1", + "10.128.1.0/24", + "10.128.1.2", + "10.128.1.1", + "myPod", + "10.128.1.3", + "0a:58:0a:80:01:03", + namespaceT.Name, + ) + + fakeOvn.startWithDBSetup( + libovsdbtest.TestSetup{ + NBData: initNB, + }, + &v1.NamespaceList{ + Items: []v1.Namespace{ + namespaceT, + }, + }, + &v1.PodList{ + Items: []v1.Pod{ + *newPod(t.namespace, t.podName, t.nodeName, t.podIP), + }, + }, + &adminpolicybasedrouteapi.AdminPolicyBasedExternalRouteList{ + Items: []adminpolicybasedrouteapi.AdminPolicyBasedExternalRoute{ + newPolicy("policy", &metav1.LabelSelector{MatchLabels: map[string]string{"name": namespaceName}}, sets.NewString("9.0.0.1", "9.0.0.2"), bfd, nil, nil, bfd, ""), + }, + }, + ) + t.populateLogicalSwitchCache(fakeOvn, getLogicalSwitchUUID(fakeOvn.controller.nbClient, "node1")) + + injectNode(fakeOvn) + err := fakeOvn.controller.WatchNamespaces() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchPods() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + fakeOvn.RunAPBExternalPolicyController() + + gomega.Eventually(func() string { return getPodAnnotations(fakeOvn.fakeClient.KubeClient, t.namespace, t.podName) }, 2).Should(gomega.MatchJSON(`{"default": {"ip_addresses":["` + t.podIP + `/24"], "mac_address":"` + t.podMAC + `", "gateway_ips": ["` + t.nodeGWIP + `"], "ip_address":"` + t.podIP + `/24", "gateway_ip": "` + t.nodeGWIP + `"}}`)) + + deleteNamespace(t.namespace, fakeOvn.fakeClient.KubeClient) + gomega.Eventually(fakeOvn.nbClient, 5).Should(libovsdbtest.HaveData(finalNB)) + return nil + } + + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }, + table.Entry("No BFD", false, + []libovsdbtest.TestData{ + 
&nbdb.LogicalSwitch{ + UUID: "node1", + Name: "node1", + }, + &nbdb.LogicalRouterStaticRoute{ + UUID: "static-route-1-UUID", + IPPrefix: "10.128.1.3/32", + Nexthop: "9.0.0.1", + Policy: &nbdb.LogicalRouterStaticRoutePolicySrcIP, + OutputPort: &logicalRouterPort, + Options: map[string]string{ + "ecmp_symmetric_reply": "true", + }, + }, + &nbdb.LogicalRouterStaticRoute{ + UUID: "static-route-2-UUID", + IPPrefix: "10.128.1.3/32", + Nexthop: "9.0.0.2", + Policy: &nbdb.LogicalRouterStaticRoutePolicySrcIP, + OutputPort: &logicalRouterPort, + Options: map[string]string{ + "ecmp_symmetric_reply": "true", + }, + }, + &nbdb.LogicalRouter{ + UUID: "GR_node1-UUID", + Name: "GR_node1", + StaticRoutes: []string{"static-route-1-UUID", "static-route-2-UUID"}, + }, + }, + []libovsdbtest.TestData{ + &nbdb.LogicalSwitchPort{ + UUID: "lsp1", + Addresses: []string{"0a:58:0a:80:01:03 10.128.1.3"}, + ExternalIDs: map[string]string{ + "pod": "true", + "namespace": namespaceName, + }, + Name: "namespace1_myPod", + Options: map[string]string{ + "iface-id-ver": "myPod", + "requested-chassis": "node1", + }, + PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, + }, + &nbdb.LogicalSwitch{ + UUID: "node1", + Name: "node1", + Ports: []string{"lsp1"}, + }, + &nbdb.LogicalRouter{ + UUID: "GR_node1-UUID", + Name: "GR_node1", + StaticRoutes: []string{}, + }, + }, + ), + table.Entry("BFD", true, + []libovsdbtest.TestData{ + &nbdb.LogicalSwitch{ + UUID: "node1", + Name: "node1", + }, + &nbdb.BFD{ + UUID: "bfd1-UUID", + DstIP: "9.0.0.1", + LogicalPort: "rtoe-GR_node1", + }, + &nbdb.BFD{ + UUID: "bfd2-UUID", + DstIP: "9.0.0.2", + LogicalPort: "rtoe-GR_node1", + }, + &nbdb.LogicalRouterStaticRoute{ + UUID: "static-route-1-UUID", + IPPrefix: "10.128.1.3/32", + Nexthop: "9.0.0.1", + BFD: &bfd1NamedUUID, + Policy: &nbdb.LogicalRouterStaticRoutePolicySrcIP, + OutputPort: &logicalRouterPort, + Options: map[string]string{ + "ecmp_symmetric_reply": "true", + }, + }, + &nbdb.LogicalRouterStaticRoute{ + UUID: 
"static-route-2-UUID", + IPPrefix: "10.128.1.3/32", + BFD: &bfd2NamedUUID, + Nexthop: "9.0.0.2", + Policy: &nbdb.LogicalRouterStaticRoutePolicySrcIP, + OutputPort: &logicalRouterPort, + Options: map[string]string{ + "ecmp_symmetric_reply": "true", + }, + }, + &nbdb.LogicalRouter{ + UUID: "GR_node1-UUID", + Name: "GR_node1", + StaticRoutes: []string{"static-route-1-UUID", "static-route-2-UUID"}, + }, + }, + []libovsdbtest.TestData{ + &nbdb.LogicalSwitchPort{ + UUID: "lsp1", + Addresses: []string{"0a:58:0a:80:01:03 10.128.1.3"}, + ExternalIDs: map[string]string{ + "pod": "true", + "namespace": namespaceName, + }, + Name: "namespace1_myPod", + Options: map[string]string{ + "iface-id-ver": "myPod", + "requested-chassis": "node1", + }, + PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, + }, + &nbdb.LogicalSwitch{ + UUID: "node1", + Name: "node1", + Ports: []string{"lsp1"}, + }, + &nbdb.LogicalRouter{ + UUID: "GR_node1-UUID", + Name: "GR_node1", + StaticRoutes: []string{}, + }, + }, + )) + }) + + ginkgo.Context("on setting pod dynamic gateways", func() { + table.DescribeTable("reconciles a host networked pod acting as a exgw for another namespace for new pod", func(bfd bool, finalNB []libovsdbtest.TestData) { + app.Action = func(ctx *cli.Context) error { + + namespaceT := *newNamespace(namespaceName) + namespaceX := *newNamespace("namespace2") + t := newTPod( + "node1", + "10.128.1.0/24", + "10.128.1.2", + "10.128.1.1", + "myPod", + "10.128.1.3", + "0a:58:0a:80:01:03", + namespaceT.Name, + ) + gwPod := *newPod(namespaceX.Name, "gwPod", "node2", "9.0.0.1") + gwPod.Spec.HostNetwork = true + + fakeOvn.startWithDBSetup( + libovsdbtest.TestSetup{ + NBData: []libovsdbtest.TestData{ + &nbdb.LogicalSwitch{ + UUID: "node1", + Name: "node1", + }, + &nbdb.LogicalRouter{ + UUID: "GR_node1-UUID", + Name: "GR_node1", + }, + }, + }, + &v1.NamespaceList{ + Items: []v1.Namespace{ + namespaceT, namespaceX, + }, + }, + &v1.PodList{ + Items: []v1.Pod{ + gwPod, + }, + }, + 
&adminpolicybasedrouteapi.AdminPolicyBasedExternalRouteList{ + Items: []adminpolicybasedrouteapi.AdminPolicyBasedExternalRoute{ + newPolicy("policy", + &metav1.LabelSelector{MatchLabels: map[string]string{"name": namespaceName}}, + nil, + bfd, + &metav1.LabelSelector{MatchLabels: map[string]string{"name": namespaceX.Name}}, + &metav1.LabelSelector{MatchLabels: map[string]string{"name": gwPod.Name}}, + bfd, + ""), + }, + }, + ) + t.populateLogicalSwitchCache(fakeOvn, getLogicalSwitchUUID(fakeOvn.controller.nbClient, "node1")) + injectNode(fakeOvn) + err := fakeOvn.controller.WatchNamespaces() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchPods() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + fakeOvn.RunAPBExternalPolicyController() + + _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Pods(t.namespace).Create(context.TODO(), newPod(t.namespace, t.podName, t.nodeName, t.podIP), metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Eventually(func() string { return getPodAnnotations(fakeOvn.fakeClient.KubeClient, t.namespace, t.podName) }, 2).Should(gomega.MatchJSON(`{"default": {"ip_addresses":["` + t.podIP + `/24"], "mac_address":"` + t.podMAC + `", "gateway_ips": ["` + t.nodeGWIP + `"], "ip_address":"` + t.podIP + `/24", "gateway_ip": "` + t.nodeGWIP + `"}}`)) + gomega.Eventually(fakeOvn.nbClient, 5).Should(libovsdbtest.HaveData(finalNB)) + return nil + } + + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }, table.Entry("No BFD", false, []libovsdbtest.TestData{ + &nbdb.LogicalSwitchPort{ + UUID: "lsp1", + Addresses: []string{"0a:58:0a:80:01:03 10.128.1.3"}, + ExternalIDs: map[string]string{ + "pod": "true", + "namespace": namespaceName, + }, + Name: "namespace1_myPod", + Options: map[string]string{ + "iface-id-ver": "myPod", + "requested-chassis": "node1", + }, + PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, + }, + &nbdb.LogicalSwitch{ + UUID: 
"node1", + Name: "node1", + Ports: []string{"lsp1"}, + }, + &nbdb.LogicalRouterStaticRoute{ + UUID: "static-route-1-UUID", + IPPrefix: "10.128.1.3/32", + Nexthop: "9.0.0.1", + Policy: &nbdb.LogicalRouterStaticRoutePolicySrcIP, + OutputPort: &logicalRouterPort, + Options: map[string]string{ + "ecmp_symmetric_reply": "true", + }, + }, + &nbdb.LogicalRouter{ + UUID: "GR_node1-UUID", + Name: "GR_node1", + StaticRoutes: []string{"static-route-1-UUID"}, + }, + }), + table.Entry("BFD Enabled", true, []libovsdbtest.TestData{ + &nbdb.LogicalSwitchPort{ + UUID: "lsp1", + Addresses: []string{"0a:58:0a:80:01:03 10.128.1.3"}, + ExternalIDs: map[string]string{ + "pod": "true", + "namespace": namespaceName, + }, + Name: "namespace1_myPod", + Options: map[string]string{ + "iface-id-ver": "myPod", + "requested-chassis": "node1", + }, + PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, + }, + &nbdb.LogicalSwitch{ + UUID: "node1", + Name: "node1", + Ports: []string{"lsp1"}, + }, + &nbdb.BFD{ + UUID: bfd1NamedUUID, + DstIP: "9.0.0.1", + LogicalPort: "rtoe-GR_node1", + }, + &nbdb.LogicalRouterStaticRoute{ + UUID: "static-route-1-UUID", + IPPrefix: "10.128.1.3/32", + Nexthop: "9.0.0.1", + BFD: &bfd1NamedUUID, + Policy: &nbdb.LogicalRouterStaticRoutePolicySrcIP, + OutputPort: &logicalRouterPort, + Options: map[string]string{ + "ecmp_symmetric_reply": "true", + }, + }, + &nbdb.LogicalRouter{ + UUID: "GR_node1-UUID", + Name: "GR_node1", + StaticRoutes: []string{"static-route-1-UUID"}, + }, + })) + + table.DescribeTable("reconciles a host networked pod acting as a exgw for another namespace for existing pod", func(bfd bool, finalNB []libovsdbtest.TestData) { + app.Action = func(ctx *cli.Context) error { + + namespaceT := *newNamespace(namespaceName) + namespaceX := *newNamespace("namespace2") + t := newTPod( + "node1", + "10.128.1.0/24", + "10.128.1.2", + "10.128.1.1", + "myPod", + "10.128.1.3", + "0a:58:0a:80:01:03", + namespaceT.Name, + ) + gwPod := *newPod(namespaceX.Name, "gwPod", 
"node2", "9.0.0.1") + gwPod.Spec.HostNetwork = true + fakeOvn.startWithDBSetup( + libovsdbtest.TestSetup{ + NBData: []libovsdbtest.TestData{ + &nbdb.LogicalSwitch{ + UUID: "node1", + Name: "node1", + }, + &nbdb.LogicalRouter{ + UUID: "GR_node1-UUID", + Name: "GR_node1", + }, + }, + }, + &v1.NamespaceList{ + Items: []v1.Namespace{ + namespaceT, namespaceX, + }, + }, + &v1.PodList{ + Items: []v1.Pod{ + *newPod(t.namespace, t.podName, t.nodeName, t.podIP), + }, + }, + &adminpolicybasedrouteapi.AdminPolicyBasedExternalRouteList{ + Items: []adminpolicybasedrouteapi.AdminPolicyBasedExternalRoute{ + newPolicy("policy", + &metav1.LabelSelector{MatchLabels: map[string]string{"name": namespaceName}}, + nil, + bfd, + &metav1.LabelSelector{MatchLabels: map[string]string{"name": namespaceX.Name}}, + &metav1.LabelSelector{MatchLabels: map[string]string{"name": gwPod.Name}}, + bfd, + ""), + }, + }, + ) + t.populateLogicalSwitchCache(fakeOvn, getLogicalSwitchUUID(fakeOvn.controller.nbClient, "node1")) + injectNode(fakeOvn) + err := fakeOvn.controller.WatchNamespaces() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchPods() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + fakeOvn.RunAPBExternalPolicyController() + + _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Pods(namespaceX.Name).Create(context.TODO(), &gwPod, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(finalNB)) + return nil + } + + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }, table.Entry("No BFD", false, []libovsdbtest.TestData{ + &nbdb.LogicalSwitchPort{ + UUID: "lsp1", + Addresses: []string{"0a:58:0a:80:01:03 10.128.1.3"}, + ExternalIDs: map[string]string{ + "pod": "true", + "namespace": namespaceName, + }, + Name: "namespace1_myPod", + Options: map[string]string{ + "iface-id-ver": "myPod", + "requested-chassis": "node1", + }, + PortSecurity: 
[]string{"0a:58:0a:80:01:03 10.128.1.3"}, + }, + &nbdb.LogicalSwitch{ + UUID: "node1", + Name: "node1", + Ports: []string{"lsp1"}, + }, + &nbdb.LogicalRouterStaticRoute{ + UUID: "static-route-1-UUID", + IPPrefix: "10.128.1.3/32", + Nexthop: "9.0.0.1", + Policy: &nbdb.LogicalRouterStaticRoutePolicySrcIP, + OutputPort: &logicalRouterPort, + Options: map[string]string{ + "ecmp_symmetric_reply": "true", + }, + }, + &nbdb.LogicalRouter{ + UUID: "GR_node1-UUID", + Name: "GR_node1", + StaticRoutes: []string{"static-route-1-UUID"}, + }, + }), + table.Entry("BFD Enabled", true, []libovsdbtest.TestData{ + &nbdb.LogicalSwitchPort{ + UUID: "lsp1", + Addresses: []string{"0a:58:0a:80:01:03 10.128.1.3"}, + ExternalIDs: map[string]string{ + "pod": "true", + "namespace": namespaceName, + }, + Name: "namespace1_myPod", + Options: map[string]string{ + "iface-id-ver": "myPod", + "requested-chassis": "node1", + }, + PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, + }, + &nbdb.LogicalSwitch{ + UUID: "node1", + Name: "node1", + Ports: []string{"lsp1"}, + }, + &nbdb.BFD{ + UUID: bfd1NamedUUID, + DstIP: "9.0.0.1", + LogicalPort: "rtoe-GR_node1", + }, + &nbdb.LogicalRouterStaticRoute{ + UUID: "static-route-1-UUID", + IPPrefix: "10.128.1.3/32", + Nexthop: "9.0.0.1", + BFD: &bfd1NamedUUID, + Policy: &nbdb.LogicalRouterStaticRoutePolicySrcIP, + OutputPort: &logicalRouterPort, + Options: map[string]string{ + "ecmp_symmetric_reply": "true", + }, + }, + &nbdb.LogicalRouter{ + UUID: "GR_node1-UUID", + Name: "GR_node1", + StaticRoutes: []string{"static-route-1-UUID"}, + }, + })) + + table.DescribeTable("reconciles a multus networked pod acting as a exgw for another namespace for new pod", func(bfd bool, finalNB []libovsdbtest.TestData) { + app.Action = func(ctx *cli.Context) error { + ns := nettypes.NetworkStatus{Name: "dummy", IPs: []string{"11.0.0.1"}} + networkStatuses := []nettypes.NetworkStatus{ns} + nsEncoded, err := json.Marshal(networkStatuses) + 
gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + namespaceT := *newNamespace(namespaceName) + namespaceX := *newNamespace("namespace2") + t := newTPod( + "node1", + "10.128.1.0/24", + "10.128.1.2", + "10.128.1.1", + "myPod", + "10.128.1.3", + "0a:58:0a:80:01:03", + namespaceT.Name, + ) + gwPod := *newPod(namespaceX.Name, "gwPod", "node2", "9.0.0.1") + gwPod.Annotations = map[string]string{ + "k8s.v1.cni.cncf.io/network-status": string(nsEncoded), + } + gwPod.Spec.HostNetwork = true + fakeOvn.startWithDBSetup( + libovsdbtest.TestSetup{ + NBData: []libovsdbtest.TestData{ + &nbdb.LogicalSwitch{ + UUID: "node1", + Name: "node1", + }, + &nbdb.LogicalRouter{ + UUID: "GR_node1-UUID", + Name: "GR_node1", + }, + }, + }, + &v1.NamespaceList{ + Items: []v1.Namespace{ + namespaceT, namespaceX, + }, + }, + &v1.PodList{ + Items: []v1.Pod{ + gwPod, + }, + }, + &adminpolicybasedrouteapi.AdminPolicyBasedExternalRouteList{ + Items: []adminpolicybasedrouteapi.AdminPolicyBasedExternalRoute{ + newPolicy("policy", + &metav1.LabelSelector{MatchLabels: map[string]string{"name": namespaceName}}, + nil, + bfd, + &metav1.LabelSelector{MatchLabels: map[string]string{"name": namespaceX.Name}}, + &metav1.LabelSelector{MatchLabels: map[string]string{"name": gwPod.Name}}, + bfd, + "dummy"), + }, + }, + ) + t.populateLogicalSwitchCache(fakeOvn, getLogicalSwitchUUID(fakeOvn.controller.nbClient, "node1")) + injectNode(fakeOvn) + err = fakeOvn.controller.WatchNamespaces() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchPods() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + fakeOvn.RunAPBExternalPolicyController() + + _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Pods(t.namespace).Create(context.TODO(), newPod(t.namespace, t.podName, t.nodeName, t.podIP), metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Eventually(func() string { return getPodAnnotations(fakeOvn.fakeClient.KubeClient, t.namespace, t.podName) }, 
2).Should(gomega.MatchJSON(`{"default": {"ip_addresses":["` + t.podIP + `/24"], "mac_address":"` + t.podMAC + `", "gateway_ips": ["` + t.nodeGWIP + `"], "ip_address":"` + t.podIP + `/24", "gateway_ip": "` + t.nodeGWIP + `"}}`)) + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(finalNB)) + return nil + } + + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }, table.Entry("No BFD", false, []libovsdbtest.TestData{ + &nbdb.LogicalSwitchPort{ + UUID: "lsp1", + Addresses: []string{"0a:58:0a:80:01:03 10.128.1.3"}, + ExternalIDs: map[string]string{ + "pod": "true", + "namespace": namespaceName, + }, + Name: "namespace1_myPod", + Options: map[string]string{ + "iface-id-ver": "myPod", + "requested-chassis": "node1", + }, + PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, + }, + &nbdb.LogicalSwitch{ + UUID: "node1", + Name: "node1", + Ports: []string{"lsp1"}, + }, + &nbdb.LogicalRouterStaticRoute{ + UUID: "static-route-1-UUID", + IPPrefix: "10.128.1.3/32", + Nexthop: "11.0.0.1", + Policy: &nbdb.LogicalRouterStaticRoutePolicySrcIP, + OutputPort: &logicalRouterPort, + Options: map[string]string{ + "ecmp_symmetric_reply": "true", + }, + }, + &nbdb.LogicalRouter{ + UUID: "GR_node1-UUID", + Name: "GR_node1", + StaticRoutes: []string{"static-route-1-UUID"}, + }, + }), + table.Entry("BFD Enabled", true, []libovsdbtest.TestData{ + &nbdb.LogicalSwitchPort{ + UUID: "lsp1", + Addresses: []string{"0a:58:0a:80:01:03 10.128.1.3"}, + ExternalIDs: map[string]string{ + "pod": "true", + "namespace": namespaceName, + }, + Name: "namespace1_myPod", + Options: map[string]string{ + "iface-id-ver": "myPod", + "requested-chassis": "node1", + }, + PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, + }, + &nbdb.LogicalSwitch{ + UUID: "node1", + Name: "node1", + Ports: []string{"lsp1"}, + }, + &nbdb.BFD{ + UUID: bfd1NamedUUID, + DstIP: "11.0.0.1", + LogicalPort: "rtoe-GR_node1", + }, + &nbdb.LogicalRouterStaticRoute{ + UUID: 
"static-route-1-UUID", + IPPrefix: "10.128.1.3/32", + Nexthop: "11.0.0.1", + BFD: &bfd1NamedUUID, + Policy: &nbdb.LogicalRouterStaticRoutePolicySrcIP, + OutputPort: &logicalRouterPort, + Options: map[string]string{ + "ecmp_symmetric_reply": "true", + }, + }, + &nbdb.LogicalRouter{ + UUID: "GR_node1-UUID", + Name: "GR_node1", + StaticRoutes: []string{"static-route-1-UUID"}, + }, + })) + + table.DescribeTable("reconciles deleting a host networked pod acting as a exgw for another namespace for existing pod", + func(bfd bool, + beforeDeleteNB []libovsdbtest.TestData, + afterDeleteNB []libovsdbtest.TestData) { + app.Action = func(ctx *cli.Context) error { + + namespaceT := *newNamespace(namespaceName) + namespaceX := *newNamespace("namespace2") + t := newTPod( + "node1", + "10.128.1.0/24", + "10.128.1.2", + "10.128.1.1", + "myPod", + "10.128.1.3", + "0a:58:0a:80:01:03", + namespaceT.Name, + ) + gwPod := *newPod(namespaceX.Name, "gwPod", "node2", "9.0.0.1") + gwPod.Spec.HostNetwork = true + fakeOvn.startWithDBSetup( + libovsdbtest.TestSetup{ + NBData: []libovsdbtest.TestData{ + &nbdb.LogicalSwitch{ + UUID: "node1", + Name: "node1", + }, + &nbdb.LogicalRouter{ + UUID: "GR_node1-UUID", + Name: "GR_node1", + }, + }, + }, + &v1.NamespaceList{ + Items: []v1.Namespace{ + namespaceT, namespaceX, + }, + }, + &v1.PodList{ + Items: []v1.Pod{ + *newPod(t.namespace, t.podName, t.nodeName, t.podIP), + }, + }, + &adminpolicybasedrouteapi.AdminPolicyBasedExternalRouteList{ + Items: []adminpolicybasedrouteapi.AdminPolicyBasedExternalRoute{ + newPolicy("policy", + &metav1.LabelSelector{MatchLabels: map[string]string{"name": namespaceName}}, + nil, + bfd, + &metav1.LabelSelector{MatchLabels: map[string]string{"name": namespaceX.Name}}, + &metav1.LabelSelector{MatchLabels: map[string]string{"name": gwPod.Name}}, + bfd, + "", + ), + }, + }, + ) + t.populateLogicalSwitchCache(fakeOvn, getLogicalSwitchUUID(fakeOvn.controller.nbClient, "node1")) + injectNode(fakeOvn) + err := 
fakeOvn.controller.WatchNamespaces() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchPods() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + fakeOvn.RunAPBExternalPolicyController() + + _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Pods(namespaceX.Name).Create(context.TODO(), &gwPod, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(beforeDeleteNB)) + + deletePod(gwPod.Namespace, gwPod.Name, fakeOvn.fakeClient.KubeClient) + + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(afterDeleteNB)) + gomega.Eventually(func() string { + return getNamespaceAnnotations(fakeOvn.fakeClient.KubeClient, namespaceT.Name)[util.ExternalGatewayPodIPsAnnotation] + }, 5).Should(gomega.Equal("")) + return nil + } + + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }, + table.Entry("No BFD", false, + []libovsdbtest.TestData{ + &nbdb.LogicalSwitchPort{ + UUID: "lsp1", + Addresses: []string{"0a:58:0a:80:01:03 10.128.1.3"}, + ExternalIDs: map[string]string{ + "pod": "true", + "namespace": namespaceName, + }, + Name: "namespace1_myPod", + Options: map[string]string{ + "iface-id-ver": "myPod", + "requested-chassis": "node1", + }, + PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, + }, + &nbdb.LogicalSwitch{ + UUID: "node1", + Name: "node1", + Ports: []string{"lsp1"}, + }, + &nbdb.LogicalRouterStaticRoute{ + UUID: "static-route-1-UUID", + IPPrefix: "10.128.1.3/32", + Nexthop: "9.0.0.1", + Policy: &nbdb.LogicalRouterStaticRoutePolicySrcIP, + OutputPort: &logicalRouterPort, + Options: map[string]string{ + "ecmp_symmetric_reply": "true", + }, + }, + &nbdb.LogicalRouter{ + UUID: "GR_node1-UUID", + Name: "GR_node1", + StaticRoutes: []string{"static-route-1-UUID"}, + }, + }, + []libovsdbtest.TestData{ + &nbdb.LogicalSwitchPort{ + UUID: "lsp1", + Addresses: []string{"0a:58:0a:80:01:03 10.128.1.3"}, + ExternalIDs: 
map[string]string{ + "pod": "true", + "namespace": namespaceName, + }, + Name: "namespace1_myPod", + Options: map[string]string{ + "iface-id-ver": "myPod", + "requested-chassis": "node1", + }, + PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, + }, + &nbdb.LogicalSwitch{ + UUID: "node1", + Name: "node1", + Ports: []string{"lsp1"}, + }, + &nbdb.LogicalRouter{ + UUID: "GR_node1-UUID", + Name: "GR_node1", + StaticRoutes: []string{}, + }, + }, + ), + table.Entry("BFD Enabled", true, []libovsdbtest.TestData{ + &nbdb.LogicalSwitchPort{ + UUID: "lsp1", + Addresses: []string{"0a:58:0a:80:01:03 10.128.1.3"}, + ExternalIDs: map[string]string{ + "pod": "true", + "namespace": namespaceName, + }, + Name: "namespace1_myPod", + Options: map[string]string{ + "iface-id-ver": "myPod", + "requested-chassis": "node1", + }, + PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, + }, + &nbdb.LogicalSwitch{ + UUID: "node1", + Name: "node1", + Ports: []string{"lsp1"}, + }, + &nbdb.BFD{ + UUID: bfd1NamedUUID, + DstIP: "9.0.0.1", + LogicalPort: "rtoe-GR_node1", + }, + &nbdb.LogicalRouterStaticRoute{ + UUID: "static-route-1-UUID", + IPPrefix: "10.128.1.3/32", + Nexthop: "9.0.0.1", + BFD: &bfd1NamedUUID, + Policy: &nbdb.LogicalRouterStaticRoutePolicySrcIP, + OutputPort: &logicalRouterPort, + Options: map[string]string{ + "ecmp_symmetric_reply": "true", + }, + }, + &nbdb.LogicalRouter{ + UUID: "GR_node1-UUID", + Name: "GR_node1", + StaticRoutes: []string{"static-route-1-UUID"}, + }, + }, + []libovsdbtest.TestData{ + &nbdb.LogicalSwitchPort{ + UUID: "lsp1", + Addresses: []string{"0a:58:0a:80:01:03 10.128.1.3"}, + ExternalIDs: map[string]string{ + "pod": "true", + "namespace": namespaceName, + }, + Name: "namespace1_myPod", + Options: map[string]string{ + "iface-id-ver": "myPod", + "requested-chassis": "node1", + }, + PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, + }, + &nbdb.LogicalSwitch{ + UUID: "node1", + Name: "node1", + Ports: []string{"lsp1"}, + }, + 
&nbdb.LogicalRouter{ + UUID: "GR_node1-UUID", + Name: "GR_node1", + StaticRoutes: []string{}, + }, + }, + ), + ) + }) + ginkgo.Context("on using bfd", func() { + ginkgo.It("should enable bfd only on the namespace gw when set", func() { + app.Action = func(ctx *cli.Context) error { + + namespaceT := *newNamespace(namespaceName) + namespaceX := *newNamespace("namespace2") + + t := newTPod( + "node1", + "10.128.1.0/24", + "10.128.1.2", + "10.128.1.1", + "myPod", + "10.128.1.3", + "0a:58:0a:80:01:03", + namespaceT.Name, + ) + gwPod := *newPod(namespaceX.Name, "gwPod", "node2", "10.0.0.1") + gwPod.Spec.HostNetwork = true + + fakeOvn.startWithDBSetup( + libovsdbtest.TestSetup{ + NBData: []libovsdbtest.TestData{ + &nbdb.LogicalSwitch{ + UUID: "node1", + Name: "node1", + }, + &nbdb.LogicalRouter{ + UUID: "GR_node1-UUID", + Name: "GR_node1", + }, + }, + }, + &v1.NamespaceList{ + Items: []v1.Namespace{ + namespaceT, namespaceX, + }, + }, + &v1.PodList{ + Items: []v1.Pod{ + *newPod(t.namespace, t.podName, t.nodeName, t.podIP), + }, + }, + &adminpolicybasedrouteapi.AdminPolicyBasedExternalRouteList{ + Items: []adminpolicybasedrouteapi.AdminPolicyBasedExternalRoute{ + newPolicy("policy", + &metav1.LabelSelector{MatchLabels: map[string]string{"name": namespaceT.Name}}, + sets.NewString("9.0.0.1"), + true, + &metav1.LabelSelector{MatchLabels: map[string]string{"name": namespaceX.Name}}, + &metav1.LabelSelector{MatchLabels: map[string]string{"name": gwPod.Name}}, + false, + "", + ), + }, + }, + ) + t.populateLogicalSwitchCache(fakeOvn, getLogicalSwitchUUID(fakeOvn.controller.nbClient, "node1")) + + injectNode(fakeOvn) + err := fakeOvn.controller.WatchNamespaces() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchPods() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + fakeOvn.RunAPBExternalPolicyController() + + _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Pods(namespaceX.Name).Create(context.TODO(), &gwPod, metav1.CreateOptions{}) + 
gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + finalNB := []libovsdbtest.TestData{ + &nbdb.LogicalSwitchPort{ + UUID: "lsp1", + Addresses: []string{"0a:58:0a:80:01:03 10.128.1.3"}, + ExternalIDs: map[string]string{ + "pod": "true", + "namespace": namespaceName, + }, + Name: "namespace1_myPod", + Options: map[string]string{ + "iface-id-ver": "myPod", + "requested-chassis": "node1", + }, + PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, + }, + &nbdb.LogicalSwitch{ + UUID: "node1", + Name: "node1", + Ports: []string{"lsp1"}, + }, + &nbdb.BFD{ + UUID: bfd1NamedUUID, + DstIP: "9.0.0.1", + LogicalPort: "rtoe-GR_node1", + }, + &nbdb.LogicalRouterStaticRoute{ + UUID: "static-route-1-UUID", + IPPrefix: "10.128.1.3/32", + Nexthop: "9.0.0.1", + BFD: &bfd1NamedUUID, + Policy: &nbdb.LogicalRouterStaticRoutePolicySrcIP, + OutputPort: &logicalRouterPort, + Options: map[string]string{ + "ecmp_symmetric_reply": "true", + }, + }, + &nbdb.LogicalRouterStaticRoute{ + UUID: "static-route-2-UUID", + IPPrefix: "10.128.1.3/32", + Nexthop: "10.0.0.1", + Policy: &nbdb.LogicalRouterStaticRoutePolicySrcIP, + OutputPort: &logicalRouterPort, + Options: map[string]string{ + "ecmp_symmetric_reply": "true", + }, + }, + &nbdb.LogicalRouter{ + UUID: "GR_node1-UUID", + Name: "GR_node1", + StaticRoutes: []string{"static-route-1-UUID", "static-route-2-UUID"}, + }, + } + gomega.Eventually(fakeOvn.nbClient, 5).Should(libovsdbtest.HaveData(finalNB)) + return nil + } + + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + ginkgo.It("should enable bfd only on the gw pod when set", func() { + app.Action = func(ctx *cli.Context) error { + + namespaceT := *newNamespace(namespaceName) + namespaceX := *newNamespace("namespace2") + + t := newTPod( + "node1", + "10.128.1.0/24", + "10.128.1.2", + "10.128.1.1", + "myPod", + "10.128.1.3", + "0a:58:0a:80:01:03", + namespaceT.Name, + ) + gwPod := *newPod(namespaceX.Name, "gwPod", "node2", "10.0.0.1") + 
gwPod.Spec.HostNetwork = true + + fakeOvn.startWithDBSetup( + libovsdbtest.TestSetup{ + NBData: []libovsdbtest.TestData{ + &nbdb.LogicalSwitch{ + UUID: "node1", + Name: "node1", + }, + &nbdb.LogicalRouter{ + UUID: "GR_node1-UUID", + Name: "GR_node1", + }, + }, + }, + &v1.NamespaceList{ + Items: []v1.Namespace{ + namespaceT, namespaceX, + }, + }, + &v1.PodList{ + Items: []v1.Pod{ + *newPod(t.namespace, t.podName, t.nodeName, t.podIP), + }, + }, + &adminpolicybasedrouteapi.AdminPolicyBasedExternalRouteList{ + Items: []adminpolicybasedrouteapi.AdminPolicyBasedExternalRoute{ + newPolicy("policy", + &metav1.LabelSelector{MatchLabels: map[string]string{"name": namespaceT.Name}}, + sets.NewString("9.0.0.1"), + false, + &metav1.LabelSelector{MatchLabels: map[string]string{"name": namespaceX.Name}}, + &metav1.LabelSelector{MatchLabels: map[string]string{"name": gwPod.Name}}, + true, + "", + ), + }, + }, + ) + t.populateLogicalSwitchCache(fakeOvn, getLogicalSwitchUUID(fakeOvn.controller.nbClient, "node1")) + + injectNode(fakeOvn) + err := fakeOvn.controller.WatchNamespaces() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchPods() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + fakeOvn.RunAPBExternalPolicyController() + + _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Pods(namespaceX.Name).Create(context.TODO(), &gwPod, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + finalNB := []libovsdbtest.TestData{ + &nbdb.LogicalSwitchPort{ + UUID: "lsp1", + Addresses: []string{"0a:58:0a:80:01:03 10.128.1.3"}, + ExternalIDs: map[string]string{ + "pod": "true", + "namespace": namespaceName, + }, + Name: "namespace1_myPod", + Options: map[string]string{ + "iface-id-ver": "myPod", + "requested-chassis": "node1", + }, + PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, + }, + &nbdb.LogicalSwitch{ + UUID: "node1", + Name: "node1", + Ports: []string{"lsp1"}, + }, + &nbdb.BFD{ + UUID: bfd1NamedUUID, + DstIP: "10.0.0.1", + 
LogicalPort: "rtoe-GR_node1", + }, + &nbdb.LogicalRouterStaticRoute{ + UUID: "static-route-1-UUID", + IPPrefix: "10.128.1.3/32", + Nexthop: "9.0.0.1", + Policy: &nbdb.LogicalRouterStaticRoutePolicySrcIP, + OutputPort: &logicalRouterPort, + Options: map[string]string{ + "ecmp_symmetric_reply": "true", + }, + }, + &nbdb.LogicalRouterStaticRoute{ + UUID: "static-route-2-UUID", + IPPrefix: "10.128.1.3/32", + Nexthop: "10.0.0.1", + Policy: &nbdb.LogicalRouterStaticRoutePolicySrcIP, + OutputPort: &logicalRouterPort, + BFD: &bfd1NamedUUID, + Options: map[string]string{ + "ecmp_symmetric_reply": "true", + }, + }, + &nbdb.LogicalRouter{ + UUID: "GR_node1-UUID", + Name: "GR_node1", + StaticRoutes: []string{"static-route-1-UUID", "static-route-2-UUID"}, + }, + } + + gomega.Eventually(fakeOvn.nbClient, 5).Should(libovsdbtest.HaveData(finalNB)) + return nil + } + + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + ginkgo.It("should disable bfd when removing the static hop from the namespace", func() { + app.Action = func(ctx *cli.Context) error { + namespaceT := *newNamespace(namespaceName) + + t := newTPod( + "node1", + "10.128.1.0/24", + "10.128.1.2", + "10.128.1.1", + "myPod", + "10.128.1.3", + "0a:58:0a:80:01:03", + namespaceT.Name, + ) + initNB := libovsdbtest.TestSetup{ + NBData: []libovsdbtest.TestData{ + &nbdb.LogicalSwitch{ + UUID: "node1", + Name: "node1", + }, + &nbdb.BFD{ + UUID: bfd1NamedUUID, + DstIP: "9.0.0.1", + LogicalPort: "rtoe-GR_node1", + }, + &nbdb.LogicalRouterStaticRoute{ + UUID: "static-route-1-UUID", + IPPrefix: "10.128.1.3/32", + Nexthop: "9.0.0.1", + Policy: &nbdb.LogicalRouterStaticRoutePolicySrcIP, + BFD: &bfd1NamedUUID, + OutputPort: &logicalRouterPort, + Options: map[string]string{ + "ecmp_symmetric_reply": "true", + }, + }, + &nbdb.LogicalRouter{ + UUID: "GR_node1-UUID", + Name: "GR_node1", + StaticRoutes: []string{"static-route-1-UUID"}, + }, + }, + } + fakeOvn.startWithDBSetup( + initNB, + 
&v1.NamespaceList{ + Items: []v1.Namespace{ + namespaceT, + }, + }, + &v1.PodList{ + Items: []v1.Pod{ + *newPod(t.namespace, t.podName, t.nodeName, t.podIP), + }, + }, + ) + t.populateLogicalSwitchCache(fakeOvn, getLogicalSwitchUUID(fakeOvn.controller.nbClient, "node1")) + + injectNode(fakeOvn) + err := fakeOvn.controller.WatchNamespaces() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchPods() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + fakeOvn.RunAPBExternalPolicyController() + + gomega.Eventually(fakeOvn.nbClient, 5).Should(libovsdbtest.HaveData([]libovsdbtest.TestData{ + &nbdb.LogicalSwitch{ + UUID: "node1", + Name: "node1", + Ports: []string{"lsp1"}, + }, + &nbdb.LogicalSwitchPort{ + UUID: "lsp1", + Addresses: []string{"0a:58:0a:80:01:03 10.128.1.3"}, + ExternalIDs: map[string]string{ + "pod": "true", + "namespace": namespaceName, + }, + Name: "namespace1_myPod", + Options: map[string]string{ + "iface-id-ver": "myPod", + "requested-chassis": "node1", + }, + PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, + }, + &nbdb.LogicalRouter{ + UUID: "GR_node1-UUID", + Name: "GR_node1", + StaticRoutes: []string{}, + }, + })) + + p := newPolicy("policy", + &metav1.LabelSelector{MatchLabels: map[string]string{"name": namespaceT.Name}}, + sets.NewString("9.0.0.1"), + true, + nil, + nil, + false, + "") + _, err = fakeOvn.fakeClient.AdminPolicyRouteClient.K8sV1().AdminPolicyBasedExternalRoutes().Create(context.Background(), &p, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + tempNB := []libovsdbtest.TestData{ + &nbdb.LogicalRouter{ + UUID: "GR_node1-UUID", + Name: "GR_node1", + StaticRoutes: []string{"static-route-1-UUID"}, + }, + &nbdb.LogicalSwitch{ + UUID: "node1", + Name: "node1", + Ports: []string{"lsp1"}, + }, + &nbdb.LogicalSwitchPort{ + UUID: "lsp1", + Addresses: []string{"0a:58:0a:80:01:03 10.128.1.3"}, + ExternalIDs: map[string]string{ + "pod": "true", + "namespace": namespaceName, + }, + 
Name: "namespace1_myPod", + Options: map[string]string{ + "iface-id-ver": "myPod", + "requested-chassis": "node1", + }, + PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, + }, + &nbdb.BFD{ + UUID: bfd1NamedUUID, + DstIP: "9.0.0.1", + LogicalPort: "rtoe-GR_node1", + }, + &nbdb.LogicalRouterStaticRoute{ + UUID: "static-route-1-UUID", + BFD: &bfd1NamedUUID, + IPPrefix: "10.128.1.3/32", + Nexthop: "9.0.0.1", + Policy: &nbdb.LogicalRouterStaticRoutePolicySrcIP, + OutputPort: &logicalRouterPort, + Options: map[string]string{ + "ecmp_symmetric_reply": "true", + }, + }, + } + gomega.Eventually(fakeOvn.nbClient, 5).Should(libovsdbtest.HaveData(tempNB)) + + updatePolicy("policy", + &metav1.LabelSelector{MatchLabels: map[string]string{"name": namespaceT.Name}}, + sets.NewString("9.0.0.1"), + false, + nil, + nil, + false, + "", + fakeOvn.fakeClient.AdminPolicyRouteClient, + ) + + finalNB := []libovsdbtest.TestData{ + &nbdb.LogicalRouter{ + UUID: "GR_node1-UUID", + Name: "GR_node1", + StaticRoutes: []string{"static-route-1-UUID"}, + }, + &nbdb.LogicalSwitch{ + UUID: "node1", + Name: "node1", + Ports: []string{"lsp1"}, + }, + &nbdb.LogicalSwitchPort{ + UUID: "lsp1", + Addresses: []string{"0a:58:0a:80:01:03 10.128.1.3"}, + ExternalIDs: map[string]string{ + "pod": "true", + "namespace": namespaceName, + }, + Name: "namespace1_myPod", + Options: map[string]string{ + "iface-id-ver": "myPod", + "requested-chassis": "node1", + }, + PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, + }, + &nbdb.LogicalRouterStaticRoute{ + UUID: "static-route-1-UUID", + IPPrefix: "10.128.1.3/32", + Nexthop: "9.0.0.1", + Policy: &nbdb.LogicalRouterStaticRoutePolicySrcIP, + OutputPort: &logicalRouterPort, + Options: map[string]string{ + "ecmp_symmetric_reply": "true", + }, + }, + } + + gomega.Eventually(fakeOvn.nbClient, 5).Should(libovsdbtest.HaveData(finalNB)) + return nil + } + + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + }) + 
ginkgo.Context("hybrid route policy operations in lgw mode", func() { + ginkgo.It("add hybrid route policy for pods", func() { + app.Action = func(ctx *cli.Context) error { + config.Gateway.Mode = config.GatewayModeLocal + + fakeOvn.startWithDBSetup( + libovsdbtest.TestSetup{ + NBData: []libovsdbtest.TestData{ + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + "node1" + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + "node1", + Networks: []string{"100.64.0.4/32"}, + }, + &nbdb.LogicalRouter{ + Name: ovntypes.OVNClusterRouter, + UUID: ovntypes.OVNClusterRouter + "-UUID", + }, + }, + }, + ) + + fakeOvn.RunAPBExternalPolicyController() + + asIndex := getHybridRouteAddrSetDbIDs("node1", DefaultNetworkControllerName) + asv4, _ := addressset.GetHashNamesForAS(asIndex) + finalNB := []libovsdbtest.TestData{ + &nbdb.LogicalRouterPolicy{ + UUID: "2a7a61cb-fb13-4266-a3f0-9ac5c4471123 [u2596996164]", + Priority: ovntypes.HybridOverlayReroutePriority, + Action: nbdb.LogicalRouterPolicyActionReroute, + Nexthops: []string{"100.64.0.4"}, + Match: "inport == \"rtos-node1\" && ip4.src == $" + asv4 + " && ip4.dst != 10.128.0.0/14", + }, + &nbdb.LogicalRouter{ + Name: ovntypes.OVNClusterRouter, + UUID: ovntypes.OVNClusterRouter + "-UUID", + Policies: []string{"2a7a61cb-fb13-4266-a3f0-9ac5c4471123 [u2596996164]"}, + }, + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + "node1" + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + "node1", + Networks: []string{"100.64.0.4/32"}, + }, + } + + err := fakeOvn.controller.addHybridRoutePolicyForPod(net.ParseIP("10.128.1.3"), "node1") + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Eventually(fakeOvn.nbClient, 5).Should(libovsdbtest.HaveData(finalNB)) + // check if the address-set was created with the podIP + dbIDs := getHybridRouteAddrSetDbIDs("node1", 
DefaultNetworkControllerName) + fakeOvn.asf.ExpectAddressSetWithIPs(dbIDs, []string{"10.128.1.3"}) + return nil + } + + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + ginkgo.It("should reconcile a pod and create/delete the hybridRoutePolicy accordingly", func() { + app.Action = func(ctx *cli.Context) error { + config.Gateway.Mode = config.GatewayModeLocal + + namespaceT := *newNamespace("namespace1") + namespaceT.Annotations = map[string]string{"k8s.ovn.org/routing-external-gws": "9.0.0.1"} + t := newTPod( + "node1", + "10.128.1.0/24", + "10.128.1.2", + "10.128.1.1", + "myPod", + "10.128.1.3", + "0a:58:0a:80:01:03", + namespaceT.Name, + ) + + fakeOvn.startWithDBSetup( + libovsdbtest.TestSetup{ + NBData: []libovsdbtest.TestData{ + &nbdb.LogicalSwitch{ + UUID: "node1", + Name: "node1", + }, + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + "node1" + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + "node1", + Networks: []string{"100.64.0.4/32"}, + }, + &nbdb.LogicalRouter{ + UUID: "GR_node1-UUID", + Name: "GR_node1", + }, + &nbdb.LogicalRouter{ + Name: ovntypes.OVNClusterRouter, + UUID: ovntypes.OVNClusterRouter + "-UUID", + }, + }, + }, + &v1.NamespaceList{ + Items: []v1.Namespace{ + namespaceT, + }, + }, + &v1.PodList{ + Items: []v1.Pod{ + *newPod(t.namespace, t.podName, t.nodeName, t.podIP), + }, + }, + &adminpolicybasedrouteapi.AdminPolicyBasedExternalRouteList{ + Items: []adminpolicybasedrouteapi.AdminPolicyBasedExternalRoute{ + newPolicy("policy", + &metav1.LabelSelector{MatchLabels: map[string]string{"name": namespaceT.Name}}, + sets.NewString("9.0.0.1"), + true, + nil, + nil, + false, + "", + ), + }, + }, + ) + + t.populateLogicalSwitchCache(fakeOvn, getLogicalSwitchUUID(fakeOvn.controller.nbClient, "node1")) + + injectNode(fakeOvn) + err := fakeOvn.controller.WatchNamespaces() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = 
fakeOvn.controller.WatchPods() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + fakeOvn.RunAPBExternalPolicyController() + + asIndex := getHybridRouteAddrSetDbIDs("node1", DefaultNetworkControllerName) + asv4, _ := addressset.GetHashNamesForAS(asIndex) + nbWithLRP := []libovsdbtest.TestData{ + &nbdb.LogicalRouterPolicy{ + UUID: "lrp1", + Action: "reroute", + Match: "inport == \"rtos-node1\" && ip4.src == $" + asv4 + " && ip4.dst != 10.128.0.0/14", + Nexthops: []string{"100.64.0.4"}, + Priority: ovntypes.HybridOverlayReroutePriority, + }, + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + "node1" + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + "node1", + Networks: []string{"100.64.0.4/32"}, + }, + &nbdb.LogicalRouterStaticRoute{ + UUID: "static-route-1-UUID", + IPPrefix: "10.128.1.3/32", + Nexthop: "9.0.0.1", + Options: map[string]string{ + "ecmp_symmetric_reply": "true", + }, + OutputPort: &logicalRouterPort, + Policy: &nbdb.LogicalRouterStaticRoutePolicySrcIP, + }, + &nbdb.LogicalSwitch{ + UUID: "493c61b4-2f97-446d-a1f0-1f713b510bbf", + Name: "node1", + Ports: []string{"lsp1"}, + }, + &nbdb.LogicalSwitchPort{ + UUID: "lsp1", + Addresses: []string{"0a:58:0a:80:01:03 10.128.1.3"}, + ExternalIDs: map[string]string{ + "pod": "true", + "namespace": "namespace1", + }, + Name: "namespace1_myPod", + Options: map[string]string{ + "requested-chassis": "node1", + "iface-id-ver": "myPod", + }, + PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, + }, + &nbdb.LogicalRouter{ + UUID: "e496b76e-18a1-461e-a919-6dcf0b3c35db", + Name: "ovn_cluster_router", + Policies: []string{"lrp1"}, + }, + &nbdb.LogicalRouter{ + UUID: "8945d2c1-bf8a-43ab-aa9f-6130eb525682", + Name: "GR_node1", + StaticRoutes: []string{"static-route-1-UUID"}, + }, + } + + gomega.Eventually(func() string { return getPodAnnotations(fakeOvn.fakeClient.KubeClient, t.namespace, t.podName) }, 
2).Should(gomega.MatchJSON(`{"default": {"ip_addresses":["` + t.podIP + `/24"], "mac_address":"` + t.podMAC + `", "gateway_ips": ["` + t.nodeGWIP + `"], "ip_address":"` + t.podIP + `/24", "gateway_ip": "` + t.nodeGWIP + `"}}`)) + gomega.Eventually(fakeOvn.nbClient, 5).Should(libovsdbtest.HaveData(nbWithLRP)) + + deletePod(t.namespace, t.podName, fakeOvn.fakeClient.KubeClient) + + finalNB := []libovsdbtest.TestData{ + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + "node1" + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + "node1", + Networks: []string{"100.64.0.4/32"}, + }, + &nbdb.LogicalSwitch{ + UUID: "493c61b4-2f97-446d-a1f0-1f713b510bbf", + Name: "node1", + }, + &nbdb.LogicalRouter{ + UUID: "e496b76e-18a1-461e-a919-6dcf0b3c35db", + Name: "ovn_cluster_router", + }, + &nbdb.LogicalRouter{ + UUID: "8945d2c1-bf8a-43ab-aa9f-6130eb525682", + Name: "GR_node1", + }, + } + gomega.Eventually(fakeOvn.nbClient, 5).Should(libovsdbtest.HaveData(finalNB)) + + return nil + } + + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + ginkgo.It("should create a single policy for concurrent addHybridRoutePolicy for the same node", func() { + app.Action = func(ctx *cli.Context) error { + config.Gateway.Mode = config.GatewayModeLocal + + fakeOvn.startWithDBSetup( + libovsdbtest.TestSetup{ + NBData: []libovsdbtest.TestData{ + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + "node1" + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + "node1", + Networks: []string{"100.64.0.4/32"}, + }, + &nbdb.LogicalRouter{ + Name: ovntypes.OVNClusterRouter, + UUID: ovntypes.OVNClusterRouter + "-UUID", + }, + }, + }, + ) + fakeOvn.RunAPBExternalPolicyController() + + asIndex := getHybridRouteAddrSetDbIDs("node1", DefaultNetworkControllerName) + asv4, _ := addressset.GetHashNamesForAS(asIndex) + finalNB := 
[]libovsdbtest.TestData{ + &nbdb.LogicalRouterPolicy{ + UUID: "lrp1", + Priority: ovntypes.HybridOverlayReroutePriority, + Action: nbdb.LogicalRouterPolicyActionReroute, + Nexthops: []string{"100.64.0.4"}, + Match: "inport == \"rtos-node1\" && ip4.src == $" + asv4 + " && ip4.dst != 10.128.0.0/14", + }, + &nbdb.LogicalRouter{ + Name: ovntypes.OVNClusterRouter, + UUID: ovntypes.OVNClusterRouter + "-UUID", + Policies: []string{"lrp1"}, + }, + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + "node1" + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + "node1", + Networks: []string{"100.64.0.4/32"}, + }, + } + + wg := &sync.WaitGroup{} + c := make(chan int) + for i := 1; i <= 5; i++ { + podIndex := i + wg.Add(1) + go func() { + defer wg.Done() + <-c + fakeOvn.controller.addHybridRoutePolicyForPod(net.ParseIP(fmt.Sprintf("10.128.1.%d", podIndex)), "node1") + }() + } + close(c) + wg.Wait() + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(finalNB)) + + err := fakeOvn.controller.addHybridRoutePolicyForPod(net.ParseIP(fmt.Sprintf("10.128.1.%d", 6)), "node1") + // adding another pod after the initial burst should not trigger an error or change db + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(finalNB)) + + return nil + } + + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + ginkgo.It("delete hybrid route policy for pods", func() { + app.Action = func(ctx *cli.Context) error { + config.Gateway.Mode = config.GatewayModeLocal + asIndex := getHybridRouteAddrSetDbIDs("node1", DefaultNetworkControllerName) + asv4, _ := addressset.GetHashNamesForAS(asIndex) + fakeOvn.startWithDBSetup( + libovsdbtest.TestSetup{ + NBData: []libovsdbtest.TestData{ + &nbdb.LogicalRouterPolicy{ + UUID: "2a7a61cb-fb13-4266-a3f0-9ac5c4471123 [u2596996164]", + Priority: 
ovntypes.HybridOverlayReroutePriority, + Action: nbdb.LogicalRouterPolicyActionReroute, + Nexthops: []string{"100.64.0.4"}, + Match: "inport == \"rtos-node1\" && ip4.src == $" + asv4 + " && ip4.dst != 10.128.0.0/14", + }, + &nbdb.LogicalRouter{ + Name: ovntypes.OVNClusterRouter, + UUID: ovntypes.OVNClusterRouter + "-UUID", + Policies: []string{"2a7a61cb-fb13-4266-a3f0-9ac5c4471123 [u2596996164]"}, + }, + &nbdb.LogicalRouter{ + UUID: "GR_node1-UUID", + Name: "GR_node1", + }, + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + "node1" + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + "node1", + Networks: []string{"100.64.0.4/32"}, + }, + }, + }, + ) + finalNB := []libovsdbtest.TestData{ + &nbdb.LogicalRouter{ + Name: ovntypes.OVNClusterRouter, + UUID: ovntypes.OVNClusterRouter + "-UUID", + Policies: []string{}, + }, + &nbdb.LogicalRouter{ + UUID: "GR_node1-UUID", + Name: "GR_node1", + }, + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + "node1" + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + "node1", + Networks: []string{"100.64.0.4/32"}, + }, + } + + injectNode(fakeOvn) + fakeOvn.RunAPBExternalPolicyController() + err := fakeOvn.controller.delHybridRoutePolicyForPod(net.ParseIP("10.128.1.3"), "node1") + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(finalNB)) + dbIDs := getHybridRouteAddrSetDbIDs("node1", DefaultNetworkControllerName) + fakeOvn.asf.EventuallyExpectNoAddressSet(dbIDs) + return nil + } + + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + ginkgo.It("delete hybrid route policy for pods with force", func() { + app.Action = func(ctx *cli.Context) error { + config.Gateway.Mode = config.GatewayModeShared + asIndex1 := getHybridRouteAddrSetDbIDs("node1", DefaultNetworkControllerName) + 
as1v4, _ := addressset.GetHashNamesForAS(asIndex1) + asIndex2 := getHybridRouteAddrSetDbIDs("node2", DefaultNetworkControllerName) + as2v4, _ := addressset.GetHashNamesForAS(asIndex2) + fakeOvn.startWithDBSetup( + libovsdbtest.TestSetup{ + NBData: []libovsdbtest.TestData{ + &nbdb.LogicalRouterPolicy{ + UUID: "501-1st-UUID", + Priority: ovntypes.HybridOverlayReroutePriority, + Action: nbdb.LogicalRouterPolicyActionReroute, + Nexthops: []string{"100.64.0.4"}, + Match: "inport == \"rtos-node1\" && ip4.src == $" + as1v4 + " && ip4.dst != 10.128.0.0/14", + }, + &nbdb.LogicalRouterPolicy{ + UUID: "501-2nd-UUID", + Priority: ovntypes.HybridOverlayReroutePriority, + Action: nbdb.LogicalRouterPolicyActionReroute, + Nexthops: []string{"100.64.1.4"}, + Match: "inport == \"rtos-node2\" && ip4.src == $" + as2v4 + " && ip4.dst != 10.128.0.0/14", + }, + &nbdb.LogicalRouter{ + Name: ovntypes.OVNClusterRouter, + UUID: ovntypes.OVNClusterRouter + "-UUID", + Policies: []string{"501-1st-UUID", "501-2nd-UUID"}, + }, + &nbdb.LogicalRouter{ + UUID: "GR_node1-UUID", + Name: "GR_node1", + }, + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + "node1" + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + "node1", + Networks: []string{"100.64.0.4/32"}, + }, + }, + }, + ) + + fakeOvn.RunAPBExternalPolicyController() + + finalNB := []libovsdbtest.TestData{ + &nbdb.LogicalRouter{ + Name: ovntypes.OVNClusterRouter, + UUID: ovntypes.OVNClusterRouter + "-UUID", + Policies: []string{}, + }, + &nbdb.LogicalRouter{ + UUID: "GR_node1-UUID", + Name: "GR_node1", + }, + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + "node1" + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + "node1", + Networks: []string{"100.64.0.4/32"}, + }, + } + + err := fakeOvn.controller.delAllHybridRoutePolicies() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + 
gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(finalNB)) + dbIDs := getHybridRouteAddrSetDbIDs("node1", DefaultNetworkControllerName) + fakeOvn.asf.EventuallyExpectNoAddressSet(dbIDs) + return nil + } + + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + ginkgo.It("delete legacy hybrid route policies", func() { + app.Action = func(ctx *cli.Context) error { + config.Gateway.Mode = config.GatewayModeLocal + asIndex := getHybridRouteAddrSetDbIDs("node1", DefaultNetworkControllerName) + asv4, _ := addressset.GetHashNamesForAS(asIndex) + fakeOvn.startWithDBSetup( + libovsdbtest.TestSetup{ + NBData: []libovsdbtest.TestData{ + &nbdb.LogicalRouterPolicy{ + UUID: "501-1st-UUID", + Priority: ovntypes.HybridOverlayReroutePriority, + Action: nbdb.LogicalRouterPolicyActionReroute, + Nexthops: []string{"100.64.0.4"}, + Match: "inport == \"rtos-node1\" && ip4.src == 1.3.3.7 && ip4.dst != 10.128.0.0/14", + }, + &nbdb.LogicalRouterPolicy{ + UUID: "501-2nd-UUID", + Priority: ovntypes.HybridOverlayReroutePriority, + Action: nbdb.LogicalRouterPolicyActionReroute, + Nexthops: []string{"100.64.1.4"}, + Match: "inport == \"rtos-node2\" && ip4.src == 1.3.3.8 && ip4.dst != 10.128.0.0/14", + }, + &nbdb.LogicalRouterPolicy{ + UUID: "501-new-UUID", + Priority: ovntypes.HybridOverlayReroutePriority, + Action: nbdb.LogicalRouterPolicyActionReroute, + Nexthops: []string{"100.64.1.4"}, + Match: "inport == \"rtos-node2\" && ip4.src == $" + asv4 + " && ip4.dst != 10.128.0.0/14", + }, + &nbdb.LogicalRouter{ + Name: ovntypes.OVNClusterRouter, + UUID: ovntypes.OVNClusterRouter + "-UUID", + Policies: []string{"501-1st-UUID", "501-2nd-UUID", "501-new-UUID"}, + }, + &nbdb.LogicalRouter{ + UUID: "GR_node1-UUID", + Name: "GR_node1", + }, + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + "node1" + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + "node1", + Networks: 
[]string{"100.64.0.4/32"}, + }, + }, + }, + ) + + fakeOvn.RunAPBExternalPolicyController() + + finalNB := []libovsdbtest.TestData{ + &nbdb.LogicalRouterPolicy{ + UUID: "501-new-UUID", + Priority: ovntypes.HybridOverlayReroutePriority, + Action: nbdb.LogicalRouterPolicyActionReroute, + Nexthops: []string{"100.64.1.4"}, + Match: "inport == \"rtos-node2\" && ip4.src == $" + asv4 + " && ip4.dst != 10.128.0.0/14", + }, + &nbdb.LogicalRouter{ + Name: ovntypes.OVNClusterRouter, + UUID: ovntypes.OVNClusterRouter + "-UUID", + Policies: []string{"501-new-UUID"}, + }, + &nbdb.LogicalRouter{ + UUID: "GR_node1-UUID", + Name: "GR_node1", + }, + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + "node1" + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + "node1", + Networks: []string{"100.64.0.4/32"}, + }, + } + + err := fakeOvn.controller.delAllLegacyHybridRoutePolicies() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(finalNB)) + return nil + } + + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + }) + ginkgo.Context("SNAT on gateway router operations", func() { + ginkgo.It("add/delete SNAT per pod on gateway router", func() { + app.Action = func(ctx *cli.Context) error { + config.Gateway.Mode = config.GatewayModeShared + config.Gateway.DisableSNATMultipleGWs = true + + nodeName := "node1" + namespaceT := *newNamespace(namespaceName) + t := newTPod( + "node1", + "10.128.1.0/24", + "10.128.1.2", + "10.128.1.1", + "myPod", + "10.128.1.3", + "0a:58:0a:80:01:03", + namespaceT.Name, + ) + + pod := []v1.Pod{ + *newPod(t.namespace, t.podName, t.nodeName, t.podIP), + } + + fakeOvn.startWithDBSetup( + libovsdbtest.TestSetup{ + NBData: []libovsdbtest.TestData{ + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + nodeName + "-UUID", + Name: 
ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + nodeName, + Networks: []string{"100.64.0.4/32"}, + }, + &nbdb.LogicalRouter{ + Name: types.GWRouterPrefix + nodeName, + UUID: types.GWRouterPrefix + nodeName + "-UUID", + }, + &nbdb.LogicalSwitch{ + UUID: "node1", + Name: "node1", + }, + }, + }, + &v1.NamespaceList{ + Items: []v1.Namespace{ + namespaceT, + }, + }, + &v1.PodList{ + Items: pod, + }, + ) + + finalNB := []libovsdbtest.TestData{ + &nbdb.NAT{ + UUID: "nat-UUID", + ExternalIP: "169.254.33.2", + LogicalIP: "10.128.1.3", + Options: map[string]string{"stateless": "false"}, + Type: nbdb.NATTypeSNAT, + }, + &nbdb.LogicalRouter{ + Name: types.GWRouterPrefix + nodeName, + UUID: types.GWRouterPrefix + nodeName + "-UUID", + Nat: []string{"nat-UUID"}, + }, + &nbdb.LogicalRouterPort{ + UUID: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + nodeName + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + nodeName, + Networks: []string{"100.64.0.4/32"}, + }, + &nbdb.LogicalSwitch{ + UUID: "node1", + Name: "node1", + }, + } + injectNode(fakeOvn) + err := fakeOvn.controller.WatchNamespaces() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOvn.controller.WatchPods() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + fakeOvn.RunAPBExternalPolicyController() + + extIPs, err := getExternalIPsGR(fakeOvn.controller.watchFactory, pod[0].Spec.NodeName) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + _, fullMaskPodNet, _ := net.ParseCIDR("10.128.1.3/32") + gomega.Expect( + addOrUpdatePodSNAT(fakeOvn.controller.nbClient, pod[0].Spec.NodeName, extIPs, []*net.IPNet{fullMaskPodNet}), + ).To(gomega.Succeed()) + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(finalNB)) + finalNB = []libovsdbtest.TestData{ + &nbdb.LogicalRouter{ + Name: types.GWRouterPrefix + nodeName, + UUID: types.GWRouterPrefix + nodeName + "-UUID", + Nat: []string{}, + }, + &nbdb.LogicalRouterPort{ + UUID: 
ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + nodeName + "-UUID", + Name: ovntypes.GWRouterToJoinSwitchPrefix + ovntypes.GWRouterPrefix + nodeName, + Networks: []string{"100.64.0.4/32"}, + }, + &nbdb.LogicalSwitch{ + UUID: "node1", + Name: "node1", + }, + } + err = deletePodSNAT(fakeOvn.controller.nbClient, nodeName, extIPs, []*net.IPNet{fullMaskPodNet}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(finalNB)) + return nil + } + + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + }) +}) + +func newPolicy(policyName string, fromNSSelector *metav1.LabelSelector, staticHopsGWIPs sets.String, bfdStatic bool, dynamicHopsNSSelector *metav1.LabelSelector, dynamicHopsPodSelector *metav1.LabelSelector, bfdDynamic bool, networkAttachementName string) adminpolicybasedrouteapi.AdminPolicyBasedExternalRoute { + p := adminpolicybasedrouteapi.AdminPolicyBasedExternalRoute{ + ObjectMeta: metav1.ObjectMeta{Name: policyName}, + Spec: adminpolicybasedrouteapi.AdminPolicyBasedExternalRouteSpec{ + From: adminpolicybasedrouteapi.ExternalNetworkSource{ + NamespaceSelector: *fromNSSelector, + }, + NextHops: adminpolicybasedrouteapi.ExternalNextHops{}, + }, + } + + if staticHopsGWIPs.Len() > 0 { + p.Spec.NextHops.StaticHops = []*adminpolicybasedrouteapi.StaticHop{} + for ip := range staticHopsGWIPs { + p.Spec.NextHops.StaticHops = append(p.Spec.NextHops.StaticHops, &adminpolicybasedrouteapi.StaticHop{IP: ip, BFDEnabled: bfdStatic}) + } + } + if dynamicHopsNSSelector != nil && dynamicHopsPodSelector != nil { + p.Spec.NextHops.DynamicHops = []*adminpolicybasedrouteapi.DynamicHop{ + {NamespaceSelector: dynamicHopsNSSelector, + PodSelector: *dynamicHopsPodSelector, + NetworkAttachmentName: networkAttachementName, + BFDEnabled: bfdDynamic}, + } + } + return p +} + +func updatePolicy(policyName string, fromNSSelector *metav1.LabelSelector, staticHopsGWIPs sets.String, 
bfdStatic bool, dynamicHopsNSSelector *metav1.LabelSelector, dynamicHopsPodSelector *metav1.LabelSelector, bfdDynamic bool, networkAttachementName string, fakeRouteClient adminpolicybasedrouteclientset.Interface) { + + p, err := fakeRouteClient.K8sV1().AdminPolicyBasedExternalRoutes().Get(context.TODO(), policyName, metav1.GetOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + p.Generation++ + p.Spec.From.NamespaceSelector = *fromNSSelector + + p.Spec.NextHops.StaticHops = []*adminpolicybasedrouteapi.StaticHop{} + if staticHopsGWIPs.Len() > 0 { + for ip := range staticHopsGWIPs { + p.Spec.NextHops.StaticHops = append(p.Spec.NextHops.StaticHops, &adminpolicybasedrouteapi.StaticHop{IP: ip, BFDEnabled: bfdStatic}) + } + } + p.Spec.NextHops.DynamicHops = []*adminpolicybasedrouteapi.DynamicHop{} + if dynamicHopsNSSelector != nil && dynamicHopsPodSelector != nil { + p.Spec.NextHops.DynamicHops = append(p.Spec.NextHops.DynamicHops, + &adminpolicybasedrouteapi.DynamicHop{ + NamespaceSelector: dynamicHopsNSSelector, + PodSelector: *dynamicHopsPodSelector, + NetworkAttachmentName: networkAttachementName, + BFDEnabled: bfdDynamic}, + ) + } + _, err = fakeRouteClient.K8sV1().AdminPolicyBasedExternalRoutes().Update(context.Background(), p, metav1.UpdateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) +} + +func deletePod(namespace, name string, fakeClient kubernetes.Interface) { + + p, err := fakeClient.CoreV1().Pods(namespace).Get(context.TODO(), name, metav1.GetOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + p.Generation++ + p.DeletionTimestamp = &metav1.Time{Time: time.Now()} + _, err = fakeClient.CoreV1().Pods(namespace).Update(context.Background(), p, metav1.UpdateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeClient.CoreV1().Pods(namespace).Delete(context.Background(), p.Name, metav1.DeleteOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) +} + +func deleteNamespace(namespaceName string, fakeClient 
kubernetes.Interface) { + + ns, err := fakeClient.CoreV1().Namespaces().Get(context.TODO(), namespaceName, metav1.GetOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + ns.Generation++ + ns.DeletionTimestamp = &metav1.Time{Time: time.Now()} + _, err = fakeClient.CoreV1().Namespaces().Update(context.Background(), ns, metav1.UpdateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeClient.CoreV1().Namespaces().Delete(context.Background(), namespaceName, metav1.DeleteOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) +} + +func (o *FakeOVN) RunAPBExternalPolicyController() { + klog.Warningf("#### [%p] INIT Admin Policy Based External Controller", o) + o.controller.wg.Add(1) + go func() { + defer o.controller.wg.Done() + o.controller.apbExternalRouteController.Run(5) + }() +} diff --git a/go-controller/pkg/ovn/ovn_test.go b/go-controller/pkg/ovn/ovn_test.go index d6089023a3..3fbcd470e2 100644 --- a/go-controller/pkg/ovn/ovn_test.go +++ b/go-controller/pkg/ovn/ovn_test.go @@ -15,6 +15,7 @@ import ( libovsdbclient "github.com/ovn-org/libovsdb/client" ovncnitypes "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/cni/types" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config" + adminpolicybasedroutefake "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned/fake" egressfirewall "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressfirewall/v1" egressfirewallfake "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressfirewall/v1/apis/clientset/versioned/fake" egressip "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressip/v1" @@ -24,6 +25,8 @@ import ( egressqosfake "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressqos/v1/apis/clientset/versioned/fake" egressservice "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressservice/v1" egressservicefake 
"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressservice/v1/apis/clientset/versioned/fake" + + adminpolicybasedrouteapi "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/adminpolicybasedroute/v1" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/factory" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/kube" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/metrics" @@ -101,24 +104,26 @@ func (o *FakeOVN) start(objects ...runtime.Object) { egressQoSObjects := []runtime.Object{} multiNetworkPolicyObjects := []runtime.Object{} egressServiceObjects := []runtime.Object{} + apbExternalRouteObjects := []runtime.Object{} v1Objects := []runtime.Object{} - nads := []*nettypes.NetworkAttachmentDefinition{} + nads := []nettypes.NetworkAttachmentDefinition{} for _, object := range objects { - if _, isEgressIPObject := object.(*egressip.EgressIPList); isEgressIPObject { + switch o := object.(type) { + case *egressip.EgressIPList: egressIPObjects = append(egressIPObjects, object) - } else if _, isEgressFirewallObject := object.(*egressfirewall.EgressFirewallList); isEgressFirewallObject { + case *egressfirewall.EgressFirewallList: egressFirewallObjects = append(egressFirewallObjects, object) - } else if _, isEgressQoSObject := object.(*egressqos.EgressQoSList); isEgressQoSObject { + case *egressqos.EgressQoSList: egressQoSObjects = append(egressQoSObjects, object) - } else if _, isMultiNetworkPolicyObject := object.(*mnpapi.MultiNetworkPolicyList); isMultiNetworkPolicyObject { + case *mnpapi.MultiNetworkPolicyList: multiNetworkPolicyObjects = append(multiNetworkPolicyObjects, object) - } else if nadList, isNADObject := object.(*nettypes.NetworkAttachmentDefinitionList); isNADObject { - for i := range nadList.Items { - nads = append(nads, &nadList.Items[i]) - } - } else if _, isEgressServiceObject := object.(*egressservice.EgressServiceList); isEgressServiceObject { + case *egressservice.EgressServiceList: egressServiceObjects = 
append(egressServiceObjects, object) - } else { + case *nettypes.NetworkAttachmentDefinitionList: + nads = append(nads, o.Items...) + case *adminpolicybasedrouteapi.AdminPolicyBasedExternalRouteList: + apbExternalRouteObjects = append(apbExternalRouteObjects, object) + default: v1Objects = append(v1Objects, object) } } @@ -129,6 +134,7 @@ func (o *FakeOVN) start(objects ...runtime.Object) { EgressQoSClient: egressqosfake.NewSimpleClientset(egressQoSObjects...), MultiNetworkPolicyClient: mnpfake.NewSimpleClientset(multiNetworkPolicyObjects...), EgressServiceClient: egressservicefake.NewSimpleClientset(egressServiceObjects...), + AdminPolicyRouteClient: adminpolicybasedroutefake.NewSimpleClientset(apbExternalRouteObjects...), } o.init(nads) } @@ -147,7 +153,7 @@ func (o *FakeOVN) shutdown() { o.nbsbCleanup.Cleanup() } -func (o *FakeOVN) init(nadList []*nettypes.NetworkAttachmentDefinition) { +func (o *FakeOVN) init(nadList []nettypes.NetworkAttachmentDefinition) { var err error o.watcher, err = factory.NewMasterWatchFactory(o.fakeClient) gomega.Expect(err).NotTo(gomega.HaveOccurred()) @@ -169,7 +175,7 @@ func (o *FakeOVN) init(nadList []*nettypes.NetworkAttachmentDefinition) { o.controller.routerLoadBalancerGroupUUID = types.ClusterRouterLBGroupName + "-UUID" for _, nad := range nadList { - err := o.NewSecondaryNetworkController(nad) + err := o.NewSecondaryNetworkController(&nad) gomega.Expect(err).NotTo(gomega.HaveOccurred()) } @@ -227,6 +233,7 @@ func NewOvnController(ovnClient *util.OVNMasterClientset, wf *factory.WatchFacto EIPClient: ovnClient.EgressIPClient, EgressFirewallClient: ovnClient.EgressFirewallClient, EgressServiceClient: ovnClient.EgressServiceClient, + APBRouteClient: ovnClient.AdminPolicyRouteClient, }, wf, recorder, diff --git a/go-controller/pkg/util/kube.go b/go-controller/pkg/util/kube.go index 0aead0e5dc..753617db3f 100644 --- a/go-controller/pkg/util/kube.go +++ b/go-controller/pkg/util/kube.go @@ -31,6 +31,7 @@ import ( 
networkattchmentdefclientset "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/client/clientset/versioned" ocpcloudnetworkclientset "github.com/openshift/client-go/cloudnetwork/clientset/versioned" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config" + adminpolicybasedrouteclientset "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/adminpolicybasedroute/v1/apis/clientset/versioned" egressfirewallclientset "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressfirewall/v1/apis/clientset/versioned" egressipclientset "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressip/v1/apis/clientset/versioned" egressqosclientset "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressqos/v1/apis/clientset/versioned" @@ -48,6 +49,7 @@ type OVNClientset struct { NetworkAttchDefClient networkattchmentdefclientset.Interface MultiNetworkPolicyClient multinetworkpolicyclientset.Interface EgressServiceClient egressserviceclientset.Interface + AdminPolicyRouteClient adminpolicybasedrouteclientset.Interface } // OVNMasterClientset @@ -58,11 +60,13 @@ type OVNMasterClientset struct { EgressQoSClient egressqosclientset.Interface MultiNetworkPolicyClient multinetworkpolicyclientset.Interface EgressServiceClient egressserviceclientset.Interface + AdminPolicyRouteClient adminpolicybasedrouteclientset.Interface } type OVNNodeClientset struct { - KubeClient kubernetes.Interface - EgressServiceClient egressserviceclientset.Interface + KubeClient kubernetes.Interface + EgressServiceClient egressserviceclientset.Interface + AdminPolicyRouteClient adminpolicybasedrouteclientset.Interface } type OVNClusterManagerClientset struct { @@ -80,6 +84,7 @@ func (cs *OVNClientset) GetMasterClientset() *OVNMasterClientset { EgressQoSClient: cs.EgressQoSClient, MultiNetworkPolicyClient: cs.MultiNetworkPolicyClient, EgressServiceClient: cs.EgressServiceClient, + AdminPolicyRouteClient: cs.AdminPolicyRouteClient, } } @@ -94,8 +99,9 @@ func (cs 
*OVNClientset) GetClusterManagerClientset() *OVNClusterManagerClientset func (cs *OVNClientset) GetNodeClientset() *OVNNodeClientset { return &OVNNodeClientset{ - KubeClient: cs.KubeClient, - EgressServiceClient: cs.EgressServiceClient, + KubeClient: cs.KubeClient, + EgressServiceClient: cs.EgressServiceClient, + AdminPolicyRouteClient: cs.AdminPolicyRouteClient, } } @@ -220,6 +226,11 @@ func NewOVNClientset(conf *config.KubernetesConfig) (*OVNClientset, error) { return nil, err } + adminPolicyBasedRouteClientset, err := adminpolicybasedrouteclientset.NewForConfig(kconfig) + if err != nil { + return nil, err + } + return &OVNClientset{ KubeClient: kclientset, EgressIPClient: egressIPClientset, @@ -229,6 +240,7 @@ func NewOVNClientset(conf *config.KubernetesConfig) (*OVNClientset, error) { NetworkAttchDefClient: networkAttchmntDefClientset, MultiNetworkPolicyClient: multiNetworkPolicyClientset, EgressServiceClient: egressserviceClientset, + AdminPolicyRouteClient: adminPolicyBasedRouteClientset, }, nil } diff --git a/go-controller/pkg/util/net_linux.go b/go-controller/pkg/util/net_linux.go index ddcda76d4b..c0f9648982 100644 --- a/go-controller/pkg/util/net_linux.go +++ b/go-controller/pkg/util/net_linux.go @@ -428,7 +428,6 @@ func DeleteConntrack(ip string, port int32, protocol kapi.Protocol, ipFilterType return fmt.Errorf("could not add label %s to conntrack filter: %v", labels, err) } } - if ipAddress.To4() != nil { if _, err := netLinkOps.ConntrackDeleteFilter(netlink.ConntrackTable, netlink.FAMILY_V4, filter); err != nil { return err diff --git a/test/e2e/e2e.go b/test/e2e/e2e.go index 3d0ac0e39b..979db0f941 100644 --- a/test/e2e/e2e.go +++ b/test/e2e/e2e.go @@ -266,8 +266,8 @@ func createGenericPod(f *framework.Framework, podName, nodeSelector, namespace s } // Create a pod on the specified node using the agnostic host image -func createGenericPodWithLabel(f *framework.Framework, podName, nodeSelector, namespace string, command []string, labels 
map[string]string) (*v1.Pod, error) { - return createPod(f, podName, nodeSelector, namespace, command, labels) +func createGenericPodWithLabel(f *framework.Framework, podName, nodeSelector, namespace string, command []string, labels map[string]string, options ...func(*v1.Pod)) (*v1.Pod, error) { + return createPod(f, podName, nodeSelector, namespace, command, labels, options...) } func createServiceForPodsWithLabel(f *framework.Framework, namespace string, servicePort int32, targetPort string, serviceType string, labels map[string]string) (string, error) { @@ -328,6 +328,11 @@ func deleteClusterExternalContainer(containerName string) { if err != nil { framework.Failf("failed to delete external test container, err: %v", err) } + gomega.Eventually(func() string { + output, err := runCommand(containerRuntime, "ps", "-f", fmt.Sprintf("name=%s", containerName), "-q") + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + return output + }, 5).Should(gomega.HaveLen(0)) } func updateNamespace(f *framework.Framework, namespace *v1.Namespace) { @@ -341,7 +346,7 @@ func getNamespace(f *framework.Framework, name string) *v1.Namespace { } func updatePod(f *framework.Framework, pod *v1.Pod) { - _, err := f.ClientSet.CoreV1().Pods(f.Namespace.Name).Update(context.Background(), pod, metav1.UpdateOptions{}) + _, err := f.ClientSet.CoreV1().Pods(pod.Namespace).Update(context.Background(), pod, metav1.UpdateOptions{}) framework.ExpectNoError(err, fmt.Sprintf("unable to update pod: %s, err: %v", pod.Name, err)) } func getPod(f *framework.Framework, podName string) *v1.Pod { diff --git a/test/e2e/external_gateways.go b/test/e2e/external_gateways.go index 82d46504a9..d75b7bc645 100644 --- a/test/e2e/external_gateways.go +++ b/test/e2e/external_gateways.go @@ -1,6 +1,8 @@ package e2e import ( + "context" + "encoding/json" "fmt" "net" "os" @@ -10,12 +12,16 @@ import ( "time" "github.com/google/go-cmp/cmp" + nettypes 
"github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/apis/k8s.cni.cncf.io/v1" "github.com/onsi/ginkgo" ginkgotable "github.com/onsi/ginkgo/extensions/table" "github.com/onsi/gomega" v1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/sets" "k8s.io/apimachinery/pkg/util/wait" "k8s.io/client-go/kubernetes" + "k8s.io/klog" "k8s.io/kubernetes/test/e2e/framework" e2enode "k8s.io/kubernetes/test/e2e/framework/node" "k8s.io/kubernetes/test/e2e/framework/skipper" @@ -28,6 +34,7 @@ const ( srcHTTPPort = 80 srcUDPPort = 90 externalGatewayPodIPsAnnotation = "k8s.ovn.org/external-gw-pod-ips" + defaultPolicyName = "default-route-policy" ) var externalContainerNetwork = "kind" @@ -63,713 +70,1850 @@ type gatewayTestIPs struct { targetIPs []string } -// Validate pods can reach a network running in a container's looback address via -// an external gateway running on eth0 of the container without any tunnel encap. -// The traffic will get proxied through an annotated pod in the serving namespace. -var _ = ginkgo.Describe("e2e non-vxlan external gateway through a gateway pod", func() { - const ( - svcname string = "externalgw-pod-novxlan" - gwContainer1 string = "ex-gw-container1" - gwContainer2 string = "ex-gw-container2" - srcPingPodName string = "e2e-exgw-src-ping-pod" - gatewayPodName1 string = "e2e-gateway-pod1" - gatewayPodName2 string = "e2e-gateway-pod2" - externalTCPPort = 91 - externalUDPPort = 90 - ecmpRetry int = 20 - testTimeout string = "20" - ) +var _ = ginkgo.Describe("External Gateway test suite", func() { + + var _ = ginkgo.Context("With annotations", func() { + + // Validate pods can reach a network running in a container's looback address via + // an external gateway running on eth0 of the container without any tunnel encap. + // The traffic will get proxied through an annotated pod in the serving namespace. 
+ var _ = ginkgo.Describe("e2e non-vxlan external gateway through a gateway pod", func() { + const ( + svcname string = "externalgw-pod-novxlan" + gwContainer1 string = "ex-gw-container1" + gwContainer2 string = "ex-gw-container2" + srcPingPodName string = "e2e-exgw-src-ping-pod" + gatewayPodName1 string = "e2e-gateway-pod1" + gatewayPodName2 string = "e2e-gateway-pod2" + externalTCPPort = 91 + externalUDPPort = 90 + ecmpRetry int = 20 + testTimeout string = "20" + ) + + var ( + sleepCommand = []string{"bash", "-c", "sleep 20000"} + addressesv4, addressesv6 gatewayTestIPs + clientSet kubernetes.Interface + servingNamespace string + ) + + var ( + gwContainers []string + ) + + f := wrappedTestFramework(svcname) + + ginkgo.BeforeEach(func() { + clientSet = f.ClientSet // so it can be used in AfterEach + // retrieve worker node names + nodes, err := e2enode.GetBoundedReadySchedulableNodes(f.ClientSet, 3) + framework.ExpectNoError(err) + if len(nodes.Items) < 3 { + framework.Failf( + "Test requires >= 3 Ready nodes, but there are only %v nodes", + len(nodes.Items)) + } - var ( - sleepCommand = []string{"bash", "-c", "sleep 20000"} - addressesv4, addressesv6 gatewayTestIPs - clientSet kubernetes.Interface - servingNamespace string - ) + ns, err := f.CreateNamespace("exgw-serving", nil) + framework.ExpectNoError(err) + servingNamespace = ns.Name - var ( - gwContainers []string - ) + gwContainers, addressesv4, addressesv6 = setupGatewayContainers(f, nodes, gwContainer1, gwContainer2, srcPingPodName, externalUDPPort, externalTCPPort, ecmpRetry) + setupAnnotatedGatewayPods(f, nodes, gatewayPodName1, gatewayPodName2, servingNamespace, sleepCommand, addressesv4, addressesv6, false) + }) - f := wrappedTestFramework(svcname) + ginkgo.AfterEach(func() { + cleanExGWContainers(clientSet, []string{gwContainer1, gwContainer2}, addressesv4, addressesv6) + resetGatewayAnnotations(f) + }) - ginkgo.BeforeEach(func() { - clientSet = f.ClientSet // so it can be used in AfterEach - // 
retrieve worker node names - nodes, err := e2enode.GetBoundedReadySchedulableNodes(f.ClientSet, 3) - framework.ExpectNoError(err) - if len(nodes.Items) < 3 { - framework.Failf( - "Test requires >= 3 Ready nodes, but there are only %v nodes", - len(nodes.Items)) - } + ginkgotable.DescribeTable("Should validate ICMP connectivity to an external gateway's loopback address via a pod with external gateway CR", + func(addresses *gatewayTestIPs, icmpCommand string) { + if addresses.srcPodIP == "" || addresses.nodeIP == "" { + skipper.Skipf("Skipping as pod ip / node ip are not set pod ip %s node ip %s", addresses.srcPodIP, addresses.nodeIP) + } - ns, err := f.CreateNamespace("exgw-serving", nil) - framework.ExpectNoError(err) - servingNamespace = ns.Name + ginkgo.By(fmt.Sprintf("Verifying connectivity to the pod [%s] from external gateways", addresses.srcPodIP)) + for _, gwContainer := range gwContainers { + _, err := runCommand(containerRuntime, "exec", gwContainer, "ping", "-c", testTimeout, addresses.srcPodIP) + framework.ExpectNoError(err, "Failed to ping %s from container %s", addresses.srcPodIP, gwContainer) + } - gwContainers, addressesv4, addressesv6 = setupGatewayContainers(f, nodes, gwContainer1, gwContainer2, srcPingPodName, externalUDPPort, externalTCPPort, ecmpRetry) - setupGatewayPods(f, nodes, gatewayPodName1, gatewayPodName2, servingNamespace, sleepCommand, addressesv4, addressesv6, false) - }) + tcpDumpSync := sync.WaitGroup{} + tcpDumpSync.Add(len(gwContainers)) - ginkgo.AfterEach(func() { - cleanExGWContainers(clientSet, []string{gwContainer1, gwContainer2}, addressesv4, addressesv6) - }) + for _, gwContainer := range gwContainers { + go checkPingOnContainer(gwContainer, srcPingPodName, icmpCommand, &tcpDumpSync) + } - ginkgotable.DescribeTable("Should validate ICMP connectivity to an external gateway's loopback address via a pod with external gateway annotations enabled", - func(addresses *gatewayTestIPs, icmpCommand string) { - if addresses.srcPodIP == 
"" || addresses.nodeIP == "" { - skipper.Skipf("Skipping as pod ip / node ip are not set pod ip %s node ip %s", addresses.srcPodIP, addresses.nodeIP) - } + pingSync := sync.WaitGroup{} + // Verify the external gateway loopback address running on the external container is reachable and + // that traffic from the source ping pod is proxied through the pod in the serving namespace + ginkgo.By("Verifying connectivity via the gateway namespace to the remote addresses") + for _, t := range addresses.targetIPs { + pingSync.Add(1) + go func(target string) { + defer ginkgo.GinkgoRecover() + defer pingSync.Done() + _, err := framework.RunKubectl(f.Namespace.Name, "exec", srcPingPodName, "--", "ping", "-c", testTimeout, target) + framework.ExpectNoError(err, "Failed to ping remote gateway %s from pod %s", target, srcPingPodName) + }(t) + } + pingSync.Wait() + tcpDumpSync.Wait() + }, + ginkgotable.Entry("ipv4", &addressesv4, "icmp"), + ginkgotable.Entry("ipv6", &addressesv6, "icmp6")) + + ginkgotable.DescribeTable("Should validate TCP/UDP connectivity to an external gateway's loopback address via a pod with external gateway annotations enabled", + func(protocol string, addresses *gatewayTestIPs, destPort, destPortOnPod int) { + if addresses.srcPodIP == "" || addresses.nodeIP == "" { + skipper.Skipf("Skipping as pod ip / node ip are not set pod ip %s node ip %s", addresses.srcPodIP, addresses.nodeIP) + } - ginkgo.By(fmt.Sprintf("Verifying connectivity to the pod [%s] from external gateways", addresses.srcPodIP)) - for _, gwContainer := range gwContainers { - _, err := runCommand(containerRuntime, "exec", gwContainer, "ping", "-c", testTimeout, addresses.srcPodIP) - framework.ExpectNoError(err, "Failed to ping %s from container %s", addresses.srcPodIP, gwContainer) - } + for _, container := range gwContainers { + reachPodFromContainer(addresses.srcPodIP, strconv.Itoa(destPortOnPod), srcPingPodName, container, protocol) + } - tcpDumpSync := sync.WaitGroup{} - 
tcpDumpSync.Add(len(gwContainers)) + expectedHostNames := make(map[string]struct{}) + for _, c := range gwContainers { + res, err := runCommand(containerRuntime, "exec", c, "hostname") + framework.ExpectNoError(err, "failed to run hostname in %s", c) + hostname := strings.TrimSuffix(res, "\n") + framework.Logf("Hostname for %s is %s", c, hostname) + expectedHostNames[hostname] = struct{}{} + } + framework.Logf("Expected hostnames are %v", expectedHostNames) - for _, gwContainer := range gwContainers { - go checkPingOnContainer(gwContainer, srcPingPodName, icmpCommand, &tcpDumpSync) - } + ginkgo.By("Checking that external ips are reachable with both gateways") + returnedHostNames := make(map[string]struct{}) + target := addresses.targetIPs[0] + success := false + for i := 0; i < 20; i++ { + args := []string{"exec", srcPingPodName, "--"} + if protocol == "tcp" { + args = append(args, "bash", "-c", fmt.Sprintf("echo | nc -w 1 %s %d", target, destPort)) + } else { + args = append(args, "bash", "-c", fmt.Sprintf("echo | nc -w 1 -u %s %d", target, destPort)) + } + res, err := framework.RunKubectl(f.Namespace.Name, args...) 
+ framework.ExpectNoError(err, "failed to reach %s (%s)", target, protocol) + hostname := strings.TrimSuffix(res, "\n") + if hostname != "" { + returnedHostNames[hostname] = struct{}{} + } - pingSync := sync.WaitGroup{} - // Verify the external gateway loopback address running on the external container is reachable and - // that traffic from the source ping pod is proxied through the pod in the serving namespace - ginkgo.By("Verifying connectivity via the gateway namespace to the remote addresses") - for _, t := range addresses.targetIPs { - pingSync.Add(1) - go func(target string) { - defer ginkgo.GinkgoRecover() - defer pingSync.Done() - _, err := framework.RunKubectl(f.Namespace.Name, "exec", srcPingPodName, "--", "ping", "-c", testTimeout, target) - framework.ExpectNoError(err, "Failed to ping remote gateway %s from pod %s", target, srcPingPodName) - }(t) - } - pingSync.Wait() - tcpDumpSync.Wait() - }, - ginkgotable.Entry("ipv4", &addressesv4, "icmp"), - ginkgotable.Entry("ipv6", &addressesv6, "icmp6")) - - ginkgotable.DescribeTable("Should validate TCP/UDP connectivity to an external gateway's loopback address via a pod with external gateway annotations enabled", - func(protocol string, addresses *gatewayTestIPs, destPort, destPortOnPod int) { - if addresses.srcPodIP == "" || addresses.nodeIP == "" { - skipper.Skipf("Skipping as pod ip / node ip are not set pod ip %s node ip %s", addresses.srcPodIP, addresses.nodeIP) - } + if cmp.Equal(returnedHostNames, expectedHostNames) { + success = true + break + } + } + framework.Logf("Received hostnames for protocol %s are %v ", protocol, returnedHostNames) - for _, container := range gwContainers { - reachPodFromContainer(addresses.srcPodIP, strconv.Itoa(destPortOnPod), srcPingPodName, container, protocol) - } + if !success { + framework.Failf("Failed to hit all the external gateways via for protocol %s, diff %s", protocol, cmp.Diff(expectedHostNames, returnedHostNames)) + } - expectedHostNames := 
make(map[string]struct{}) - for _, c := range gwContainers { - res, err := runCommand(containerRuntime, "exec", c, "hostname") - framework.ExpectNoError(err, "failed to run hostname in %s", c) - hostname := strings.TrimSuffix(res, "\n") - framework.Logf("Hostname for %s is %s", c, hostname) - expectedHostNames[hostname] = struct{}{} - } - framework.Logf("Expected hostnames are %v", expectedHostNames) - - ginkgo.By("Checking that external ips are reachable with both gateways") - returnedHostNames := make(map[string]struct{}) - target := addresses.targetIPs[0] - success := false - for i := 0; i < 20; i++ { - args := []string{"exec", srcPingPodName, "--"} - if protocol == "tcp" { - args = append(args, "bash", "-c", fmt.Sprintf("echo | nc -w 1 %s %d", target, destPort)) + }, + ginkgotable.Entry("UDP ipv4", "udp", &addressesv4, externalUDPPort, srcUDPPort), + ginkgotable.Entry("TCP ipv4", "tcp", &addressesv4, externalTCPPort, srcHTTPPort), + ginkgotable.Entry("UDP ipv6", "udp", &addressesv6, externalUDPPort, srcUDPPort), + ginkgotable.Entry("TCP ipv6", "tcp", &addressesv6, externalTCPPort, srcHTTPPort)) + }) + + // Validate pods can reach a network running in multiple container's loopback + // addresses via two external gateways running on eth0 of the container without + // any tunnel encap. This test defines two external gateways and validates ECMP + // functionality to the container loopbacks. To verify traffic reaches the + // gateways, tcpdump is running on the external gateways and will exit successfully + // once an ICMP packet is received from the annotated pod in the k8s cluster. + // Two additional gateways are added to verify the tcp / udp protocols. + // They run the netexec command, and the pod asks to return their hostname. + // The test checks that both hostnames are collected at least once. 
+ var _ = ginkgo.Describe("e2e multiple external gateway validation", func() { + const ( + svcname string = "novxlan-externalgw-ecmp" + gwContainer1 string = "gw-test-container1" + gwContainer2 string = "gw-test-container2" + testTimeout string = "30" + ecmpRetry int = 20 + srcPodName = "e2e-exgw-src-pod" + externalTCPPort = 80 + externalUDPPort = 90 + ) + + f := wrappedTestFramework(svcname) + + var gwContainers []string + var addressesv4, addressesv6 gatewayTestIPs + + ginkgo.BeforeEach(func() { + // retrieve worker node names + nodes, err := e2enode.GetBoundedReadySchedulableNodes(f.ClientSet, 3) + framework.ExpectNoError(err) + if len(nodes.Items) < 3 { + framework.Failf( + "Test requires >= 3 Ready nodes, but there are only %v nodes", + len(nodes.Items)) + } + + if externalContainerNetwork == "host" { + skipper.Skipf("Skipping as host network doesn't support multiple external gateways") + } + + gwContainers, addressesv4, addressesv6 = setupGatewayContainers(f, nodes, gwContainer1, gwContainer2, srcPodName, externalUDPPort, externalTCPPort, ecmpRetry) + + }) + + ginkgo.AfterEach(func() { + // tear down the containers simulating the gateways + deleteClusterExternalContainer(gwContainer1) + deleteClusterExternalContainer(gwContainer2) + resetGatewayAnnotations(f) + }) + + ginkgotable.DescribeTable("Should validate ICMP connectivity to multiple external gateways for an ECMP scenario", func(addresses *gatewayTestIPs, icmpToDump string) { + if addresses.srcPodIP == "" || addresses.nodeIP == "" { + skipper.Skipf("Skipping as pod ip / node ip are not set pod ip %s node ip %s", addresses.srcPodIP, addresses.nodeIP) + } + + annotateNamespaceForGateway(f.Namespace.Name, false, addresses.gatewayIPs[:]...) 
+ + ginkgo.By("Verifying connectivity to the pod from external gateways") + for _, gwContainer := range gwContainers { + _, err := runCommand(containerRuntime, "exec", gwContainer, "ping", "-c", testTimeout, addresses.srcPodIP) + framework.ExpectNoError(err, "Failed to ping %s from container %s", addresses.srcPodIP, gwContainer) + } + + ginkgo.By("Verifying connectivity to the pod from external gateways with large packets > pod MTU") + for _, gwContainer := range gwContainers { + _, err := runCommand(containerRuntime, "exec", gwContainer, "ping", "-s", "1420", "-c", testTimeout, addresses.srcPodIP) + framework.ExpectNoError(err, "Failed to ping %s from container %s", addresses.srcPodIP, gwContainer) + } + + // Verify the gateways and remote loopback addresses are reachable from the pod. + // Iterate checking connectivity to the loopbacks on the gateways until tcpdump see + // the traffic or 20 attempts fail. Odds of a false negative here is ~ (1/2)^20 + ginkgo.By("Verifying ecmp connectivity to the external gateways by iterating through the targets") + + // Check for egress traffic to both gateway loopback addresses using tcpdump, since + // /proc/net/dev counters only record the ingress interface traffic is received on. + // The test will waits until an ICMP packet is matched on the gateways or fail the + // test if a packet to the loopback is not received within the timer interval. + // If an ICMP packet is never detected, return the error via the specified chanel. + + tcpDumpSync := sync.WaitGroup{} + tcpDumpSync.Add(len(gwContainers)) + for _, gwContainer := range gwContainers { + go checkPingOnContainer(gwContainer, srcPodName, icmpToDump, &tcpDumpSync) + } + + pingSync := sync.WaitGroup{} + + // spawn a goroutine to asynchronously (to speed up the test) + // to ping the gateway loopbacks on both containers via ECMP. 
+ for _, address := range addresses.targetIPs { + pingSync.Add(1) + go func(target string) { + defer ginkgo.GinkgoRecover() + defer pingSync.Done() + _, err := framework.RunKubectl(f.Namespace.Name, "exec", srcPodName, "--", "ping", "-c", testTimeout, target) + if err != nil { + framework.Logf("error generating a ping from the test pod %s: %v", srcPodName, err) + } + }(address) + } + pingSync.Wait() + tcpDumpSync.Wait() + + }, ginkgotable.Entry("IPV4", &addressesv4, "icmp"), + ginkgotable.Entry("IPV6", &addressesv6, "icmp6")) + + // This test runs a listener on the external container, returning the host name both on tcp and udp. + // The src pod tries to hit the remote address until both the containers are hit. + ginkgotable.DescribeTable("Should validate TCP/UDP connectivity to multiple external gateways for a UDP / TCP scenario", func(addresses *gatewayTestIPs, protocol string, destPort, destPortOnPod int) { + if addresses.srcPodIP == "" || addresses.nodeIP == "" { + skipper.Skipf("Skipping as pod ip / node ip are not set pod ip %s node ip %s", addresses.srcPodIP, addresses.nodeIP) + } + + annotateNamespaceForGateway(f.Namespace.Name, false, addresses.gatewayIPs[:]...) 
+ + for _, container := range gwContainers { + reachPodFromContainer(addresses.srcPodIP, strconv.Itoa(destPortOnPod), srcPodName, container, protocol) + } + + expectedHostNames := hostNamesForContainers(gwContainers) + framework.Logf("Expected hostnames are %v", expectedHostNames) + + returnedHostNames := make(map[string]struct{}) + success := false + + // Picking only the first address, the one the udp listener is set for + target := addresses.targetIPs[0] + for i := 0; i < 20; i++ { + hostname := pokeHostnameViaNC(srcPodName, f.Namespace.Name, protocol, target, destPort) + if hostname != "" { + returnedHostNames[hostname] = struct{}{} + } + if cmp.Equal(returnedHostNames, expectedHostNames) { + success = true + break + } + } + + framework.Logf("Received hostnames for protocol %s are %v ", protocol, returnedHostNames) + + if !success { + framework.Failf("Failed to hit all the external gateways via for protocol %s, diff %s", protocol, cmp.Diff(expectedHostNames, returnedHostNames)) + } + + }, ginkgotable.Entry("IPV4 udp", &addressesv4, "udp", externalUDPPort, srcUDPPort), + ginkgotable.Entry("IPV4 tcp", &addressesv4, "tcp", externalTCPPort, srcHTTPPort), + ginkgotable.Entry("IPV6 udp", &addressesv6, "udp", externalUDPPort, srcUDPPort), + ginkgotable.Entry("IPV6 tcp", &addressesv6, "tcp", externalTCPPort, srcHTTPPort)) + }) + + var _ = ginkgo.Describe("e2e multiple external gateway stale conntrack entry deletion validation", func() { + const ( + svcname string = "novxlan-externalgw-ecmp" + gwContainer1 string = "gw-test-container1" + gwContainer2 string = "gw-test-container2" + srcPodName string = "e2e-exgw-src-pod" + gatewayPodName1 string = "e2e-gateway-pod1" + gatewayPodName2 string = "e2e-gateway-pod2" + ) + + var ( + servingNamespace string + ) + + f := wrappedTestFramework(svcname) + + var ( + addressesv4, addressesv6 gatewayTestIPs + sleepCommand []string + nodes *v1.NodeList + err error + clientSet kubernetes.Interface + ) + + ginkgo.BeforeEach(func() { + 
clientSet = f.ClientSet // so it can be used in AfterEach + // retrieve worker node names + nodes, err = e2enode.GetBoundedReadySchedulableNodes(clientSet, 3) + framework.ExpectNoError(err) + if len(nodes.Items) < 3 { + framework.Failf( + "Test requires >= 3 Ready nodes, but there are only %v nodes", + len(nodes.Items)) + } + + if externalContainerNetwork == "host" { + skipper.Skipf("Skipping as host network doesn't support multiple external gateways") + } + + ns, err := f.CreateNamespace("exgw-conntrack-serving", nil) + framework.ExpectNoError(err) + servingNamespace = ns.Name + + addressesv4, addressesv6 = setupGatewayContainersForConntrackTest(f, nodes, gwContainer1, gwContainer2, srcPodName) + sleepCommand = []string{"bash", "-c", "sleep 20000"} + _, err = createGenericPod(f, gatewayPodName1, nodes.Items[0].Name, servingNamespace, sleepCommand) + framework.ExpectNoError(err, "Create and annotate the external gw pods to manage the src app pod namespace, failed: %v", err) + _, err = createGenericPod(f, gatewayPodName2, nodes.Items[1].Name, servingNamespace, sleepCommand) + framework.ExpectNoError(err, "Create and annotate the external gw pods to manage the src app pod namespace, failed: %v", err) + }) + + ginkgo.AfterEach(func() { + // tear down the containers and pods simulating the gateways + ginkgo.By("Deleting the gateway containers") + deleteClusterExternalContainer(gwContainer1) + deleteClusterExternalContainer(gwContainer2) + resetGatewayAnnotations(f) + }) + + ginkgotable.DescribeTable("Namespace annotation: Should validate conntrack entry deletion for TCP/UDP traffic via multiple external gateways a.k.a ECMP routes", func(addresses *gatewayTestIPs, protocol string) { + if addresses.srcPodIP == "" || addresses.nodeIP == "" { + skipper.Skipf("Skipping as pod ip / node ip are not set pod ip %s node ip %s", addresses.srcPodIP, addresses.nodeIP) + } + ginkgo.By("Annotate the app namespace to get managed by external gateways") + 
annotateNamespaceForGateway(f.Namespace.Name, false, addresses.gatewayIPs...) + + setupIperf3Client := func(container, address string, port int) { + // note iperf3 even when using udp also spawns tcp connection first; so we indirectly also have the tcp connection when using "-u" flag + cmd := []string{containerRuntime, "exec", container, "iperf3", "-u", "-c", address, "-p", fmt.Sprintf("%d", port), "-b", "1M", "-i", "1", "-t", "3", "&"} + _, err := runCommand(cmd...) + framework.ExpectNoError(err, "failed to setup iperf3 client for %s", container) + } + macAddressGW := make([]string, 2) + for i, containerName := range []string{gwContainer1, gwContainer2} { + ginkgo.By("Start iperf3 client from external container to connect to iperf3 server running at the src pod") + setupIperf3Client(containerName, addresses.srcPodIP, 5201+i) + macAddressExtGW, err := net.ParseMAC(getMACAddressesForNetwork(containerName, externalContainerNetwork)) + framework.ExpectNoError(err, "failed to parse MAC address for %s", containerName) + // Trim leading 0s because conntrack dumped labels are just integers + // in hex without leading 0s. 
+ macAddressGW[i] = strings.TrimLeft(strings.Replace(macAddressExtGW.String(), ":", "", -1), "0") + } + + ginkgo.By("Check if conntrack entries for ECMP routes are created for the 2 external gateways") + nodeName := getPod(f, srcPodName).Spec.NodeName + podConnEntriesWithMACLabelsSet := pokeConntrackEntries(nodeName, addresses.srcPodIP, protocol, macAddressGW) + gomega.Expect(podConnEntriesWithMACLabelsSet).To(gomega.Equal(2)) + totalPodConnEntries := pokeConntrackEntries(nodeName, addresses.srcPodIP, protocol, nil) + gomega.Expect(totalPodConnEntries).To(gomega.Equal(6)) // total conntrack entries for this pod/protocol + + ginkgo.By("Remove second external gateway IP from the app namespace annotation") + annotateNamespaceForGateway(f.Namespace.Name, false, addresses.gatewayIPs[0]) + + ginkgo.By("Check if conntrack entries for ECMP routes are removed for the deleted external gateway if traffic is UDP") + podConnEntriesWithMACLabelsSet = pokeConntrackEntries(nodeName, addresses.srcPodIP, protocol, macAddressGW) + totalPodConnEntries = pokeConntrackEntries(nodeName, addresses.srcPodIP, protocol, nil) + if protocol == "udp" { + gomega.Expect(podConnEntriesWithMACLabelsSet).To(gomega.Equal(1)) // we still have the conntrack entry for the remaining gateway + gomega.Expect(totalPodConnEntries).To(gomega.Equal(5)) // 6-1 } else { - args = append(args, "bash", "-c", fmt.Sprintf("echo | nc -w 1 -u %s %d", target, destPort)) + gomega.Expect(podConnEntriesWithMACLabelsSet).To(gomega.Equal(2)) + gomega.Expect(totalPodConnEntries).To(gomega.Equal(6)) + } + + ginkgo.By("Remove first external gateway IP from the app namespace annotation") + annotateNamespaceForGateway(f.Namespace.Name, false, "") + + ginkgo.By("Check if conntrack entries for ECMP routes are removed for the deleted external gateway if traffic is UDP") + podConnEntriesWithMACLabelsSet = pokeConntrackEntries(nodeName, addresses.srcPodIP, protocol, macAddressGW) + totalPodConnEntries = pokeConntrackEntries(nodeName, 
addresses.srcPodIP, protocol, nil) + if protocol == "udp" { + gomega.Expect(podConnEntriesWithMACLabelsSet).To(gomega.Equal(0)) // we don't have any remaining gateways left + gomega.Expect(totalPodConnEntries).To(gomega.Equal(4)) // 6-2 + } else { + gomega.Expect(podConnEntriesWithMACLabelsSet).To(gomega.Equal(2)) + gomega.Expect(totalPodConnEntries).To(gomega.Equal(6)) + } + + }, + ginkgotable.Entry("IPV4 udp", &addressesv4, "udp"), + ginkgotable.Entry("IPV4 tcp", &addressesv4, "tcp"), + ginkgotable.Entry("IPV6 udp", &addressesv6, "udp"), + ginkgotable.Entry("IPV6 tcp", &addressesv6, "tcp")) + + ginkgotable.DescribeTable("ExternalGWPod annotation: Should validate conntrack entry deletion for TCP/UDP traffic via multiple external gateways a.k.a ECMP routes", func(addresses *gatewayTestIPs, protocol string) { + if addresses.srcPodIP == "" || addresses.nodeIP == "" { + skipper.Skipf("Skipping as pod ip / node ip are not set pod ip %s node ip %s", addresses.srcPodIP, addresses.nodeIP) } - res, err := framework.RunKubectl(f.Namespace.Name, args...) 
- framework.ExpectNoError(err, "failed to reach %s (%s)", target, protocol) - hostname := strings.TrimSuffix(res, "\n") - if hostname != "" { - returnedHostNames[hostname] = struct{}{} + ginkgo.By("Annotate the external gw pods to manage the src app pod namespace") + for i, gwPod := range []string{gatewayPodName1, gatewayPodName2} { + networkIPs := fmt.Sprintf("\"%s\"", addresses.gatewayIPs[i]) + if addresses.srcPodIP != "" && addresses.nodeIP != "" { + networkIPs = fmt.Sprintf("\"%s\", \"%s\"", addresses.gatewayIPs[i], addresses.gatewayIPs[i]) + } + annotatePodForGateway(gwPod, servingNamespace, f.Namespace.Name, networkIPs, false) } - if cmp.Equal(returnedHostNames, expectedHostNames) { - success = true - break + // ensure the conntrack deletion tracker annotation is updated + ginkgo.By("Check if the k8s.ovn.org/external-gw-pod-ips got updated for the app namespace") + err := wait.PollImmediate(retryInterval, retryTimeout, func() (bool, error) { + ns := getNamespace(f, f.Namespace.Name) + return (ns.Annotations[externalGatewayPodIPsAnnotation] == fmt.Sprintf("%s,%s", addresses.gatewayIPs[0], addresses.gatewayIPs[1])), nil + }) + framework.ExpectNoError(err, "Check if the k8s.ovn.org/external-gw-pod-ips got updated, failed: %v", err) + + setupIperf3Client := func(container, address string, port int) { + // note iperf3 even when using udp also spawns tcp connection first; so we indirectly also have the tcp connection when using "-u" flag + cmd := []string{containerRuntime, "exec", container, "iperf3", "-u", "-c", address, "-p", fmt.Sprintf("%d", port), "-b", "1M", "-i", "1", "-t", "3", "&"} + _, err := runCommand(cmd...) 
+ framework.ExpectNoError(err, "failed to setup iperf3 client for %s", container) + } + macAddressGW := make([]string, 2) + for i, containerName := range []string{gwContainer1, gwContainer2} { + ginkgo.By("Start iperf3 client from external container to connect to iperf3 server running at the src pod") + setupIperf3Client(containerName, addresses.srcPodIP, 5201+i) + macAddressExtGW, err := net.ParseMAC(getMACAddressesForNetwork(containerName, externalContainerNetwork)) + framework.ExpectNoError(err, "failed to parse MAC address for %s", containerName) + // Trim leading 0s because conntrack dumped labels are just integers + // in hex without leading 0s. + macAddressGW[i] = strings.TrimLeft(strings.Replace(macAddressExtGW.String(), ":", "", -1), "0") } - } - framework.Logf("Received hostnames for protocol %s are %v ", protocol, returnedHostNames) - if !success { - framework.Failf("Failed to hit all the external gateways via for protocol %s, diff %s", protocol, cmp.Diff(expectedHostNames, returnedHostNames)) - } + ginkgo.By("Check if conntrack entries for ECMP routes are created for the 2 external gateways") + nodeName := getPod(f, srcPodName).Spec.NodeName + podConnEntriesWithMACLabelsSet := pokeConntrackEntries(nodeName, addresses.srcPodIP, protocol, macAddressGW) + gomega.Expect(podConnEntriesWithMACLabelsSet).To(gomega.Equal(2)) + totalPodConnEntries := pokeConntrackEntries(nodeName, addresses.srcPodIP, protocol, nil) + gomega.Expect(totalPodConnEntries).To(gomega.Equal(6)) // total conntrack entries for this pod/protocol + + ginkgo.By("Remove second external gateway pod's routing-namespace annotation") + annotatePodForGateway(gatewayPodName2, servingNamespace, "", addresses.gatewayIPs[1], false) + + // ensure the conntrack deletion tracker annotation is updated + ginkgo.By("Check if the k8s.ovn.org/external-gw-pod-ips got updated for the app namespace") + err = wait.PollImmediate(retryInterval, retryTimeout, func() (bool, error) { + ns := getNamespace(f, 
f.Namespace.Name) + return (ns.Annotations[externalGatewayPodIPsAnnotation] == addresses.gatewayIPs[0]), nil + }) + framework.ExpectNoError(err, "Check if the k8s.ovn.org/external-gw-pod-ips got updated, failed: %v", err) + + ginkgo.By("Check if conntrack entries for ECMP routes are removed for the deleted external gateway if traffic is UDP") + podConnEntriesWithMACLabelsSet = pokeConntrackEntries(nodeName, addresses.srcPodIP, protocol, macAddressGW) + totalPodConnEntries = pokeConntrackEntries(nodeName, addresses.srcPodIP, protocol, nil) + if protocol == "udp" { + gomega.Expect(podConnEntriesWithMACLabelsSet).To(gomega.Equal(1)) // we still have the conntrack entry for the remaining gateway + gomega.Expect(totalPodConnEntries).To(gomega.Equal(5)) // 6-1 + } else { + gomega.Expect(podConnEntriesWithMACLabelsSet).To(gomega.Equal(2)) + gomega.Expect(totalPodConnEntries).To(gomega.Equal(6)) + } - }, - ginkgotable.Entry("UDP ipv4", "udp", &addressesv4, externalUDPPort, srcUDPPort), - ginkgotable.Entry("TCP ipv4", "tcp", &addressesv4, externalTCPPort, srcHTTPPort), - ginkgotable.Entry("UDP ipv6", "udp", &addressesv6, externalUDPPort, srcUDPPort), - ginkgotable.Entry("TCP ipv6", "tcp", &addressesv6, externalTCPPort, srcHTTPPort)) -}) + ginkgo.By("Remove first external gateway pod's routing-namespace annotation") + annotatePodForGateway(gatewayPodName1, servingNamespace, "", addresses.gatewayIPs[0], false) + + // ensure the conntrack deletion tracker annotation is updated + ginkgo.By("Check if the k8s.ovn.org/external-gw-pod-ips got updated for the app namespace") + err = wait.PollImmediate(retryInterval, retryTimeout, func() (bool, error) { + ns := getNamespace(f, f.Namespace.Name) + return (ns.Annotations[externalGatewayPodIPsAnnotation] == ""), nil + }) + framework.ExpectNoError(err, "Check if the k8s.ovn.org/external-gw-pod-ips got updated, failed: %v", err) + + ginkgo.By("Check if conntrack entries for ECMP routes are removed for the deleted external gateway if 
traffic is UDP") + podConnEntriesWithMACLabelsSet = pokeConntrackEntries(nodeName, addresses.srcPodIP, protocol, macAddressGW) + totalPodConnEntries = pokeConntrackEntries(nodeName, addresses.srcPodIP, protocol, nil) + if protocol == "udp" { + gomega.Expect(podConnEntriesWithMACLabelsSet).To(gomega.Equal(0)) // we don't have any remaining gateways left + gomega.Expect(totalPodConnEntries).To(gomega.Equal(4)) // 6-2 + } else { + gomega.Expect(podConnEntriesWithMACLabelsSet).To(gomega.Equal(2)) + gomega.Expect(totalPodConnEntries).To(gomega.Equal(6)) + } -// Validate pods can reach a network running in multiple container's loopback -// addresses via two external gateways running on eth0 of the container without -// any tunnel encap. This test defines two external gateways and validates ECMP -// functionality to the container loopbacks. To verify traffic reaches the -// gateways, tcpdump is running on the external gateways and will exit successfully -// once an ICMP packet is received from the annotated pod in the k8s cluster. -// Two additional gateways are added to verify the tcp / udp protocols. -// They run the netexec command, and the pod asks to return their hostname. -// The test checks that both hostnames are collected at least once. -var _ = ginkgo.Describe("e2e multiple external gateway validation", func() { - const ( - svcname string = "novxlan-externalgw-ecmp" - gwContainer1 string = "gw-test-container1" - gwContainer2 string = "gw-test-container2" - testTimeout string = "30" - ecmpRetry int = 20 - srcPodName = "e2e-exgw-src-pod" - externalTCPPort = 80 - externalUDPPort = 90 - ) + }, + ginkgotable.Entry("IPV4 udp", &addressesv4, "udp"), + ginkgotable.Entry("IPV4 tcp", &addressesv4, "tcp"), + ginkgotable.Entry("IPV6 udp", &addressesv6, "udp"), + ginkgotable.Entry("IPV6 tcp", &addressesv6, "tcp")) + }) - f := wrappedTestFramework(svcname) + // BFD Tests are dual of external gateway. 
The only difference is that they enable BFD on ovn and + // on the external containers, and after doing one round veryfing that the traffic reaches both containers, + // they delete one and verify that the traffic is always reaching the only alive container. + var _ = ginkgo.Context("BFD", func() { + var _ = ginkgo.Describe("e2e non-vxlan external gateway through an annotated gateway pod", func() { + const ( + svcname string = "externalgw-pod-novxlan" + gwContainer1 string = "ex-gw-container1" + gwContainer2 string = "ex-gw-container2" + srcPingPodName string = "e2e-exgw-src-ping-pod" + gatewayPodName1 string = "e2e-gateway-pod1" + gatewayPodName2 string = "e2e-gateway-pod2" + externalTCPPort = 91 + externalUDPPort = 90 + ecmpRetry int = 20 + testTimeout string = "20" + defaultPolicyName = "default-route-policy" + ) + + var ( + sleepCommand = []string{"bash", "-c", "sleep 20000"} + addressesv4, addressesv6 gatewayTestIPs + clientSet kubernetes.Interface + servingNamespace string + ) + + var ( + gwContainers []string + ) + + f := wrappedTestFramework(svcname) + + ginkgo.BeforeEach(func() { + clientSet = f.ClientSet // so it can be used in AfterEach + // retrieve worker node names + nodes, err := e2enode.GetBoundedReadySchedulableNodes(f.ClientSet, 3) + framework.ExpectNoError(err) + if len(nodes.Items) < 3 { + framework.Failf( + "Test requires >= 3 Ready nodes, but there are only %v nodes", + len(nodes.Items)) + } - var gwContainers []string - var addressesv4, addressesv6 gatewayTestIPs + ns, err := f.CreateNamespace("exgw-bfd-serving", nil) + framework.ExpectNoError(err) + servingNamespace = ns.Name - ginkgo.BeforeEach(func() { - // retrieve worker node names - nodes, err := e2enode.GetBoundedReadySchedulableNodes(f.ClientSet, 3) - framework.ExpectNoError(err) - if len(nodes.Items) < 3 { - framework.Failf( - "Test requires >= 3 Ready nodes, but there are only %v nodes", - len(nodes.Items)) - } + setupBFD := setupBFDOnContainer(nodes.Items) + gwContainers, 
addressesv4, addressesv6 = setupGatewayContainers(f, nodes, gwContainer1, gwContainer2, srcPingPodName, externalUDPPort, externalTCPPort, ecmpRetry, setupBFD) + setupAnnotatedGatewayPods(f, nodes, gatewayPodName1, gatewayPodName2, servingNamespace, sleepCommand, addressesv4, addressesv6, true) + }) - if externalContainerNetwork == "host" { - skipper.Skipf("Skipping as host network doesn't support multiple external gateways") - } + ginkgo.AfterEach(func() { + cleanExGWContainers(clientSet, []string{gwContainer1, gwContainer2}, addressesv4, addressesv6) + resetGatewayAnnotations(f) + }) - gwContainers, addressesv4, addressesv6 = setupGatewayContainers(f, nodes, gwContainer1, gwContainer2, srcPodName, externalUDPPort, externalTCPPort, ecmpRetry) + ginkgotable.DescribeTable("Should validate ICMP connectivity to an external gateway's loopback address via a pod with external gateway annotations enabled", + func(addresses *gatewayTestIPs, icmpCommand string) { + if addresses.srcPodIP == "" || addresses.nodeIP == "" { + skipper.Skipf("Skipping as pod ip / node ip are not set pod ip %s node ip %s", addresses.srcPodIP, addresses.nodeIP) + } - // remove the routing external annotation - annotateArgs := []string{ - "annotate", - "namespace", - f.Namespace.Name, - "k8s.ovn.org/routing-external-gws-", - } - ginkgo.By("Resetting the gw annotation") - framework.RunKubectlOrDie(f.Namespace.Name, annotateArgs...) 
- }) + ginkgo.By("Verifying connectivity to the pod from external gateways") + for _, gwContainer := range gwContainers { + _, err := runCommand(containerRuntime, "exec", gwContainer, "ping", "-c", testTimeout, addresses.srcPodIP) + framework.ExpectNoError(err, "Failed to ping %s from container %s", addresses.srcPodIP, gwContainer) + } + + // This is needed for bfd to sync up + time.Sleep(3 * time.Second) + + for _, gwContainer := range gwContainers { + framework.ExpectEqual(isBFDPaired(gwContainer, addresses.nodeIP), true, "Bfd not paired") + } + + tcpDumpSync := sync.WaitGroup{} + tcpDumpSync.Add(len(gwContainers)) + for _, gwContainer := range gwContainers { + go checkPingOnContainer(gwContainer, srcPingPodName, icmpCommand, &tcpDumpSync) + } + + // Verify the external gateway loopback address running on the external container is reachable and + // that traffic from the source ping pod is proxied through the pod in the serving namespace + ginkgo.By("Verifying connectivity via the gateway namespace to the remote addresses") + + pingSync := sync.WaitGroup{} + // spawn a goroutine to asynchronously (to speed up the test) + // to ping the gateway loopbacks on both containers via ECMP. 
+ for _, address := range addresses.targetIPs { + pingSync.Add(1) + go func(target string) { + defer ginkgo.GinkgoRecover() + defer pingSync.Done() + _, err := framework.RunKubectl(f.Namespace.Name, "exec", srcPingPodName, "--", "ping", "-c", testTimeout, target) + if err != nil { + framework.Logf("error generating a ping from the test pod %s: %v", srcPingPodName, err) + } + }(address) + } + + pingSync.Wait() + tcpDumpSync.Wait() + + if len(gwContainers) > 1 { + ginkgo.By("Deleting one container") + deleteClusterExternalContainer(gwContainers[1]) + time.Sleep(3 * time.Second) // bfd timeout + + tcpDumpSync = sync.WaitGroup{} + tcpDumpSync.Add(1) + go checkPingOnContainer(gwContainers[0], srcPingPodName, icmpCommand, &tcpDumpSync) + + // Verify the external gateway loopback address running on the external container is reachable and + // that traffic from the source ping pod is proxied through the pod in the serving namespace + ginkgo.By("Verifying connectivity via the gateway namespace to the remote addresses") + pingSync = sync.WaitGroup{} + + for _, t := range addresses.targetIPs { + pingSync.Add(1) + go func(target string) { + defer ginkgo.GinkgoRecover() + defer pingSync.Done() + _, err := framework.RunKubectl(f.Namespace.Name, "exec", srcPingPodName, "--", "ping", "-c", testTimeout, target) + framework.ExpectNoError(err, "Failed to ping remote gateway %s from pod %s", target, srcPingPodName) + }(t) + } + pingSync.Wait() + tcpDumpSync.Wait() + } + }, + ginkgotable.Entry("ipv4", &addressesv4, "icmp"), + ginkgotable.Entry("ipv6", &addressesv6, "icmp6")) + + ginkgotable.DescribeTable("Should validate TCP/UDP connectivity to an external gateway's loopback address via a pod with external gateway annotations enabled", + func(protocol string, addresses *gatewayTestIPs, destPort int) { + if addresses.srcPodIP == "" || addresses.nodeIP == "" { + skipper.Skipf("Skipping as pod ip / node ip are not set pod ip %s node ip %s", addresses.srcPodIP, addresses.nodeIP) + } + + 
for _, gwContainer := range gwContainers { + _, err := runCommand(containerRuntime, "exec", gwContainer, "ping", "-c", testTimeout, addresses.srcPodIP) + framework.ExpectNoError(err, "Failed to ping %s from container %s", addresses.srcPodIP, gwContainer) + } + + for _, gwContainer := range gwContainers { + framework.ExpectEqual(isBFDPaired(gwContainer, addresses.nodeIP), true, "Bfd not paired") + } + + expectedHostNames := hostNamesForContainers(gwContainers) + framework.Logf("Expected hostnames are %v", expectedHostNames) + + returnedHostNames := make(map[string]struct{}) + target := addresses.targetIPs[0] + success := false + for i := 0; i < 20; i++ { + hostname := pokeHostnameViaNC(srcPingPodName, f.Namespace.Name, protocol, target, destPort) + if hostname != "" { + returnedHostNames[hostname] = struct{}{} + } + + if cmp.Equal(returnedHostNames, expectedHostNames) { + success = true + break + } + } + framework.Logf("Received hostnames for protocol %s are %v ", protocol, returnedHostNames) + + if !success { + framework.Failf("Failed to hit all the external gateways via for protocol %s, diff %s", protocol, cmp.Diff(expectedHostNames, returnedHostNames)) + } + + if len(gwContainers) > 1 { + ginkgo.By("Deleting one container") + deleteClusterExternalContainer(gwContainers[1]) + ginkgo.By("Waiting for BFD to sync") + time.Sleep(3 * time.Second) // bfd timeout + + // ECMP should direct all the traffic to the only container + expectedHostName := hostNameForContainer(gwContainers[0]) + + ginkgo.By("Checking hostname multiple times") + for i := 0; i < 20; i++ { + hostname := pokeHostnameViaNC(srcPingPodName, f.Namespace.Name, protocol, target, destPort) + framework.ExpectEqual(expectedHostName, hostname, "Hostname returned by nc not as expected") + } + } + }, + ginkgotable.Entry("UDP ipv4", "udp", &addressesv4, externalUDPPort), + ginkgotable.Entry("TCP ipv4", "tcp", &addressesv4, externalTCPPort), + ginkgotable.Entry("UDP ipv6", "udp", &addressesv6, externalUDPPort), + 
ginkgotable.Entry("TCP ipv6", "tcp", &addressesv6, externalTCPPort)) + }) + + // Validate pods can reach a network running in multiple container's loopback + // addresses via two external gateways running on eth0 of the container without + // any tunnel encap. This test defines two external gateways and validates ECMP + // functionality to the container loopbacks. To verify traffic reaches the + // gateways, tcpdump is running on the external gateways and will exit successfully + // once an ICMP packet is received from the annotated pod in the k8s cluster. + // Two additional gateways are added to verify the tcp / udp protocols. + // They run the netexec command, and the pod asks to return their hostname. + // The test checks that both hostnames are collected at least once. + var _ = ginkgo.Describe("e2e multiple external gateway validation", func() { + const ( + svcname string = "novxlan-externalgw-ecmp" + gwContainer1 string = "gw-test-container1" + gwContainer2 string = "gw-test-container2" + testTimeout string = "30" + ecmpRetry int = 20 + srcPodName = "e2e-exgw-src-pod" + externalTCPPort = 80 + externalUDPPort = 90 + ) + + var ( + gwContainers []string + ) + + testContainer := fmt.Sprintf("%s-container", srcPodName) + testContainerFlag := fmt.Sprintf("--container=%s", testContainer) + + f := wrappedTestFramework(svcname) + + var addressesv4, addressesv6 gatewayTestIPs + + ginkgo.BeforeEach(func() { + nodes, err := e2enode.GetBoundedReadySchedulableNodes(f.ClientSet, 3) + framework.ExpectNoError(err) + if len(nodes.Items) < 3 { + framework.Failf( + "Test requires >= 3 Ready nodes, but there are only %v nodes", + len(nodes.Items)) + } + + if externalContainerNetwork == "host" { + skipper.Skipf("Skipping as host network doesn't support multiple external gateways") + } + + setupBFD := setupBFDOnContainer(nodes.Items) + gwContainers, addressesv4, addressesv6 = setupGatewayContainers(f, nodes, gwContainer1, gwContainer2, srcPodName, externalUDPPort, externalTCPPort, 
ecmpRetry, setupBFD) + + }) + + ginkgo.AfterEach(func() { + // tear down the containers simulating the gateways + deleteClusterExternalContainer(gwContainer1) + deleteClusterExternalContainer(gwContainer2) + resetGatewayAnnotations(f) + }) + + ginkgotable.DescribeTable("Should validate ICMP connectivity to multiple external gateways for an ECMP scenario", func(addresses *gatewayTestIPs, icmpToDump string) { + if addresses.srcPodIP == "" || addresses.nodeIP == "" { + skipper.Skipf("Skipping as pod ip / node ip are not set pod ip %s node ip %s", addresses.srcPodIP, addresses.nodeIP) + } + + annotateNamespaceForGateway(f.Namespace.Name, true, addresses.gatewayIPs[:]...) + for _, gwContainer := range gwContainers { + _, err := runCommand(containerRuntime, "exec", gwContainer, "ping", "-c", testTimeout, addresses.srcPodIP) + framework.ExpectNoError(err, "Failed to ping %s from container %s", addresses.srcPodIP, gwContainer) + } + + // This is needed for bfd to sync up + time.Sleep(3 * time.Second) + + for _, gwContainer := range gwContainers { + framework.ExpectEqual(isBFDPaired(gwContainer, addresses.nodeIP), true, "Bfd not paired") + } + + // Verify the gateways and remote loopback addresses are reachable from the pod. + // Iterate checking connectivity to the loopbacks on the gateways until tcpdump see + // the traffic or 20 attempts fail. Odds of a false negative here is ~ (1/2)^20 + ginkgo.By("Verifying ecmp connectivity to the external gateways by iterating through the targets") + + // Check for egress traffic to both gateway loopback addresses using tcpdump, since + // /proc/net/dev counters only record the ingress interface traffic is received on. + // The test will waits until an ICMP packet is matched on the gateways or fail the + // test if a packet to the loopback is not received within the timer interval. + // If an ICMP packet is never detected, return the error via the specified chanel. 
+ + tcpDumpSync := sync.WaitGroup{} + tcpDumpSync.Add(len(gwContainers)) + for _, gwContainer := range gwContainers { + go checkPingOnContainer(gwContainer, srcPodName, icmpToDump, &tcpDumpSync) + } + + // spawn a goroutine to asynchronously (to speed up the test) + // to ping the gateway loopbacks on both containers via ECMP. + + pingSync := sync.WaitGroup{} + + // spawn a goroutine to asynchronously (to speed up the test) + // to ping the gateway loopbacks on both containers via ECMP. + for _, address := range addresses.targetIPs { + pingSync.Add(1) + go func(target string) { + defer ginkgo.GinkgoRecover() + defer pingSync.Done() + _, err := framework.RunKubectl(f.Namespace.Name, "exec", srcPodName, testContainerFlag, "--", "ping", "-c", testTimeout, target) + if err != nil { + framework.Logf("error generating a ping from the test pod %s: %v", srcPodName, err) + } + }(address) + } + + pingSync.Wait() + tcpDumpSync.Wait() + + ginkgo.By("Deleting one container") + deleteClusterExternalContainer(gwContainers[1]) + time.Sleep(3 * time.Second) // bfd timeout + + pingSync = sync.WaitGroup{} + tcpDumpSync = sync.WaitGroup{} + + tcpDumpSync.Add(1) + go checkPingOnContainer(gwContainers[0], srcPodName, icmpToDump, &tcpDumpSync) + + // spawn a goroutine to asynchronously (to speed up the test) + // to ping the gateway loopbacks on both containers via ECMP. 
+ for _, address := range addresses.targetIPs { + pingSync.Add(1) + go func(target string) { + defer ginkgo.GinkgoRecover() + defer pingSync.Done() + _, err := framework.RunKubectl(f.Namespace.Name, "exec", srcPodName, testContainerFlag, "--", "ping", "-c", testTimeout, target) + if err != nil { + framework.Logf("error generating a ping from the test pod %s: %v", srcPodName, err) + } + }(address) + } + + pingSync.Wait() + tcpDumpSync.Wait() + + }, ginkgotable.Entry("IPV4", &addressesv4, "icmp"), + ginkgotable.Entry("IPV6", &addressesv6, "icmp6")) + + // This test runs a listener on the external container, returning the host name both on tcp and udp. + // The src pod tries to hit the remote address until both the containers are hit. + ginkgotable.DescribeTable("Should validate TCP/UDP connectivity to multiple external gateways for a UDP / TCP scenario", func(addresses *gatewayTestIPs, protocol string, destPort int) { + if addresses.srcPodIP == "" || addresses.nodeIP == "" { + skipper.Skipf("Skipping as pod ip / node ip are not set pod ip %s node ip %s", addresses.srcPodIP, addresses.nodeIP) + } + + annotateNamespaceForGateway(f.Namespace.Name, true, addresses.gatewayIPs[:]...) 
+ + for _, gwContainer := range gwContainers { + _, err := runCommand(containerRuntime, "exec", gwContainer, "ping", "-c", testTimeout, addresses.srcPodIP) + framework.ExpectNoError(err, "Failed to ping %s from container %s", addresses.srcPodIP, gwContainer) + } + + // This is needed for bfd to sync up + time.Sleep(3 * time.Second) + + for _, gwContainer := range gwContainers { + framework.ExpectEqual(isBFDPaired(gwContainer, addresses.nodeIP), true, "Bfd not paired") + } + + expectedHostNames := hostNamesForContainers(gwContainers) + framework.Logf("Expected hostnames are %v", expectedHostNames) + + returnedHostNames := make(map[string]struct{}) + success := false + + // Picking only the first address, the one the udp listener is set for + target := addresses.targetIPs[0] + for i := 0; i < 20; i++ { + hostname := pokeHostnameViaNC(srcPodName, f.Namespace.Name, protocol, target, destPort) + if hostname != "" { + returnedHostNames[hostname] = struct{}{} + } + if cmp.Equal(returnedHostNames, expectedHostNames) { + success = true + break + } + } + + framework.Logf("Received hostnames for protocol %s are %v ", protocol, returnedHostNames) + + if !success { + framework.Failf("Failed to hit all the external gateways via for protocol %s, diff %s", protocol, cmp.Diff(expectedHostNames, returnedHostNames)) + } + + ginkgo.By("Deleting one container") + deleteClusterExternalContainer(gwContainers[1]) + ginkgo.By("Waiting for BFD to sync") + time.Sleep(3 * time.Second) // bfd timeout + + // ECMP should direct all the traffic to the only container + expectedHostName := hostNameForContainer(gwContainers[0]) + + ginkgo.By("Checking hostname multiple times") + for i := 0; i < 20; i++ { + hostname := pokeHostnameViaNC(srcPodName, f.Namespace.Name, protocol, target, destPort) + framework.ExpectEqual(expectedHostName, hostname, "Hostname returned by nc not as expected") + } + }, ginkgotable.Entry("IPV4 udp", &addressesv4, "udp", externalUDPPort), + ginkgotable.Entry("IPV4 tcp", 
&addressesv4, "tcp", externalTCPPort), + ginkgotable.Entry("IPV6 udp", &addressesv6, "udp", externalUDPPort), + ginkgotable.Entry("IPV6 tcp", &addressesv6, "tcp", externalTCPPort)) + }) + }) - ginkgo.AfterEach(func() { - // tear down the containers simulating the gateways - deleteClusterExternalContainer(gwContainer1) - deleteClusterExternalContainer(gwContainer2) }) - ginkgotable.DescribeTable("Should validate ICMP connectivity to multiple external gateways for an ECMP scenario", func(addresses *gatewayTestIPs, icmpToDump string) { - if addresses.srcPodIP == "" || addresses.nodeIP == "" { - skipper.Skipf("Skipping as pod ip / node ip are not set pod ip %s node ip %s", addresses.srcPodIP, addresses.nodeIP) - } + var _ = ginkgo.Context("With Admin Policy Based External Route CRs", func() { + + // Validate pods can reach a network running in a container's looback address via + // an external gateway running on eth0 of the container without any tunnel encap. + // The traffic will get proxied through an annotated pod in the serving namespace. 
+ var _ = ginkgo.Describe("e2e non-vxlan external gateway through a gateway pod", func() { + const ( + svcname string = "externalgw-pod-novxlan" + gwContainer1 string = "ex-gw-container1" + gwContainer2 string = "ex-gw-container2" + srcPingPodName string = "e2e-exgw-src-ping-pod" + gatewayPodName1 string = "e2e-gateway-pod1" + gatewayPodName2 string = "e2e-gateway-pod2" + externalTCPPort = 91 + externalUDPPort = 90 + ecmpRetry int = 20 + testTimeout string = "20" + ) + + var ( + sleepCommand = []string{"bash", "-c", "sleep 20000"} + addressesv4, addressesv6 gatewayTestIPs + clientSet kubernetes.Interface + servingNamespace string + ) + + var ( + gwContainers []string + ) + + f := wrappedTestFramework(svcname) + + ginkgo.BeforeEach(func() { + clientSet = f.ClientSet // so it can be used in AfterEach + // retrieve worker node names + nodes, err := e2enode.GetBoundedReadySchedulableNodes(f.ClientSet, 3) + framework.ExpectNoError(err) + if len(nodes.Items) < 3 { + framework.Failf( + "Test requires >= 3 Ready nodes, but there are only %v nodes", + len(nodes.Items)) + } - annotateNamespaceForGateway(f.Namespace.Name, false, addresses.gatewayIPs[:]...) 
+ ns, err := f.CreateNamespace("exgw-serving", nil) + framework.ExpectNoError(err) + servingNamespace = ns.Name - ginkgo.By("Verifying connectivity to the pod from external gateways") - for _, gwContainer := range gwContainers { - _, err := runCommand(containerRuntime, "exec", gwContainer, "ping", "-c", testTimeout, addresses.srcPodIP) - framework.ExpectNoError(err, "Failed to ping %s from container %s", addresses.srcPodIP, gwContainer) - } + gwContainers, addressesv4, addressesv6 = setupGatewayContainers(f, nodes, gwContainer1, gwContainer2, srcPingPodName, externalUDPPort, externalTCPPort, ecmpRetry) + setupPolicyBasedGatewayPods(f, nodes, gatewayPodName1, gatewayPodName2, servingNamespace, sleepCommand, addressesv4, addressesv6) + }) - ginkgo.By("Verifying connectivity to the pod from external gateways with large packets > pod MTU") - for _, gwContainer := range gwContainers { - _, err := runCommand(containerRuntime, "exec", gwContainer, "ping", "-s", "1420", "-c", testTimeout, addresses.srcPodIP) - framework.ExpectNoError(err, "Failed to ping %s from container %s", addresses.srcPodIP, gwContainer) - } + ginkgo.AfterEach(func() { + deleteAPBExternalRouteCR(defaultPolicyName) + cleanExGWContainers(clientSet, []string{gwContainer1, gwContainer2}, addressesv4, addressesv6) + }) - // Verify the gateways and remote loopback addresses are reachable from the pod. - // Iterate checking connectivity to the loopbacks on the gateways until tcpdump see - // the traffic or 20 attempts fail. 
Odds of a false negative here is ~ (1/2)^20 - ginkgo.By("Verifying ecmp connectivity to the external gateways by iterating through the targets") + ginkgotable.DescribeTable("Should validate ICMP connectivity to an external gateway's loopback address via a gateway pod", + func(addresses *gatewayTestIPs, icmpCommand string) { + if addresses.srcPodIP == "" || addresses.nodeIP == "" { + skipper.Skipf("Skipping as pod ip / node ip are not set pod ip %s node ip %s", addresses.srcPodIP, addresses.nodeIP) + } + createAPBExternalRouteCRWithDynamicHop(defaultPolicyName, f.Namespace.Name, servingNamespace, false, addressesv4.gatewayIPs) - // Check for egress traffic to both gateway loopback addresses using tcpdump, since - // /proc/net/dev counters only record the ingress interface traffic is received on. - // The test will waits until an ICMP packet is matched on the gateways or fail the - // test if a packet to the loopback is not received within the timer interval. - // If an ICMP packet is never detected, return the error via the specified chanel. + ginkgo.By(fmt.Sprintf("Verifying connectivity to the pod [%s] from external gateways", addresses.srcPodIP)) + for _, gwContainer := range gwContainers { + _, err := runCommand(containerRuntime, "exec", gwContainer, "ping", "-c", testTimeout, addresses.srcPodIP) + framework.ExpectNoError(err, "Failed to ping %s from container %s", addresses.srcPodIP, gwContainer) + } - tcpDumpSync := sync.WaitGroup{} - tcpDumpSync.Add(len(gwContainers)) - for _, gwContainer := range gwContainers { - go checkPingOnContainer(gwContainer, srcPodName, icmpToDump, &tcpDumpSync) - } + tcpDumpSync := sync.WaitGroup{} + tcpDumpSync.Add(len(gwContainers)) + + for _, gwContainer := range gwContainers { + go checkPingOnContainer(gwContainer, srcPingPodName, icmpCommand, &tcpDumpSync) + } - pingSync := sync.WaitGroup{} - - // spawn a goroutine to asynchronously (to speed up the test) - // to ping the gateway loopbacks on both containers via ECMP. 
- for _, address := range addresses.targetIPs { - pingSync.Add(1) - go func(target string) { - defer ginkgo.GinkgoRecover() - defer pingSync.Done() - _, err := framework.RunKubectl(f.Namespace.Name, "exec", srcPodName, "--", "ping", "-c", testTimeout, target) - if err != nil { - framework.Logf("error generating a ping from the test pod %s: %v", srcPodName, err) + pingSync := sync.WaitGroup{} + // Verify the external gateway loopback address running on the external container is reachable and + // that traffic from the source ping pod is proxied through the pod in the serving namespace + ginkgo.By("Verifying connectivity via the gateway namespace to the remote addresses") + for _, t := range addresses.targetIPs { + pingSync.Add(1) + go func(target string) { + defer ginkgo.GinkgoRecover() + defer pingSync.Done() + _, err := framework.RunKubectl(f.Namespace.Name, "exec", srcPingPodName, "--", "ping", "-c", testTimeout, target) + framework.ExpectNoError(err, "Failed to ping remote gateway %s from pod %s", target, srcPingPodName) + }(t) + } + pingSync.Wait() + tcpDumpSync.Wait() + }, + ginkgotable.Entry("ipv4", &addressesv4, "icmp"), + ginkgotable.Entry("ipv6", &addressesv6, "icmp6")) + + ginkgotable.DescribeTable("Should validate TCP/UDP connectivity to an external gateway's loopback address via a gateway pod", + func(protocol string, addresses *gatewayTestIPs, destPort, destPortOnPod int) { + if addresses.srcPodIP == "" || addresses.nodeIP == "" { + skipper.Skipf("Skipping as pod ip / node ip are not set pod ip %s node ip %s", addresses.srcPodIP, addresses.nodeIP) + } + createAPBExternalRouteCRWithDynamicHop(defaultPolicyName, f.Namespace.Name, servingNamespace, false, addressesv4.gatewayIPs) + + for _, container := range gwContainers { + reachPodFromContainer(addresses.srcPodIP, strconv.Itoa(destPortOnPod), srcPingPodName, container, protocol) + } + + expectedHostNames := make(map[string]struct{}) + for _, c := range gwContainers { + res, err := 
runCommand(containerRuntime, "exec", c, "hostname") + framework.ExpectNoError(err, "failed to run hostname in %s", c) + hostname := strings.TrimSuffix(res, "\n") + framework.Logf("Hostname for %s is %s", c, hostname) + expectedHostNames[hostname] = struct{}{} + } + framework.Logf("Expected hostnames are %v", expectedHostNames) + + ginkgo.By("Checking that external ips are reachable with both gateways") + returnedHostNames := make(map[string]struct{}) + target := addresses.targetIPs[0] + success := false + for i := 0; i < 20; i++ { + args := []string{"exec", srcPingPodName, "--"} + if protocol == "tcp" { + args = append(args, "bash", "-c", fmt.Sprintf("echo | nc -w 1 %s %d", target, destPort)) + } else { + args = append(args, "bash", "-c", fmt.Sprintf("echo | nc -w 1 -u %s %d", target, destPort)) + } + res, err := framework.RunKubectl(f.Namespace.Name, args...) + framework.ExpectNoError(err, "failed to reach %s (%s)", target, protocol) + hostname := strings.TrimSuffix(res, "\n") + if hostname != "" { + returnedHostNames[hostname] = struct{}{} + } + + if cmp.Equal(returnedHostNames, expectedHostNames) { + success = true + break + } + } + framework.Logf("Received hostnames for protocol %s are %v ", protocol, returnedHostNames) + + if !success { + framework.Failf("Failed to hit all the external gateways via for protocol %s, diff %s", protocol, cmp.Diff(expectedHostNames, returnedHostNames)) + } + + }, + ginkgotable.Entry("UDP ipv4", "udp", &addressesv4, externalUDPPort, srcUDPPort), + ginkgotable.Entry("TCP ipv4", "tcp", &addressesv4, externalTCPPort, srcHTTPPort), + ginkgotable.Entry("UDP ipv6", "udp", &addressesv6, externalUDPPort, srcUDPPort), + ginkgotable.Entry("TCP ipv6", "tcp", &addressesv6, externalTCPPort, srcHTTPPort)) + }) + + // Validate pods can reach a network running in multiple container's loopback + // addresses via two external gateways running on eth0 of the container without + // any tunnel encap. 
This test defines two external gateways and validates ECMP + // functionality to the container loopbacks. To verify traffic reaches the + // gateways, tcpdump is running on the external gateways and will exit successfully + // once an ICMP packet is received from the annotated pod in the k8s cluster. + // Two additional gateways are added to verify the tcp / udp protocols. + // They run the netexec command, and the pod asks to return their hostname. + // The test checks that both hostnames are collected at least once. + var _ = ginkgo.Describe("e2e multiple external gateway validation", func() { + const ( + svcname string = "novxlan-externalgw-ecmp" + gwContainer1 string = "gw-test-container1" + gwContainer2 string = "gw-test-container2" + testTimeout string = "30" + ecmpRetry int = 20 + srcPodName = "e2e-exgw-src-pod" + externalTCPPort = 80 + externalUDPPort = 90 + ) + + f := wrappedTestFramework(svcname) + + var gwContainers []string + var addressesv4, addressesv6 gatewayTestIPs + + ginkgo.BeforeEach(func() { + // retrieve worker node names + nodes, err := e2enode.GetBoundedReadySchedulableNodes(f.ClientSet, 3) + framework.ExpectNoError(err) + if len(nodes.Items) < 3 { + framework.Failf( + "Test requires >= 3 Ready nodes, but there are only %v nodes", + len(nodes.Items)) } - }(address) - } - pingSync.Wait() - tcpDumpSync.Wait() - }, ginkgotable.Entry("IPV4", &addressesv4, "icmp"), - ginkgotable.Entry("IPV6", &addressesv6, "icmp6")) + if externalContainerNetwork == "host" { + skipper.Skipf("Skipping as host network doesn't support multiple external gateways") + } + // ensure there are no namespaces with the gateway annotation + gomega.Eventually(func() int { + nsList, err := f.ClientSet.CoreV1().Namespaces().List(context.Background(), metav1.ListOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + count := 0 + for _, ns := range nsList.Items { + _, f1 := ns.Annotations["k8s.ovn.org/routing-external-gws"] + _, f2 := 
ns.Annotations["k8s.ovn.org/external-gw-pod-ips"] + if f1 || f2 { + count++ + } + } + return count + }, 20, 1).Should(gomega.Equal(0)) + gwContainers, addressesv4, addressesv6 = setupGatewayContainers(f, nodes, gwContainer1, gwContainer2, srcPodName, externalUDPPort, externalTCPPort, ecmpRetry) + }) + + ginkgo.AfterEach(func() { + // tear down the containers simulating the gateways + deleteClusterExternalContainer(gwContainer1) + deleteClusterExternalContainer(gwContainer2) + deleteAPBExternalRouteCR(defaultPolicyName) + }) + + ginkgotable.DescribeTable("Should validate ICMP connectivity to multiple external gateways for an ECMP scenario", func(addresses *gatewayTestIPs, icmpToDump string) { + if addresses.srcPodIP == "" || addresses.nodeIP == "" { + skipper.Skipf("Skipping as pod ip / node ip are not set pod ip %s node ip %s", addresses.srcPodIP, addresses.nodeIP) + } + createAPBExternalRouteCRWithStaticHop(defaultPolicyName, f.Namespace.Name, false, addresses.gatewayIPs...) - // This test runs a listener on the external container, returning the host name both on tcp and udp. - // The src pod tries to hit the remote address until both the containers are hit. - ginkgotable.DescribeTable("Should validate TCP/UDP connectivity to multiple external gateways for a UDP / TCP scenario", func(addresses *gatewayTestIPs, protocol string, destPort, destPortOnPod int) { - if addresses.srcPodIP == "" || addresses.nodeIP == "" { - skipper.Skipf("Skipping as pod ip / node ip are not set pod ip %s node ip %s", addresses.srcPodIP, addresses.nodeIP) - } + ginkgo.By("Verifying connectivity to the pod from external gateways") + for _, gwContainer := range gwContainers { + _, err := runCommand(containerRuntime, "exec", gwContainer, "ping", "-c", testTimeout, addresses.srcPodIP) + framework.ExpectNoError(err, "Failed to ping %s from container %s", addresses.srcPodIP, gwContainer) + } - annotateNamespaceForGateway(f.Namespace.Name, false, addresses.gatewayIPs[:]...) 
+ ginkgo.By("Verifying connectivity to the pod from external gateways with large packets > pod MTU") + for _, gwContainer := range gwContainers { + _, err := runCommand(containerRuntime, "exec", gwContainer, "ping", "-s", "1420", "-c", testTimeout, addresses.srcPodIP) + framework.ExpectNoError(err, "Failed to ping %s from container %s", addresses.srcPodIP, gwContainer) + } - for _, container := range gwContainers { - reachPodFromContainer(addresses.srcPodIP, strconv.Itoa(destPortOnPod), srcPodName, container, protocol) - } + // Verify the gateways and remote loopback addresses are reachable from the pod. + // Iterate checking connectivity to the loopbacks on the gateways until tcpdump see + // the traffic or 20 attempts fail. Odds of a false negative here is ~ (1/2)^20 + ginkgo.By("Verifying ecmp connectivity to the external gateways by iterating through the targets") - expectedHostNames := hostNamesForContainers(gwContainers) - framework.Logf("Expected hostnames are %v", expectedHostNames) + // Check for egress traffic to both gateway loopback addresses using tcpdump, since + // /proc/net/dev counters only record the ingress interface traffic is received on. + // The test will waits until an ICMP packet is matched on the gateways or fail the + // test if a packet to the loopback is not received within the timer interval. + // If an ICMP packet is never detected, return the error via the specified chanel. 
- returnedHostNames := make(map[string]struct{}) - success := false + tcpDumpSync := sync.WaitGroup{} + tcpDumpSync.Add(len(gwContainers)) + for _, gwContainer := range gwContainers { + go checkPingOnContainer(gwContainer, srcPodName, icmpToDump, &tcpDumpSync) + } - // Picking only the first address, the one the udp listener is set for - target := addresses.targetIPs[0] - for i := 0; i < 20; i++ { - hostname := pokeHostnameViaNC(srcPodName, f.Namespace.Name, protocol, target, destPort) - if hostname != "" { - returnedHostNames[hostname] = struct{}{} - } - if cmp.Equal(returnedHostNames, expectedHostNames) { - success = true - break - } - } + pingSync := sync.WaitGroup{} - framework.Logf("Received hostnames for protocol %s are %v ", protocol, returnedHostNames) + // spawn a goroutine to asynchronously (to speed up the test) + // to ping the gateway loopbacks on both containers via ECMP. + for _, address := range addresses.targetIPs { + pingSync.Add(1) + go func(target string) { + defer ginkgo.GinkgoRecover() + defer pingSync.Done() + _, err := framework.RunKubectl(f.Namespace.Name, "exec", srcPodName, "--", "ping", "-c", testTimeout, target) + if err != nil { + framework.Logf("error generating a ping from the test pod %s: %v", srcPodName, err) + } + }(address) + } + pingSync.Wait() + tcpDumpSync.Wait() - if !success { - framework.Failf("Failed to hit all the external gateways via for protocol %s, diff %s", protocol, cmp.Diff(expectedHostNames, returnedHostNames)) - } + }, ginkgotable.Entry("IPV4", &addressesv4, "icmp"), + ginkgotable.Entry("IPV6", &addressesv6, "icmp6")) - }, ginkgotable.Entry("IPV4 udp", &addressesv4, "udp", externalUDPPort, srcUDPPort), - ginkgotable.Entry("IPV4 tcp", &addressesv4, "tcp", externalTCPPort, srcHTTPPort), - ginkgotable.Entry("IPV6 udp", &addressesv6, "udp", externalUDPPort, srcUDPPort), - ginkgotable.Entry("IPV6 tcp", &addressesv6, "tcp", externalTCPPort, srcHTTPPort)) -}) + // This test runs a listener on the external container, 
returning the host name both on tcp and udp. + // The src pod tries to hit the remote address until both the containers are hit. + ginkgotable.DescribeTable("Should validate TCP/UDP connectivity to multiple external gateways for a UDP / TCP scenario", func(addresses *gatewayTestIPs, protocol string, destPort, destPortOnPod int) { + if addresses.srcPodIP == "" || addresses.nodeIP == "" { + skipper.Skipf("Skipping as pod ip / node ip are not set pod ip %s node ip %s", addresses.srcPodIP, addresses.nodeIP) + } + createAPBExternalRouteCRWithStaticHop(defaultPolicyName, f.Namespace.Name, false, addresses.gatewayIPs...) -var _ = ginkgo.Describe("e2e multiple external gateway stale conntrack entry deletion validation", func() { - const ( - svcname string = "novxlan-externalgw-ecmp" - gwContainer1 string = "gw-test-container1" - gwContainer2 string = "gw-test-container2" - srcPodName string = "e2e-exgw-src-pod" - gatewayPodName1 string = "e2e-gateway-pod1" - gatewayPodName2 string = "e2e-gateway-pod2" - ) + for _, container := range gwContainers { + reachPodFromContainer(addresses.srcPodIP, strconv.Itoa(destPortOnPod), srcPodName, container, protocol) + } - var ( - servingNamespace string - ) + expectedHostNames := hostNamesForContainers(gwContainers) + framework.Logf("Expected hostnames are %v", expectedHostNames) - f := wrappedTestFramework(svcname) + returnedHostNames := make(map[string]struct{}) + success := false - var ( - addressesv4, addressesv6 gatewayTestIPs - sleepCommand []string - nodes *v1.NodeList - err error - clientSet kubernetes.Interface - ) + // Picking only the first address, the one the udp listener is set for + target := addresses.targetIPs[0] + for i := 0; i < 20; i++ { + hostname := pokeHostnameViaNC(srcPodName, f.Namespace.Name, protocol, target, destPort) + if hostname != "" { + returnedHostNames[hostname] = struct{}{} + } + if cmp.Equal(returnedHostNames, expectedHostNames) { + success = true + break + } + } - ginkgo.BeforeEach(func() { - 
clientSet = f.ClientSet // so it can be used in AfterEach - // retrieve worker node names - nodes, err = e2enode.GetBoundedReadySchedulableNodes(clientSet, 3) - framework.ExpectNoError(err) - if len(nodes.Items) < 3 { - framework.Failf( - "Test requires >= 3 Ready nodes, but there are only %v nodes", - len(nodes.Items)) - } + framework.Logf("Received hostnames for protocol %s are %v ", protocol, returnedHostNames) - if externalContainerNetwork == "host" { - skipper.Skipf("Skipping as host network doesn't support multiple external gateways") - } + if !success { + framework.Failf("Failed to hit all the external gateways via for protocol %s, diff %s", protocol, cmp.Diff(expectedHostNames, returnedHostNames)) + } - ns, err := f.CreateNamespace("exgw-conntrack-serving", nil) - framework.ExpectNoError(err) - servingNamespace = ns.Name + }, ginkgotable.Entry("IPV4 udp", &addressesv4, "udp", externalUDPPort, srcUDPPort), + ginkgotable.Entry("IPV4 tcp", &addressesv4, "tcp", externalTCPPort, srcHTTPPort), + ginkgotable.Entry("IPV6 udp", &addressesv6, "udp", externalUDPPort, srcUDPPort), + ginkgotable.Entry("IPV6 tcp", &addressesv6, "tcp", externalTCPPort, srcHTTPPort)) + }) - addressesv4, addressesv6 = setupGatewayContainersForConntrackTest(f, nodes, gwContainer1, gwContainer2, srcPodName) - sleepCommand = []string{"bash", "-c", "sleep 20000"} - _, err = createGenericPod(f, gatewayPodName1, nodes.Items[0].Name, servingNamespace, sleepCommand) - framework.ExpectNoError(err, "Create and annotate the external gw pods to manage the src app pod namespace, failed: %v", err) - _, err = createGenericPod(f, gatewayPodName2, nodes.Items[1].Name, servingNamespace, sleepCommand) - framework.ExpectNoError(err, "Create and annotate the external gw pods to manage the src app pod namespace, failed: %v", err) + var _ = ginkgo.Describe("e2e multiple external gateway stale conntrack entry deletion validation", func() { + const ( + svcname string = "novxlan-externalgw-ecmp" + gwContainer1 
string = "gw-test-container1" + gwContainer2 string = "gw-test-container2" + srcPodName string = "e2e-exgw-src-pod" + gatewayPodName1 string = "e2e-gateway-pod1" + gatewayPodName2 string = "e2e-gateway-pod2" + ) + + var ( + servingNamespace string + ) + + f := wrappedTestFramework(svcname) + + var ( + addressesv4, addressesv6 gatewayTestIPs + sleepCommand []string + nodes *v1.NodeList + err error + clientSet kubernetes.Interface + ) + + ginkgo.BeforeEach(func() { + clientSet = f.ClientSet // so it can be used in AfterEach + // retrieve worker node names + nodes, err = e2enode.GetBoundedReadySchedulableNodes(clientSet, 3) + framework.ExpectNoError(err) + if len(nodes.Items) < 3 { + framework.Failf( + "Test requires >= 3 Ready nodes, but there are only %v nodes", + len(nodes.Items)) + } - // remove the routing external annotation - annotateArgs := []string{ - "annotate", - "namespace", - f.Namespace.Name, - "k8s.ovn.org/routing-external-gws-", - } - ginkgo.By("Resetting the gw annotation") - framework.RunKubectlOrDie(f.Namespace.Name, annotateArgs...) 
- }) + if externalContainerNetwork == "host" { + skipper.Skipf("Skipping as host network doesn't support multiple external gateways") + } - ginkgo.AfterEach(func() { - // tear down the containers and pods simulating the gateways - ginkgo.By("Deleting the gateway containers") - deleteClusterExternalContainer(gwContainer1) - deleteClusterExternalContainer(gwContainer2) - }) + ns, err := f.CreateNamespace("exgw-conntrack-serving", nil) + framework.ExpectNoError(err) + servingNamespace = ns.Name + + addressesv4, addressesv6 = setupGatewayContainersForConntrackTest(f, nodes, gwContainer1, gwContainer2, srcPodName) + sleepCommand = []string{"bash", "-c", "sleep 20000"} + _, err = createGenericPodWithLabel(f, gatewayPodName1, nodes.Items[0].Name, servingNamespace, sleepCommand, map[string]string{"name": gatewayPodName1, "gatewayPod": "true"}) + framework.ExpectNoError(err, "Create the external gw pods to manage the src app pod namespace, failed: %v", err) + _, err = createGenericPodWithLabel(f, gatewayPodName2, nodes.Items[1].Name, servingNamespace, sleepCommand, map[string]string{"name": gatewayPodName2, "gatewayPod": "true"}) + framework.ExpectNoError(err, "Create the external gw pods to manage the src app pod namespace, failed: %v", err) + }) + + ginkgo.AfterEach(func() { + deleteClusterExternalContainer(gwContainer1) + deleteClusterExternalContainer(gwContainer2) + deleteAPBExternalRouteCR(defaultPolicyName) + }) + + ginkgotable.DescribeTable("Static Hop: Should validate conntrack entry deletion for TCP/UDP traffic via multiple external gateways a.k.a ECMP routes", func(addresses *gatewayTestIPs, protocol string) { + if addresses.srcPodIP == "" || addresses.nodeIP == "" { + skipper.Skipf("Skipping as pod ip / node ip are not set pod ip %s node ip %s", addresses.srcPodIP, addresses.nodeIP) + } + ginkgo.By("Create a static hop in an Admin Policy Based External Route CR targeting the app namespace to get managed by external gateways") + 
createAPBExternalRouteCRWithStaticHop(defaultPolicyName, f.Namespace.Name, false, addresses.gatewayIPs...) + setupIperf3Client := func(container, address string, port int) { + // note iperf3 even when using udp also spawns tcp connection first; so we indirectly also have the tcp connection when using "-u" flag + cmd := []string{containerRuntime, "exec", container, "iperf3", "-u", "-c", address, "-p", fmt.Sprintf("%d", port), "-b", "1M", "-i", "1", "-t", "3", "&"} + _, err := runCommand(cmd...) + klog.Infof("iperf3 command %s", strings.Join(cmd, " ")) + framework.ExpectNoError(err, "failed to setup iperf3 client for %s", container) + } + macAddressGW := make([]string, 2) + for i, containerName := range []string{gwContainer1, gwContainer2} { + ginkgo.By("Start iperf3 client from external container to connect to iperf3 server running at the src pod") + setupIperf3Client(containerName, addresses.srcPodIP, 5201+i) + macAddressExtGW, err := net.ParseMAC(getMACAddressesForNetwork(containerName, externalContainerNetwork)) + framework.ExpectNoError(err, "failed to parse MAC address for %s", containerName) + // Trim leading 0s because conntrack dumped labels are just integers + // in hex without leading 0s. 
+ macAddressGW[i] = strings.TrimLeft(strings.Replace(macAddressExtGW.String(), ":", "", -1), "0") + } + ginkgo.By("Check if conntrack entries for ECMP routes are created for the 2 external gateways") + nodeName := getPod(f, srcPodName).Spec.NodeName + podConnEntriesWithMACLabelsSet := 2 + totalPodConnEntries := 6 + gomega.Eventually(func() int { + return pokeConntrackEntries(nodeName, addresses.srcPodIP, protocol, macAddressGW) + }, time.Minute, 5).Should(gomega.Equal(podConnEntriesWithMACLabelsSet)) + gomega.Expect(pokeConntrackEntries(nodeName, addresses.srcPodIP, protocol, nil)).To(gomega.Equal(totalPodConnEntries)) + + ginkgo.By("Check if conntrack entries for ECMP routes are removed for the deleted external gateway if traffic is UDP") + updateAPBExternalRouteCRWithStaticHop(defaultPolicyName, f.Namespace.Name, false, addresses.gatewayIPs[0]) + if protocol == "udp" { + podConnEntriesWithMACLabelsSet = 1 // we still have the conntrack entry for the remaining gateway + totalPodConnEntries = 5 // 6-1 + } - ginkgotable.DescribeTable("Namespace annotation: Should validate conntrack entry deletion for TCP/UDP traffic via multiple external gateways a.k.a ECMP routes", func(addresses *gatewayTestIPs, protocol string) { - if addresses.srcPodIP == "" || addresses.nodeIP == "" { - skipper.Skipf("Skipping as pod ip / node ip are not set pod ip %s node ip %s", addresses.srcPodIP, addresses.nodeIP) - } - ginkgo.By("Annotate the app namespace to get managed by external gateways") - annotateNamespaceForGateway(f.Namespace.Name, false, addresses.gatewayIPs[0], addresses.gatewayIPs[1]) - - setupIperf3Client := func(container, address string, port int) { - // note iperf3 even when using udp also spawns tcp connection first; so we indirectly also have the tcp connection when using "-u" flag - cmd := []string{containerRuntime, "exec", container, "iperf3", "-u", "-c", address, "-p", fmt.Sprintf("%d", port), "-b", "1M", "-i", "1", "-t", "3", "&"} - _, err := runCommand(cmd...) 
- framework.ExpectNoError(err, "failed to setup iperf3 client for %s", container) - } - macAddressGW := make([]string, 2) - for i, containerName := range []string{gwContainer1, gwContainer2} { - ginkgo.By("Start iperf3 client from external container to connect to iperf3 server running at the src pod") - setupIperf3Client(containerName, addresses.srcPodIP, 5201+i) - macAddressExtGW, err := net.ParseMAC(getMACAddressesForNetwork(containerName, externalContainerNetwork)) - framework.ExpectNoError(err, "failed to parse MAC address for %s", containerName) - // Trim leading 0s because conntrack dumped labels are just integers - // in hex without leading 0s. - macAddressGW[i] = strings.TrimLeft(strings.Replace(macAddressExtGW.String(), ":", "", -1), "0") - } + gomega.Eventually(func() int { + n := pokeConntrackEntries(nodeName, addresses.srcPodIP, protocol, macAddressGW) + klog.Infof("Number of entries with macAddressGW %s:%d", macAddressGW, n) + return n + }, 10).Should(gomega.Equal(podConnEntriesWithMACLabelsSet)) - ginkgo.By("Check if conntrack entries for ECMP routes are created for the 2 external gateways") - nodeName := getPod(f, srcPodName).Spec.NodeName - podConnEntriesWithMACLabelsSet := pokeConntrackEntries(nodeName, addresses.srcPodIP, protocol, macAddressGW) - gomega.Expect(podConnEntriesWithMACLabelsSet).To(gomega.Equal(2)) - totalPodConnEntries := pokeConntrackEntries(nodeName, addresses.srcPodIP, protocol, nil) - gomega.Expect(totalPodConnEntries).To(gomega.Equal(6)) // total conntrack entries for this pod/protocol - - ginkgo.By("Remove second external gateway IP from the app namespace annotation") - annotateNamespaceForGateway(f.Namespace.Name, false, addresses.gatewayIPs[0]) - - ginkgo.By("Check if conntrack entries for ECMP routes are removed for the deleted external gateway if traffic is UDP") - podConnEntriesWithMACLabelsSet = pokeConntrackEntries(nodeName, addresses.srcPodIP, protocol, macAddressGW) - totalPodConnEntries = 
pokeConntrackEntries(nodeName, addresses.srcPodIP, protocol, nil) - if protocol == "udp" { - gomega.Expect(podConnEntriesWithMACLabelsSet).To(gomega.Equal(1)) // we still have the conntrack entry for the remaining gateway - gomega.Expect(totalPodConnEntries).To(gomega.Equal(5)) // 6-1 - } else { - gomega.Expect(podConnEntriesWithMACLabelsSet).To(gomega.Equal(2)) - gomega.Expect(totalPodConnEntries).To(gomega.Equal(6)) - } + gomega.Expect(pokeConntrackEntries(nodeName, addresses.srcPodIP, protocol, nil)).To(gomega.Equal(totalPodConnEntries)) - ginkgo.By("Remove first external gateway IP from the app namespace annotation") - annotateNamespaceForGateway(f.Namespace.Name, false, "") - - ginkgo.By("Check if conntrack entries for ECMP routes are removed for the deleted external gateway if traffic is UDP") - podConnEntriesWithMACLabelsSet = pokeConntrackEntries(nodeName, addresses.srcPodIP, protocol, macAddressGW) - totalPodConnEntries = pokeConntrackEntries(nodeName, addresses.srcPodIP, protocol, nil) - if protocol == "udp" { - gomega.Expect(podConnEntriesWithMACLabelsSet).To(gomega.Equal(0)) // we don't have any remaining gateways left - gomega.Expect(totalPodConnEntries).To(gomega.Equal(4)) // 6-2 - } else { - gomega.Expect(podConnEntriesWithMACLabelsSet).To(gomega.Equal(2)) - gomega.Expect(totalPodConnEntries).To(gomega.Equal(6)) - } + ginkgo.By("Remove the remaining static hop from the CR") + deleteAPBExternalRouteCR(defaultPolicyName) + ginkgo.By("Check if conntrack entries for ECMP routes are removed for the deleted external gateway if traffic is UDP") + + podConnEntriesWithMACLabelsSet = 2 + totalPodConnEntries = 6 + if protocol == "udp" { + podConnEntriesWithMACLabelsSet = 0 // we don't have any remaining gateways left + totalPodConnEntries = 4 // 6-2 + } - }, - ginkgotable.Entry("IPV4 udp", &addressesv4, "udp"), - ginkgotable.Entry("IPV4 tcp", &addressesv4, "tcp"), - ginkgotable.Entry("IPV6 udp", &addressesv6, "udp"), - ginkgotable.Entry("IPV6 tcp", 
&addressesv6, "tcp")) + gomega.Eventually(func() int { + n := pokeConntrackEntries(nodeName, addresses.srcPodIP, protocol, macAddressGW) + klog.Infof("Number of entries with macAddressGW %s:%d", macAddressGW, n) + return n + }, time.Minute, 5).Should(gomega.Equal(podConnEntriesWithMACLabelsSet)) - ginkgotable.DescribeTable("ExternalGWPod annotation: Should validate conntrack entry deletion for TCP/UDP traffic via multiple external gateways a.k.a ECMP routes", func(addresses *gatewayTestIPs, protocol string) { - if addresses.srcPodIP == "" || addresses.nodeIP == "" { - skipper.Skipf("Skipping as pod ip / node ip are not set pod ip %s node ip %s", addresses.srcPodIP, addresses.nodeIP) - } - ginkgo.By("Annotate the external gw pods to manage the src app pod namespace") - for i, gwPod := range []string{gatewayPodName1, gatewayPodName2} { - networkIPs := fmt.Sprintf("\"%s\"", addresses.gatewayIPs[i]) - if addresses.srcPodIP != "" && addresses.nodeIP != "" { - networkIPs = fmt.Sprintf("\"%s\", \"%s\"", addresses.gatewayIPs[i], addresses.gatewayIPs[i]) - } - annotatePodForGateway(gwPod, servingNamespace, f.Namespace.Name, networkIPs, false) - } + gomega.Expect(pokeConntrackEntries(nodeName, addresses.srcPodIP, protocol, nil)).To(gomega.Equal(totalPodConnEntries)) + }, + ginkgotable.Entry("IPV4 udp", &addressesv4, "udp"), + ginkgotable.Entry("IPV4 tcp", &addressesv4, "tcp"), + ginkgotable.Entry("IPV6 udp", &addressesv6, "udp"), + ginkgotable.Entry("IPV6 tcp", &addressesv6, "tcp")) + + ginkgotable.DescribeTable("Dynamic Hop: Should validate conntrack entry deletion for TCP/UDP traffic via multiple external gateways a.k.a ECMP routes", func(addresses *gatewayTestIPs, protocol string) { + if addresses.srcPodIP == "" || addresses.nodeIP == "" { + skipper.Skipf("Skipping as pod ip / node ip are not set pod ip %s node ip %s", addresses.srcPodIP, addresses.nodeIP) + } + + for i, gwPod := range []string{gatewayPodName1, gatewayPodName2} { + 
annotateMultusNetworkStatusInPodGateway(gwPod, servingNamespace, []string{addresses.gatewayIPs[i], addresses.gatewayIPs[i]}) + } + + createAPBExternalRouteCRWithDynamicHop(defaultPolicyName, f.Namespace.Name, servingNamespace, false, addressesv4.gatewayIPs) + + setupIperf3Client := func(container, address string, port int) { + // note iperf3 even when using udp also spawns tcp connection first; so we indirectly also have the tcp connection when using "-u" flag + cmd := []string{containerRuntime, "exec", container, "iperf3", "-u", "-c", address, "-p", fmt.Sprintf("%d", port), "-b", "1M", "-i", "1", "-t", "3", "&"} + klog.Infof("run command %+v", cmd) + _, err := runCommand(cmd...) + framework.ExpectNoError(err, "failed to setup iperf3 client for %s", container) + } + macAddressGW := make([]string, 2) + for i, containerName := range []string{gwContainer1, gwContainer2} { + ginkgo.By("Start iperf3 client from external container to connect to iperf3 server running at the src pod") + setupIperf3Client(containerName, addresses.srcPodIP, 5201+i) + macAddressExtGW, err := net.ParseMAC(getMACAddressesForNetwork(containerName, externalContainerNetwork)) + framework.ExpectNoError(err, "failed to parse MAC address for %s", containerName) + // Trim leading 0s because conntrack dumped labels are just integers + // in hex without leading 0s. 
+ macAddressGW[i] = strings.TrimLeft(strings.Replace(macAddressExtGW.String(), ":", "", -1), "0") + } + + ginkgo.By("Check if conntrack entries for ECMP routes are created for the 2 external gateways") + nodeName := getPod(f, srcPodName).Spec.NodeName + podConnEntriesWithMACLabelsSet := 2 // TCP + totalPodConnEntries := 6 // TCP + + gomega.Eventually(func() int { + n := pokeConntrackEntries(nodeName, addresses.srcPodIP, protocol, macAddressGW) + klog.Infof("Number of entries with macAddressGW %s:%d", macAddressGW, n) + return n + }, time.Minute, 5).Should(gomega.Equal(podConnEntriesWithMACLabelsSet)) + gomega.Expect(pokeConntrackEntries(nodeName, addresses.srcPodIP, protocol, nil)).To(gomega.Equal(totalPodConnEntries)) // total conntrack entries for this pod/protocol + + ginkgo.By("Remove second external gateway pod's routing-namespace annotation") + p := getGatewayPod(f, servingNamespace, gatewayPodName2) + p.Labels = map[string]string{"name": gatewayPodName2} + updatePod(f, p) + + ginkgo.By("Check if conntrack entries for ECMP routes are removed for the deleted external gateway if traffic is UDP") + if protocol == "udp" { + podConnEntriesWithMACLabelsSet = 1 + totalPodConnEntries = 5 + } + gomega.Eventually(func() int { + n := pokeConntrackEntries(nodeName, addresses.srcPodIP, protocol, macAddressGW) + klog.Infof("Number of entries with macAddressGW %s:%d", macAddressGW, n) + return n + }, 10).Should(gomega.Equal(podConnEntriesWithMACLabelsSet)) + gomega.Expect(pokeConntrackEntries(nodeName, addresses.srcPodIP, protocol, nil)).To(gomega.Equal(totalPodConnEntries)) + + ginkgo.By("Remove first external gateway pod's routing-namespace annotation") + p = getGatewayPod(f, servingNamespace, gatewayPodName1) + p.Labels = map[string]string{"name": gatewayPodName1} + updatePod(f, p) + + ginkgo.By("Check if conntrack entries for ECMP routes are removed for the deleted external gateway if traffic is UDP") + podConnEntriesWithMACLabelsSet = 2 // TCP + totalPodConnEntries = 6 
// TCP + if protocol == "udp" { + podConnEntriesWithMACLabelsSet = 0 //we don't have any remaining gateways left + totalPodConnEntries = 4 + } + gomega.Eventually(func() int { + n := pokeConntrackEntries(nodeName, addresses.srcPodIP, protocol, macAddressGW) + klog.Infof("Number of entries with macAddressGW %s:%d", macAddressGW, n) + return n + }, 5).Should(gomega.Equal(podConnEntriesWithMACLabelsSet)) + gomega.Expect(pokeConntrackEntries(nodeName, addresses.srcPodIP, protocol, nil)).To(gomega.Equal(totalPodConnEntries)) + }, + ginkgotable.Entry("IPV4 udp", &addressesv4, "udp"), + ginkgotable.Entry("IPV4 tcp", &addressesv4, "tcp"), + ginkgotable.Entry("IPV6 udp", &addressesv6, "udp"), + ginkgotable.Entry("IPV6 tcp", &addressesv6, "tcp")) - // ensure the conntrack deletion tracker annotation is updated - ginkgo.By("Check if the k8s.ovn.org/external-gw-pod-ips got updated for the app namespace") - err := wait.PollImmediate(retryInterval, retryTimeout, func() (bool, error) { - ns := getNamespace(f, f.Namespace.Name) - return (ns.Annotations[externalGatewayPodIPsAnnotation] == fmt.Sprintf("%s,%s", addresses.gatewayIPs[0], addresses.gatewayIPs[1])), nil }) - framework.ExpectNoError(err, "Check if the k8s.ovn.org/external-gw-pod-ips got updated, failed: %v", err) - setupIperf3Client := func(container, address string, port int) { - // note iperf3 even when using udp also spawns tcp connection first; so we indirectly also have the tcp connection when using "-u" flag - cmd := []string{containerRuntime, "exec", container, "iperf3", "-u", "-c", address, "-p", fmt.Sprintf("%d", port), "-b", "1M", "-i", "1", "-t", "3", "&"} - _, err := runCommand(cmd...) 
- framework.ExpectNoError(err, "failed to setup iperf3 client for %s", container) - } - macAddressGW := make([]string, 2) - for i, containerName := range []string{gwContainer1, gwContainer2} { - ginkgo.By("Start iperf3 client from external container to connect to iperf3 server running at the src pod") - setupIperf3Client(containerName, addresses.srcPodIP, 5201+i) - macAddressExtGW, err := net.ParseMAC(getMACAddressesForNetwork(containerName, externalContainerNetwork)) - framework.ExpectNoError(err, "failed to parse MAC address for %s", containerName) - // Trim leading 0s because conntrack dumped labels are just integers - // in hex without leading 0s. - macAddressGW[i] = strings.TrimLeft(strings.Replace(macAddressExtGW.String(), ":", "", -1), "0") - } + // BFD Tests are dual of external gateway. The only difference is that they enable BFD on ovn and + // on the external containers, and after doing one round verifying that the traffic reaches both containers, + // they delete one and verify that the traffic is always reaching the only alive container. 
+ var _ = ginkgo.Context("BFD", func() { + + var _ = ginkgo.Describe("e2e non-vxlan external gateway through a dynamic hop", func() { + const ( + svcname string = "externalgw-pod-novxlan" + gwContainer1 string = "ex-gw-container1" + gwContainer2 string = "ex-gw-container2" + srcPingPodName string = "e2e-exgw-src-ping-pod" + gatewayPodName1 string = "e2e-gateway-pod1" + gatewayPodName2 string = "e2e-gateway-pod2" + externalTCPPort = 91 + externalUDPPort = 90 + ecmpRetry int = 20 + testTimeout string = "20" + defaultPolicyName = "default-route-policy" + ) + + var ( + sleepCommand = []string{"bash", "-c", "sleep 20000"} + addressesv4, addressesv6 gatewayTestIPs + clientSet kubernetes.Interface + servingNamespace string + ) + + var ( + gwContainers []string + ) + + f := wrappedTestFramework(svcname) + + ginkgo.BeforeEach(func() { + clientSet = f.ClientSet // so it can be used in AfterEach + // retrieve worker node names + nodes, err := e2enode.GetBoundedReadySchedulableNodes(f.ClientSet, 3) + framework.ExpectNoError(err) + if len(nodes.Items) < 3 { + framework.Failf( + "Test requires >= 3 Ready nodes, but there are only %v nodes", + len(nodes.Items)) + } + + ns, err := f.CreateNamespace("exgw-bfd-serving", nil) + framework.ExpectNoError(err) + servingNamespace = ns.Name + + setupBFD := setupBFDOnContainer(nodes.Items) + gwContainers, addressesv4, addressesv6 = setupGatewayContainers(f, nodes, gwContainer1, gwContainer2, srcPingPodName, externalUDPPort, externalTCPPort, ecmpRetry, setupBFD) + ginkgo.By("Create the external route policy with dynamic hops to manage the src app pod namespace") + + setupPolicyBasedGatewayPods(f, nodes, gatewayPodName1, gatewayPodName2, servingNamespace, sleepCommand, addressesv4, addressesv6) + }) + + ginkgo.AfterEach(func() { + deleteAPBExternalRouteCR(defaultPolicyName) + cleanExGWContainers(clientSet, []string{gwContainer1, gwContainer2}, addressesv4, addressesv6) + }) + + ginkgotable.DescribeTable("Should validate ICMP connectivity to 
an external gateway's loopback address via a pod with dynamic hop", + func(addresses *gatewayTestIPs, icmpCommand string) { + if addresses.srcPodIP == "" || addresses.nodeIP == "" { + skipper.Skipf("Skipping as pod ip / node ip are not set pod ip %s node ip %s", addresses.srcPodIP, addresses.nodeIP) + } + createAPBExternalRouteCRWithDynamicHop(defaultPolicyName, f.Namespace.Name, servingNamespace, true, addressesv4.gatewayIPs) + + ginkgo.By("Verifying connectivity to the pod from external gateways") + for _, gwContainer := range gwContainers { + _, err := runCommand(containerRuntime, "exec", gwContainer, "ping", "-c", testTimeout, addresses.srcPodIP) + framework.ExpectNoError(err, "Failed to ping %s from container %s", addresses.srcPodIP, gwContainer) + } + + // This is needed for bfd to sync up + for _, gwContainer := range gwContainers { + gomega.Eventually(func() bool { + return isBFDPaired(gwContainer, addresses.nodeIP) + }, time.Minute, 5).Should(gomega.BeTrue(), "Bfd not paired") + } + + tcpDumpSync := sync.WaitGroup{} + tcpDumpSync.Add(len(gwContainers)) + for _, gwContainer := range gwContainers { + go checkPingOnContainer(gwContainer, srcPingPodName, icmpCommand, &tcpDumpSync) + } + + // Verify the external gateway loopback address running on the external container is reachable and + // that traffic from the source ping pod is proxied through the pod in the serving namespace + ginkgo.By("Verifying connectivity via the gateway namespace to the remote addresses") + + pingSync := sync.WaitGroup{} + // spawn a goroutine to asynchronously (to speed up the test) + // to ping the gateway loopbacks on both containers via ECMP. 
+ for _, address := range addresses.targetIPs { + pingSync.Add(1) + go func(target string) { + defer ginkgo.GinkgoRecover() + defer pingSync.Done() + _, err := framework.RunKubectl(f.Namespace.Name, "exec", srcPingPodName, "--", "ping", "-c", testTimeout, target) + if err != nil { + framework.Logf("error generating a ping from the test pod %s: %v", srcPingPodName, err) + } + }(address) + } + + pingSync.Wait() + tcpDumpSync.Wait() + + if len(gwContainers) > 1 { + ginkgo.By("Deleting one container") + deleteClusterExternalContainer(gwContainers[1]) + time.Sleep(3 * time.Second) // bfd timeout + + tcpDumpSync = sync.WaitGroup{} + tcpDumpSync.Add(1) + go checkPingOnContainer(gwContainers[0], srcPingPodName, icmpCommand, &tcpDumpSync) + + // Verify the external gateway loopback address running on the external container is reachable and + // that traffic from the source ping pod is proxied through the pod in the serving namespace + ginkgo.By("Verifying connectivity via the gateway namespace to the remote addresses") + pingSync = sync.WaitGroup{} + + for _, t := range addresses.targetIPs { + pingSync.Add(1) + go func(target string) { + defer ginkgo.GinkgoRecover() + defer pingSync.Done() + _, err := framework.RunKubectl(f.Namespace.Name, "exec", srcPingPodName, "--", "ping", "-c", testTimeout, target) + framework.ExpectNoError(err, "Failed to ping remote gateway %s from pod %s", target, srcPingPodName) + }(t) + } + pingSync.Wait() + tcpDumpSync.Wait() + } + }, + ginkgotable.Entry("ipv4", &addressesv4, "icmp"), + ginkgotable.Entry("ipv6", &addressesv6, "icmp6")) + + ginkgotable.DescribeTable("Should validate TCP/UDP connectivity to an external gateway's loopback address via a pod with a dynamic hop", + func(protocol string, addresses *gatewayTestIPs, destPort int) { + if addresses.srcPodIP == "" || addresses.nodeIP == "" { + skipper.Skipf("Skipping as pod ip / node ip are not set pod ip %s node ip %s", addresses.srcPodIP, addresses.nodeIP) + } + 
createAPBExternalRouteCRWithDynamicHop(defaultPolicyName, f.Namespace.Name, servingNamespace, true, addressesv4.gatewayIPs) + + for _, gwContainer := range gwContainers { + _, err := runCommand(containerRuntime, "exec", gwContainer, "ping", "-c", testTimeout, addresses.srcPodIP) + framework.ExpectNoError(err, "Failed to ping %s from container %s", addresses.srcPodIP, gwContainer) + } + + for _, gwContainer := range gwContainers { + gomega.Eventually(func() bool { + return isBFDPaired(gwContainer, addresses.nodeIP) + }, 10, 1).Should(gomega.BeTrue(), "Bfd not paired") + } + + expectedHostNames := hostNamesForContainers(gwContainers) + framework.Logf("Expected hostnames are %v", expectedHostNames) + + returnedHostNames := make(map[string]struct{}) + target := addresses.targetIPs[0] + success := false + for i := 0; i < 20; i++ { + hostname := pokeHostnameViaNC(srcPingPodName, f.Namespace.Name, protocol, target, destPort) + if hostname != "" { + returnedHostNames[hostname] = struct{}{} + } + + if cmp.Equal(returnedHostNames, expectedHostNames) { + success = true + break + } + } + framework.Logf("Received hostnames for protocol %s are %v ", protocol, returnedHostNames) - ginkgo.By("Check if conntrack entries for ECMP routes are created for the 2 external gateways") - nodeName := getPod(f, srcPodName).Spec.NodeName - podConnEntriesWithMACLabelsSet := pokeConntrackEntries(nodeName, addresses.srcPodIP, protocol, macAddressGW) - gomega.Expect(podConnEntriesWithMACLabelsSet).To(gomega.Equal(2)) - totalPodConnEntries := pokeConntrackEntries(nodeName, addresses.srcPodIP, protocol, nil) - gomega.Expect(totalPodConnEntries).To(gomega.Equal(6)) // total conntrack entries for this pod/protocol - - ginkgo.By("Remove second external gateway pod's routing-namespace annotation") - annotatePodForGateway(gatewayPodName2, servingNamespace, "", addresses.gatewayIPs[1], false) - - // ensure the conntrack deletion tracker annotation is updated - ginkgo.By("Check if the 
k8s.ovn.org/external-gw-pod-ips got updated for the app namespace") - err = wait.PollImmediate(retryInterval, retryTimeout, func() (bool, error) { - ns := getNamespace(f, f.Namespace.Name) - return (ns.Annotations[externalGatewayPodIPsAnnotation] == fmt.Sprintf("%s", addresses.gatewayIPs[0])), nil - }) - framework.ExpectNoError(err, "Check if the k8s.ovn.org/external-gw-pod-ips got updated, failed: %v", err) - - ginkgo.By("Check if conntrack entries for ECMP routes are removed for the deleted external gateway if traffic is UDP") - podConnEntriesWithMACLabelsSet = pokeConntrackEntries(nodeName, addresses.srcPodIP, protocol, macAddressGW) - totalPodConnEntries = pokeConntrackEntries(nodeName, addresses.srcPodIP, protocol, nil) - if protocol == "udp" { - gomega.Expect(podConnEntriesWithMACLabelsSet).To(gomega.Equal(1)) // we still have the conntrack entry for the remaining gateway - gomega.Expect(totalPodConnEntries).To(gomega.Equal(5)) // 6-1 - } else { - gomega.Expect(podConnEntriesWithMACLabelsSet).To(gomega.Equal(2)) - gomega.Expect(totalPodConnEntries).To(gomega.Equal(6)) - } + if !success { + framework.Failf("Failed to hit all the external gateways via for protocol %s, diff %s", protocol, cmp.Diff(expectedHostNames, returnedHostNames)) + } - ginkgo.By("Remove first external gateway pod's routing-namespace annotation") - annotatePodForGateway(gatewayPodName1, servingNamespace, "", addresses.gatewayIPs[0], false) + if len(gwContainers) > 1 { + ginkgo.By("Deleting one container") + deleteClusterExternalContainer(gwContainers[1]) + ginkgo.By("Waiting for BFD to sync") + time.Sleep(3 * time.Second) // bfd timeout - // ensure the conntrack deletion tracker annotation is updated - ginkgo.By("Check if the k8s.ovn.org/external-gw-pod-ips got updated for the app namespace") - err = wait.PollImmediate(retryInterval, retryTimeout, func() (bool, error) { - ns := getNamespace(f, f.Namespace.Name) - return (ns.Annotations[externalGatewayPodIPsAnnotation] == ""), nil - }) - 
framework.ExpectNoError(err, "Check if the k8s.ovn.org/external-gw-pod-ips got updated, failed: %v", err) - - ginkgo.By("Check if conntrack entries for ECMP routes are removed for the deleted external gateway if traffic is UDP") - podConnEntriesWithMACLabelsSet = pokeConntrackEntries(nodeName, addresses.srcPodIP, protocol, macAddressGW) - totalPodConnEntries = pokeConntrackEntries(nodeName, addresses.srcPodIP, protocol, nil) - if protocol == "udp" { - gomega.Expect(podConnEntriesWithMACLabelsSet).To(gomega.Equal(0)) // we don't have any remaining gateways left - gomega.Expect(totalPodConnEntries).To(gomega.Equal(4)) // 6-2 - } else { - gomega.Expect(podConnEntriesWithMACLabelsSet).To(gomega.Equal(2)) - gomega.Expect(totalPodConnEntries).To(gomega.Equal(6)) - } + // ECMP should direct all the traffic to the only container + expectedHostName := hostNameForContainer(gwContainers[0]) - }, - ginkgotable.Entry("IPV4 udp", &addressesv4, "udp"), - ginkgotable.Entry("IPV4 tcp", &addressesv4, "tcp"), - ginkgotable.Entry("IPV6 udp", &addressesv6, "udp"), - ginkgotable.Entry("IPV6 tcp", &addressesv6, "tcp")) -}) + ginkgo.By("Checking hostname multiple times") + for i := 0; i < 20; i++ { + hostname := pokeHostnameViaNC(srcPingPodName, f.Namespace.Name, protocol, target, destPort) + framework.ExpectEqual(expectedHostName, hostname, "Hostname returned by nc not as expected") + } + } + }, + ginkgotable.Entry("UDP ipv4", "udp", &addressesv4, externalUDPPort), + ginkgotable.Entry("TCP ipv4", "tcp", &addressesv4, externalTCPPort), + ginkgotable.Entry("UDP ipv6", "udp", &addressesv6, externalUDPPort), + ginkgotable.Entry("TCP ipv6", "tcp", &addressesv6, externalTCPPort)) + }) + + // Validate pods can reach a network running in multiple container's loopback + // addresses via two external gateways running on eth0 of the container without + // any tunnel encap. This test defines two external gateways and validates ECMP + // functionality to the container loopbacks. 
To verify traffic reaches the + // gateways, tcpdump is running on the external gateways and will exit successfully + // once an ICMP packet is received from the annotated pod in the k8s cluster. + // Two additional gateways are added to verify the tcp / udp protocols. + // They run the netexec command, and the pod asks to return their hostname. + // The test checks that both hostnames are collected at least once. + var _ = ginkgo.Describe("e2e multiple external gateway validation", func() { + const ( + svcname string = "novxlan-externalgw-ecmp" + gwContainer1 string = "gw-test-container1" + gwContainer2 string = "gw-test-container2" + testTimeout string = "30" + ecmpRetry int = 20 + srcPodName = "e2e-exgw-src-pod" + externalTCPPort = 80 + externalUDPPort = 90 + ) + + var ( + gwContainers []string + ) + + testContainer := fmt.Sprintf("%s-container", srcPodName) + testContainerFlag := fmt.Sprintf("--container=%s", testContainer) + + f := wrappedTestFramework(svcname) + + var addressesv4, addressesv6 gatewayTestIPs + + ginkgo.BeforeEach(func() { + nodes, err := e2enode.GetBoundedReadySchedulableNodes(f.ClientSet, 3) + framework.ExpectNoError(err) + if len(nodes.Items) < 3 { + framework.Failf( + "Test requires >= 3 Ready nodes, but there are only %v nodes", + len(nodes.Items)) + } -// BFD Tests are dual of external gateway. The only difference is that they enable BFD on ovn and -// on the external containers, and after doing one round veryfing that the traffic reaches both containers, -// they delete one and verify that the traffic is always reaching the only alive container. 
-var _ = ginkgo.Context("BFD", func() { - var _ = ginkgo.Describe("e2e non-vxlan external gateway through a gateway pod", func() { - const ( - svcname string = "externalgw-pod-novxlan" - gwContainer1 string = "ex-gw-container1" - gwContainer2 string = "ex-gw-container2" - srcPingPodName string = "e2e-exgw-src-ping-pod" - gatewayPodName1 string = "e2e-gateway-pod1" - gatewayPodName2 string = "e2e-gateway-pod2" - externalTCPPort = 91 - externalUDPPort = 90 - ecmpRetry int = 20 - testTimeout string = "20" - ) - - var ( - sleepCommand = []string{"bash", "-c", "sleep 20000"} - addressesv4, addressesv6 gatewayTestIPs - clientSet kubernetes.Interface - servingNamespace string - ) - - var ( - gwContainers []string - ) - - f := wrappedTestFramework(svcname) - - ginkgo.BeforeEach(func() { - clientSet = f.ClientSet // so it can be used in AfterEach - // retrieve worker node names - nodes, err := e2enode.GetBoundedReadySchedulableNodes(f.ClientSet, 3) - framework.ExpectNoError(err) - if len(nodes.Items) < 3 { - framework.Failf( - "Test requires >= 3 Ready nodes, but there are only %v nodes", - len(nodes.Items)) - } + if externalContainerNetwork == "host" { + skipper.Skipf("Skipping as host network doesn't support multiple external gateways") + } - ns, err := f.CreateNamespace("exgw-bfd-serving", nil) - framework.ExpectNoError(err) - servingNamespace = ns.Name + setupBFD := setupBFDOnContainer(nodes.Items) + gwContainers, addressesv4, addressesv6 = setupGatewayContainers(f, nodes, gwContainer1, gwContainer2, srcPodName, externalUDPPort, externalTCPPort, ecmpRetry, setupBFD) - setupBFD := setupBFDOnContainer(nodes.Items) - gwContainers, addressesv4, addressesv6 = setupGatewayContainers(f, nodes, gwContainer1, gwContainer2, srcPingPodName, externalUDPPort, externalTCPPort, ecmpRetry, setupBFD) - setupGatewayPods(f, nodes, gatewayPodName1, gatewayPodName2, servingNamespace, sleepCommand, addressesv4, addressesv6, true) - }) + }) - ginkgo.AfterEach(func() { - 
cleanExGWContainers(clientSet, []string{gwContainer1, gwContainer2}, addressesv4, addressesv6) - }) + ginkgo.AfterEach(func() { + deleteClusterExternalContainer(gwContainer1) + deleteClusterExternalContainer(gwContainer2) + deleteAPBExternalRouteCR(defaultPolicyName) + }) - ginkgotable.DescribeTable("Should validate ICMP connectivity to an external gateway's loopback address via a pod with external gateway annotations enabled", - func(addresses *gatewayTestIPs, icmpCommand string) { - if addresses.srcPodIP == "" || addresses.nodeIP == "" { - skipper.Skipf("Skipping as pod ip / node ip are not set pod ip %s node ip %s", addresses.srcPodIP, addresses.nodeIP) - } + ginkgotable.DescribeTable("Should validate ICMP connectivity to multiple external gateways for an ECMP scenario", func(addresses *gatewayTestIPs, icmpToDump string) { + if addresses.srcPodIP == "" || addresses.nodeIP == "" { + skipper.Skipf("Skipping as pod ip / node ip are not set pod ip %s node ip %s", addresses.srcPodIP, addresses.nodeIP) + } + createAPBExternalRouteCRWithStaticHop(defaultPolicyName, f.Namespace.Name, true, addresses.gatewayIPs...) 
- ginkgo.By("Verifying connectivity to the pod from external gateways") - for _, gwContainer := range gwContainers { - _, err := runCommand(containerRuntime, "exec", gwContainer, "ping", "-c", testTimeout, addresses.srcPodIP) - framework.ExpectNoError(err, "Failed to ping %s from container %s", addresses.srcPodIP, gwContainer) - } + for _, gwContainer := range gwContainers { + _, err := runCommand(containerRuntime, "exec", gwContainer, "ping", "-c", testTimeout, addresses.srcPodIP) + framework.ExpectNoError(err, "Failed to ping %s from container %s", addresses.srcPodIP, gwContainer) + } - // This is needed for bfd to sync up - time.Sleep(3 * time.Second) + for _, gwContainer := range gwContainers { + gomega.Eventually(func() bool { + return isBFDPaired(gwContainer, addresses.nodeIP) + }, 5).Should(gomega.BeTrue(), "Bfd not paired") + } - for _, gwContainer := range gwContainers { - framework.ExpectEqual(isBFDPaired(gwContainer, addresses.nodeIP), true, "Bfd not paired") - } + // Verify the gateways and remote loopback addresses are reachable from the pod. + // Iterate checking connectivity to the loopbacks on the gateways until tcpdump sees + // the traffic or 20 attempts fail. Odds of a false negative here are ~ (1/2)^20 + ginkgo.By("Verifying ecmp connectivity to the external gateways by iterating through the targets") + + // Check for egress traffic to both gateway loopback addresses using tcpdump, since + // /proc/net/dev counters only record the ingress interface traffic is received on. + // The test will wait until an ICMP packet is matched on the gateways or fail the + // test if a packet to the loopback is not received within the timer interval. + // If an ICMP packet is never detected, return the error via the specified channel. 
+ + tcpDumpSync := sync.WaitGroup{} + tcpDumpSync.Add(len(gwContainers)) + for _, gwContainer := range gwContainers { + go checkPingOnContainer(gwContainer, srcPodName, icmpToDump, &tcpDumpSync) + } - tcpDumpSync := sync.WaitGroup{} - tcpDumpSync.Add(len(gwContainers)) - for _, gwContainer := range gwContainers { - go checkPingOnContainer(gwContainer, srcPingPodName, icmpCommand, &tcpDumpSync) - } + // spawn a goroutine to asynchronously (to speed up the test) + // to ping the gateway loopbacks on both containers via ECMP. - // Verify the external gateway loopback address running on the external container is reachable and - // that traffic from the source ping pod is proxied through the pod in the serving namespace - ginkgo.By("Verifying connectivity via the gateway namespace to the remote addresses") + pingSync := sync.WaitGroup{} - pingSync := sync.WaitGroup{} - // spawn a goroutine to asynchronously (to speed up the test) - // to ping the gateway loopbacks on both containers via ECMP. - for _, address := range addresses.targetIPs { - pingSync.Add(1) - go func(target string) { - defer ginkgo.GinkgoRecover() - defer pingSync.Done() - _, err := framework.RunKubectl(f.Namespace.Name, "exec", srcPingPodName, "--", "ping", "-c", testTimeout, target) - if err != nil { - framework.Logf("error generating a ping from the test pod %s: %v", srcPingPodName, err) - } - }(address) - } + // spawn a goroutine to asynchronously (to speed up the test) + // to ping the gateway loopbacks on both containers via ECMP. 
+ for _, address := range addresses.targetIPs { + pingSync.Add(1) + go func(target string) { + defer ginkgo.GinkgoRecover() + defer pingSync.Done() + _, err := framework.RunKubectl(f.Namespace.Name, "exec", srcPodName, testContainerFlag, "--", "ping", "-c", testTimeout, target) + if err != nil { + framework.Logf("error generating a ping from the test pod %s: %v", srcPodName, err) + } + }(address) + } - pingSync.Wait() - tcpDumpSync.Wait() + pingSync.Wait() + tcpDumpSync.Wait() - if len(gwContainers) > 1 { ginkgo.By("Deleting one container") deleteClusterExternalContainer(gwContainers[1]) time.Sleep(3 * time.Second) // bfd timeout + pingSync = sync.WaitGroup{} tcpDumpSync = sync.WaitGroup{} - tcpDumpSync.Add(1) - go checkPingOnContainer(gwContainers[0], srcPingPodName, icmpCommand, &tcpDumpSync) - // Verify the external gateway loopback address running on the external container is reachable and - // that traffic from the source ping pod is proxied through the pod in the serving namespace - ginkgo.By("Verifying connectivity via the gateway namespace to the remote addresses") - pingSync = sync.WaitGroup{} + tcpDumpSync.Add(1) + go checkPingOnContainer(gwContainers[0], srcPodName, icmpToDump, &tcpDumpSync) - for _, t := range addresses.targetIPs { + // spawn a goroutine to asynchronously (to speed up the test) + // to ping the gateway loopbacks on both containers via ECMP. 
+ for _, address := range addresses.targetIPs { pingSync.Add(1) go func(target string) { defer ginkgo.GinkgoRecover() defer pingSync.Done() - _, err := framework.RunKubectl(f.Namespace.Name, "exec", srcPingPodName, "--", "ping", "-c", testTimeout, target) - framework.ExpectNoError(err, "Failed to ping remote gateway %s from pod %s", target, srcPingPodName) - }(t) + _, err := framework.RunKubectl(f.Namespace.Name, "exec", srcPodName, testContainerFlag, "--", "ping", "-c", testTimeout, target) + if err != nil { + framework.Logf("error generating a ping from the test pod %s: %v", srcPodName, err) + } + }(address) } + pingSync.Wait() tcpDumpSync.Wait() - } - }, - ginkgotable.Entry("ipv4", &addressesv4, "icmp"), - ginkgotable.Entry("ipv6", &addressesv6, "icmp6")) - ginkgotable.DescribeTable("Should validate TCP/UDP connectivity to an external gateway's loopback address via a pod with external gateway annotations enabled", - func(protocol string, addresses *gatewayTestIPs, destPort int) { - if addresses.srcPodIP == "" || addresses.nodeIP == "" { - skipper.Skipf("Skipping as pod ip / node ip are not set pod ip %s node ip %s", addresses.srcPodIP, addresses.nodeIP) - } + }, ginkgotable.Entry("IPV4", &addressesv4, "icmp"), + ginkgotable.Entry("IPV6", &addressesv6, "icmp6")) - for _, gwContainer := range gwContainers { - _, err := runCommand(containerRuntime, "exec", gwContainer, "ping", "-c", testTimeout, addresses.srcPodIP) - framework.ExpectNoError(err, "Failed to ping %s from container %s", addresses.srcPodIP, gwContainer) - } + // This test runs a listener on the external container, returning the host name both on tcp and udp. + // The src pod tries to hit the remote address until both the containers are hit. 
+ ginkgotable.DescribeTable("Should validate TCP/UDP connectivity to multiple external gateways for a UDP / TCP scenario", func(addresses *gatewayTestIPs, protocol string, destPort int) { + if addresses.srcPodIP == "" || addresses.nodeIP == "" { + skipper.Skipf("Skipping as pod ip / node ip are not set pod ip %s node ip %s", addresses.srcPodIP, addresses.nodeIP) + } + createAPBExternalRouteCRWithStaticHop(defaultPolicyName, f.Namespace.Name, true, addresses.gatewayIPs...) - for _, gwContainer := range gwContainers { - framework.ExpectEqual(isBFDPaired(gwContainer, addresses.nodeIP), true, "Bfd not paired") - } + for _, gwContainer := range gwContainers { + _, err := runCommand(containerRuntime, "exec", gwContainer, "ping", "-c", testTimeout, addresses.srcPodIP) + framework.ExpectNoError(err, "Failed to ping %s from container %s", addresses.srcPodIP, gwContainer) + } - expectedHostNames := hostNamesForContainers(gwContainers) - framework.Logf("Expected hostnames are %v", expectedHostNames) + // This is needed for bfd to sync up + time.Sleep(3 * time.Second) - returnedHostNames := make(map[string]struct{}) - target := addresses.targetIPs[0] - success := false - for i := 0; i < 20; i++ { - hostname := pokeHostnameViaNC(srcPingPodName, f.Namespace.Name, protocol, target, destPort) - if hostname != "" { - returnedHostNames[hostname] = struct{}{} + for _, gwContainer := range gwContainers { + framework.ExpectEqual(isBFDPaired(gwContainer, addresses.nodeIP), true, "Bfd not paired") } - if cmp.Equal(returnedHostNames, expectedHostNames) { - success = true - break + expectedHostNames := hostNamesForContainers(gwContainers) + framework.Logf("Expected hostnames are %v", expectedHostNames) + + returnedHostNames := make(map[string]struct{}) + success := false + + // Picking only the first address, the one the udp listener is set for + target := addresses.targetIPs[0] + for i := 0; i < 20; i++ { + hostname := pokeHostnameViaNC(srcPodName, f.Namespace.Name, protocol, target, 
destPort) + if hostname != "" { + returnedHostNames[hostname] = struct{}{} + } + if cmp.Equal(returnedHostNames, expectedHostNames) { + success = true + break + } } - } - framework.Logf("Received hostnames for protocol %s are %v ", protocol, returnedHostNames) - if !success { - framework.Failf("Failed to hit all the external gateways via for protocol %s, diff %s", protocol, cmp.Diff(expectedHostNames, returnedHostNames)) - } + framework.Logf("Received hostnames for protocol %s are %v ", protocol, returnedHostNames) + + if !success { + framework.Failf("Failed to hit all the external gateways via for protocol %s, diff %s", protocol, cmp.Diff(expectedHostNames, returnedHostNames)) + } - if len(gwContainers) > 1 { ginkgo.By("Deleting one container") deleteClusterExternalContainer(gwContainers[1]) ginkgo.By("Waiting for BFD to sync") @@ -780,232 +1924,334 @@ var _ = ginkgo.Context("BFD", func() { ginkgo.By("Checking hostname multiple times") for i := 0; i < 20; i++ { - hostname := pokeHostnameViaNC(srcPingPodName, f.Namespace.Name, protocol, target, destPort) + hostname := pokeHostnameViaNC(srcPodName, f.Namespace.Name, protocol, target, destPort) framework.ExpectEqual(expectedHostName, hostname, "Hostname returned by nc not as expected") } - } - }, - ginkgotable.Entry("UDP ipv4", "udp", &addressesv4, externalUDPPort), - ginkgotable.Entry("TCP ipv4", "tcp", &addressesv4, externalTCPPort), - ginkgotable.Entry("UDP ipv6", "udp", &addressesv6, externalUDPPort), - ginkgotable.Entry("TCP ipv6", "tcp", &addressesv6, externalTCPPort)) - }) - - // Validate pods can reach a network running in multiple container's loopback - // addresses via two external gateways running on eth0 of the container without - // any tunnel encap. This test defines two external gateways and validates ECMP - // functionality to the container loopbacks. 
To verify traffic reaches the - // gateways, tcpdump is running on the external gateways and will exit successfully - // once an ICMP packet is received from the annotated pod in the k8s cluster. - // Two additional gateways are added to verify the tcp / udp protocols. - // They run the netexec command, and the pod asks to return their hostname. - // The test checks that both hostnames are collected at least once. - var _ = ginkgo.Describe("e2e multiple external gateway validation", func() { - const ( - svcname string = "novxlan-externalgw-ecmp" - gwContainer1 string = "gw-test-container1" - gwContainer2 string = "gw-test-container2" - testTimeout string = "30" - ecmpRetry int = 20 - srcPodName = "e2e-exgw-src-pod" - externalTCPPort = 80 - externalUDPPort = 90 - ) - - var ( - gwContainers []string - ) - - testContainer := fmt.Sprintf("%s-container", srcPodName) - testContainerFlag := fmt.Sprintf("--container=%s", testContainer) - - f := wrappedTestFramework(svcname) - - var addressesv4, addressesv6 gatewayTestIPs - - ginkgo.BeforeEach(func() { - nodes, err := e2enode.GetBoundedReadySchedulableNodes(f.ClientSet, 3) - framework.ExpectNoError(err) - if len(nodes.Items) < 3 { - framework.Failf( - "Test requires >= 3 Ready nodes, but there are only %v nodes", - len(nodes.Items)) - } - - if externalContainerNetwork == "host" { - skipper.Skipf("Skipping as host network doesn't support multiple external gateways") - } - - setupBFD := setupBFDOnContainer(nodes.Items) - gwContainers, addressesv4, addressesv6 = setupGatewayContainers(f, nodes, gwContainer1, gwContainer2, srcPodName, externalUDPPort, externalTCPPort, ecmpRetry, setupBFD) - - // remove the routing external annotation - annotateArgs := []string{ - "annotate", - "namespace", - f.Namespace.Name, - "k8s.ovn.org/routing-external-gws-", - } - ginkgo.By("Resetting the gw annotation") - framework.RunKubectlOrDie(f.Namespace.Name, annotateArgs...) 
- }) - - ginkgo.AfterEach(func() { - // tear down the containers simulating the gateways - deleteClusterExternalContainer(gwContainer1) - deleteClusterExternalContainer(gwContainer2) + }, ginkgotable.Entry("IPV4 udp", &addressesv4, "udp", externalUDPPort), + ginkgotable.Entry("IPV4 tcp", &addressesv4, "tcp", externalTCPPort), + ginkgotable.Entry("IPV6 udp", &addressesv6, "udp", externalUDPPort), + ginkgotable.Entry("IPV6 tcp", &addressesv6, "tcp", externalTCPPort)) + }) }) + }) - ginkgotable.DescribeTable("Should validate ICMP connectivity to multiple external gateways for an ECMP scenario", func(addresses *gatewayTestIPs, icmpToDump string) { - if addresses.srcPodIP == "" || addresses.nodeIP == "" { - skipper.Skipf("Skipping as pod ip / node ip are not set pod ip %s node ip %s", addresses.srcPodIP, addresses.nodeIP) - } + var _ = ginkgo.Context("When migrating from Annotations to Admin Policy Based External Route CRs", func() { + // Validate pods can reach a network running in a container's loopback address via + // an external gateway running on eth0 of the container without any tunnel encap. + // The traffic will get proxied through an annotated pod in the serving namespace. 
+ var _ = ginkgo.Describe("e2e non-vxlan external gateway through a gateway pod", func() { + const ( + svcname string = "externalgw-pod-novxlan" + gwContainer1 string = "ex-gw-container1" + gwContainer2 string = "ex-gw-container2" + srcPingPodName string = "e2e-exgw-src-ping-pod" + gatewayPodName1 string = "e2e-gateway-pod1" + gatewayPodName2 string = "e2e-gateway-pod2" + externalTCPPort = 91 + externalUDPPort = 90 + ecmpRetry int = 20 + testTimeout string = "20" + ) + + var ( + sleepCommand = []string{"bash", "-c", "sleep 20000"} + addressesv4, addressesv6 gatewayTestIPs + clientSet kubernetes.Interface + servingNamespace string + ) + + var ( + gwContainers []string + ) + + f := wrappedTestFramework(svcname) + + ginkgo.BeforeEach(func() { + clientSet = f.ClientSet // so it can be used in AfterEach + // retrieve worker node names + nodes, err := e2enode.GetBoundedReadySchedulableNodes(f.ClientSet, 3) + framework.ExpectNoError(err) + if len(nodes.Items) < 3 { + framework.Failf( + "Test requires >= 3 Ready nodes, but there are only %v nodes", + len(nodes.Items)) + } - annotateNamespaceForGateway(f.Namespace.Name, true, addresses.gatewayIPs[:]...) 
- for _, gwContainer := range gwContainers { - _, err := runCommand(containerRuntime, "exec", gwContainer, "ping", "-c", testTimeout, addresses.srcPodIP) - framework.ExpectNoError(err, "Failed to ping %s from container %s", addresses.srcPodIP, gwContainer) - } + ns, err := f.CreateNamespace("exgw-serving", nil) + framework.ExpectNoError(err) + servingNamespace = ns.Name - // This is needed for bfd to sync up - time.Sleep(3 * time.Second) + gwContainers, addressesv4, addressesv6 = setupGatewayContainers(f, nodes, gwContainer1, gwContainer2, srcPingPodName, externalUDPPort, externalTCPPort, ecmpRetry) + setupAnnotatedGatewayPods(f, nodes, gatewayPodName1, gatewayPodName2, servingNamespace, sleepCommand, addressesv4, addressesv6, false) + }) - for _, gwContainer := range gwContainers { - framework.ExpectEqual(isBFDPaired(gwContainer, addresses.nodeIP), true, "Bfd not paired") - } + ginkgo.AfterEach(func() { + cleanExGWContainers(clientSet, []string{gwContainer1, gwContainer2}, addressesv4, addressesv6) + deleteAPBExternalRouteCR(defaultPolicyName) + resetGatewayAnnotations(f) + }) - // Verify the gateways and remote loopback addresses are reachable from the pod. - // Iterate checking connectivity to the loopbacks on the gateways until tcpdump see - // the traffic or 20 attempts fail. Odds of a false negative here is ~ (1/2)^20 - ginkgo.By("Verifying ecmp connectivity to the external gateways by iterating through the targets") - - // Check for egress traffic to both gateway loopback addresses using tcpdump, since - // /proc/net/dev counters only record the ingress interface traffic is received on. - // The test will waits until an ICMP packet is matched on the gateways or fail the - // test if a packet to the loopback is not received within the timer interval. - // If an ICMP packet is never detected, return the error via the specified chanel. 
- - tcpDumpSync := sync.WaitGroup{} - tcpDumpSync.Add(len(gwContainers)) - for _, gwContainer := range gwContainers { - go checkPingOnContainer(gwContainer, srcPodName, icmpToDump, &tcpDumpSync) - } + ginkgotable.DescribeTable("Should validate ICMP connectivity to an external gateway's loopback address via a pod with external gateway annotations and a policy CR and after the annotations are removed", + func(addresses *gatewayTestIPs, icmpCommand string) { + if addresses.srcPodIP == "" || addresses.nodeIP == "" { + skipper.Skipf("Skipping as pod ip / node ip are not set pod ip %s node ip %s", addresses.srcPodIP, addresses.nodeIP) + } - // spawn a goroutine to asynchronously (to speed up the test) - // to ping the gateway loopbacks on both containers via ECMP. + createAPBExternalRouteCRWithDynamicHop(defaultPolicyName, f.Namespace.Name, servingNamespace, false, addressesv4.gatewayIPs) + ginkgo.By("Remove gateway annotations in pods") + annotatePodForGateway(gatewayPodName2, servingNamespace, "", addresses.gatewayIPs[1], false) + annotatePodForGateway(gatewayPodName1, servingNamespace, "", addresses.gatewayIPs[0], false) + ginkgo.By("Validate ICMP connectivity again with only CR policy to support it") + ginkgo.By(fmt.Sprintf("Verifying connectivity to the pod [%s] from external gateways", addresses.srcPodIP)) + for _, gwContainer := range gwContainers { + _, err := runCommand(containerRuntime, "exec", gwContainer, "ping", "-c", testTimeout, addresses.srcPodIP) + framework.ExpectNoError(err, "Failed to ping %s from container %s", addresses.srcPodIP, gwContainer) + } + tcpDumpSync := sync.WaitGroup{} + tcpDumpSync.Add(len(gwContainers)) - pingSync := sync.WaitGroup{} + for _, gwContainer := range gwContainers { + go checkPingOnContainer(gwContainer, srcPingPodName, icmpCommand, &tcpDumpSync) + } - // spawn a goroutine to asynchronously (to speed up the test) - // to ping the gateway loopbacks on both containers via ECMP. 
- for _, address := range addresses.targetIPs { - pingSync.Add(1) - go func(target string) { - defer ginkgo.GinkgoRecover() - defer pingSync.Done() - _, err := framework.RunKubectl(f.Namespace.Name, "exec", srcPodName, testContainerFlag, "--", "ping", "-c", testTimeout, target) - if err != nil { - framework.Logf("error generating a ping from the test pod %s: %v", srcPodName, err) + // Verify the external gateway loopback address running on the external container is reachable and + // that traffic from the source ping pod is proxied through the pod in the serving namespace + ginkgo.By("Verifying connectivity via the gateway namespace to the remote addresses") + pingSync := sync.WaitGroup{} + for _, t := range addresses.targetIPs { + pingSync.Add(1) + go func(target string) { + defer ginkgo.GinkgoRecover() + defer pingSync.Done() + _, err := framework.RunKubectl(f.Namespace.Name, "exec", srcPingPodName, "--", "ping", "-c", testTimeout, target) + framework.ExpectNoError(err, "Failed to ping remote gateway %s from pod %s", target, srcPingPodName) + }(t) } - }(address) - } + pingSync.Wait() + tcpDumpSync.Wait() + }, + ginkgotable.Entry("ipv4", &addressesv4, "icmp")) - pingSync.Wait() - tcpDumpSync.Wait() + ginkgotable.DescribeTable("Should validate TCP/UDP connectivity to an external gateway's loopback address via a pod when deleting the annotation and supported by a CR with the same gateway IPs", + func(protocol string, addresses *gatewayTestIPs, destPort, destPortOnPod int) { + if addresses.srcPodIP == "" || addresses.nodeIP == "" { + skipper.Skipf("Skipping as pod ip / node ip are not set pod ip %s node ip %s", addresses.srcPodIP, addresses.nodeIP) + } + createAPBExternalRouteCRWithDynamicHop(defaultPolicyName, f.Namespace.Name, servingNamespace, false, addressesv4.gatewayIPs) + ginkgo.By("removing the annotations in the pod gateways") + annotatePodForGateway(gatewayPodName2, servingNamespace, "", addresses.gatewayIPs[1], false) + 
annotatePodForGateway(gatewayPodName1, servingNamespace, "", addresses.gatewayIPs[0], false) - ginkgo.By("Deleting one container") - deleteClusterExternalContainer(gwContainers[1]) - time.Sleep(3 * time.Second) // bfd timeout + for _, container := range gwContainers { + reachPodFromContainer(addresses.srcPodIP, strconv.Itoa(destPortOnPod), srcPingPodName, container, protocol) + } - pingSync = sync.WaitGroup{} - tcpDumpSync = sync.WaitGroup{} + expectedHostNames := make(map[string]struct{}) + for _, c := range gwContainers { + res, err := runCommand(containerRuntime, "exec", c, "hostname") + framework.ExpectNoError(err, "failed to run hostname in %s", c) + hostname := strings.TrimSuffix(res, "\n") + framework.Logf("Hostname for %s is %s", c, hostname) + expectedHostNames[hostname] = struct{}{} + } + framework.Logf("Expected hostnames are %v", expectedHostNames) - tcpDumpSync.Add(1) - go checkPingOnContainer(gwContainers[0], srcPodName, icmpToDump, &tcpDumpSync) + ginkgo.By("Checking that external ips are reachable with both gateways") + returnedHostNames := make(map[string]struct{}) + target := addresses.targetIPs[0] + success := false + for i := 0; i < 20; i++ { + args := []string{"exec", srcPingPodName, "--"} + if protocol == "tcp" { + args = append(args, "bash", "-c", fmt.Sprintf("echo | nc -w 1 %s %d", target, destPort)) + } else { + args = append(args, "bash", "-c", fmt.Sprintf("echo | nc -w 1 -u %s %d", target, destPort)) + } + res, err := framework.RunKubectl(f.Namespace.Name, args...) + framework.ExpectNoError(err, "failed to reach %s (%s)", target, protocol) + hostname := strings.TrimSuffix(res, "\n") + if hostname != "" { + returnedHostNames[hostname] = struct{}{} + } - // spawn a goroutine to asynchronously (to speed up the test) - // to ping the gateway loopbacks on both containers via ECMP. 
- for _, address := range addresses.targetIPs { - pingSync.Add(1) - go func(target string) { - defer ginkgo.GinkgoRecover() - defer pingSync.Done() - _, err := framework.RunKubectl(f.Namespace.Name, "exec", srcPodName, testContainerFlag, "--", "ping", "-c", testTimeout, target) - if err != nil { - framework.Logf("error generating a ping from the test pod %s: %v", srcPodName, err) + if cmp.Equal(returnedHostNames, expectedHostNames) { + success = true + break + } } - }(address) - } + framework.Logf("Received hostnames for protocol %s are %v ", protocol, returnedHostNames) - pingSync.Wait() - tcpDumpSync.Wait() + if !success { + framework.Failf("Failed to hit all the external gateways via for protocol %s, diff %s", protocol, cmp.Diff(expectedHostNames, returnedHostNames)) + } - }, ginkgotable.Entry("IPV4", &addressesv4, "icmp"), - ginkgotable.Entry("IPV6", &addressesv6, "icmp6")) + }, + ginkgotable.Entry("UDP ipv4", "udp", &addressesv4, externalUDPPort, srcUDPPort), + ginkgotable.Entry("TCP ipv4", "tcp", &addressesv4, externalTCPPort, srcHTTPPort), + ginkgotable.Entry("UDP ipv6", "udp", &addressesv6, externalUDPPort, srcUDPPort), + ginkgotable.Entry("TCP ipv6", "tcp", &addressesv6, externalTCPPort, srcHTTPPort)) + }) - // This test runs a listener on the external container, returning the host name both on tcp and udp. - // The src pod tries to hit the remote address until both the containers are hit. 
- ginkgotable.DescribeTable("Should validate TCP/UDP connectivity to multiple external gateways for a UDP / TCP scenario", func(addresses *gatewayTestIPs, protocol string, destPort int) { - if addresses.srcPodIP == "" || addresses.nodeIP == "" { - skipper.Skipf("Skipping as pod ip / node ip are not set pod ip %s node ip %s", addresses.srcPodIP, addresses.nodeIP) - } + var _ = ginkgo.Describe("e2e multiple external gateway stale conntrack entry deletion validation", func() { + const ( + svcname string = "novxlan-externalgw-ecmp" + gwContainer1 string = "gw-test-container1" + gwContainer2 string = "gw-test-container2" + srcPodName string = "e2e-exgw-src-pod" + gatewayPodName1 string = "e2e-gateway-pod1" + gatewayPodName2 string = "e2e-gateway-pod2" + ) + + var ( + servingNamespace string + ) + + f := wrappedTestFramework(svcname) + + var ( + addressesv4, addressesv6 gatewayTestIPs + sleepCommand []string + nodes *v1.NodeList + err error + clientSet kubernetes.Interface + ) + + ginkgo.BeforeEach(func() { + clientSet = f.ClientSet // so it can be used in AfterEach + // retrieve worker node names + nodes, err = e2enode.GetBoundedReadySchedulableNodes(clientSet, 3) + framework.ExpectNoError(err) + if len(nodes.Items) < 3 { + framework.Failf( + "Test requires >= 3 Ready nodes, but there are only %v nodes", + len(nodes.Items)) + } - annotateNamespaceForGateway(f.Namespace.Name, true, addresses.gatewayIPs[:]...) 
+ if externalContainerNetwork == "host" { + skipper.Skipf("Skipping as host network doesn't support multiple external gateways") + } - for _, gwContainer := range gwContainers { - _, err := runCommand(containerRuntime, "exec", gwContainer, "ping", "-c", testTimeout, addresses.srcPodIP) - framework.ExpectNoError(err, "Failed to ping %s from container %s", addresses.srcPodIP, gwContainer) - } + ns, err := f.CreateNamespace("exgw-conntrack-serving", nil) + framework.ExpectNoError(err) + servingNamespace = ns.Name + + addressesv4, addressesv6 = setupGatewayContainersForConntrackTest(f, nodes, gwContainer1, gwContainer2, srcPodName) + sleepCommand = []string{"bash", "-c", "sleep 20000"} + _, err = createGenericPodWithLabel(f, gatewayPodName1, nodes.Items[0].Name, servingNamespace, sleepCommand, map[string]string{"gatewayPod": "true"}) + framework.ExpectNoError(err, "Create and annotate the external gw pods to manage the src app pod namespace, failed: %v", err) + _, err = createGenericPodWithLabel(f, gatewayPodName2, nodes.Items[1].Name, servingNamespace, sleepCommand, map[string]string{"gatewayPod": "true"}) + framework.ExpectNoError(err, "Create and annotate the external gw pods to manage the src app pod namespace, failed: %v", err) + }) + + ginkgo.AfterEach(func() { + // tear down the containers and pods simulating the gateways + ginkgo.By("Deleting the gateway containers") + deleteClusterExternalContainer(gwContainer1) + deleteClusterExternalContainer(gwContainer2) + deleteAPBExternalRouteCR(defaultPolicyName) + resetGatewayAnnotations(f) + }) + + ginkgotable.DescribeTable("Namespace annotation: Should validate conntrack entry remains unchanged when deleting the annotation in the namespace while the CR static hop still references the same namespace in the policy", func(addresses *gatewayTestIPs, protocol string) { + if addresses.srcPodIP == "" || addresses.nodeIP == "" { + skipper.Skipf("Skipping as pod ip / node ip are not set pod ip %s node ip %s", 
addresses.srcPodIP, addresses.nodeIP) + } + ginkgo.By("Annotate the app namespace to get managed by external gateways") + annotateNamespaceForGateway(f.Namespace.Name, false, addresses.gatewayIPs...) + createAPBExternalRouteCRWithStaticHop(defaultPolicyName, f.Namespace.Name, false, addresses.gatewayIPs...) + + setupIperf3Client := func(container, address string, port int) { + // note iperf3 even when using udp also spawns tcp connection first; so we indirectly also have the tcp connection when using "-u" flag + cmd := []string{containerRuntime, "exec", container, "iperf3", "-u", "-c", address, "-p", fmt.Sprintf("%d", port), "-b", "1M", "-i", "1", "-t", "3", "&"} + _, err := runCommand(cmd...) + framework.ExpectNoError(err, "failed to setup iperf3 client for %s", container) + } + macAddressGW := make([]string, 2) + for i, containerName := range []string{gwContainer1, gwContainer2} { + ginkgo.By("Start iperf3 client from external container to connect to iperf3 server running at the src pod") + setupIperf3Client(containerName, addresses.srcPodIP, 5201+i) + macAddressExtGW, err := net.ParseMAC(getMACAddressesForNetwork(containerName, externalContainerNetwork)) + framework.ExpectNoError(err, "failed to parse MAC address for %s", containerName) + // Trim leading 0s because conntrack dumped labels are just integers + // in hex without leading 0s. 
+ macAddressGW[i] = strings.TrimLeft(strings.Replace(macAddressExtGW.String(), ":", "", -1), "0") + } + ginkgo.By("Removing the namespace annotations to leave only the CR policy active") + annotateNamespaceForGateway(f.Namespace.Name, false, "") - // This is needed for bfd to sync up - time.Sleep(3 * time.Second) + ginkgo.By("Check if conntrack entries for ECMP routes are created for the 2 external gateways") + nodeName := getPod(f, srcPodName).Spec.NodeName + podConnEntriesWithMACLabelsSet := pokeConntrackEntries(nodeName, addresses.srcPodIP, protocol, macAddressGW) + gomega.Expect(podConnEntriesWithMACLabelsSet).To(gomega.Equal(2)) + totalPodConnEntries := pokeConntrackEntries(nodeName, addresses.srcPodIP, protocol, nil) + gomega.Expect(totalPodConnEntries).To(gomega.Equal(6)) // total conntrack entries for this pod/protocol - for _, gwContainer := range gwContainers { - framework.ExpectEqual(isBFDPaired(gwContainer, addresses.nodeIP), true, "Bfd not paired") - } + ginkgo.By("Check if conntrack entries for ECMP routes are removed for the deleted external gateway if traffic is UDP") + podConnEntriesWithMACLabelsSet = pokeConntrackEntries(nodeName, addresses.srcPodIP, protocol, macAddressGW) + totalPodConnEntries = pokeConntrackEntries(nodeName, addresses.srcPodIP, protocol, nil) - expectedHostNames := hostNamesForContainers(gwContainers) - framework.Logf("Expected hostnames are %v", expectedHostNames) + gomega.Expect(podConnEntriesWithMACLabelsSet).To(gomega.Equal(2)) + gomega.Expect(totalPodConnEntries).To(gomega.Equal(6)) - returnedHostNames := make(map[string]struct{}) - success := false + }, + ginkgotable.Entry("IPV4 udp", &addressesv4, "udp"), + ginkgotable.Entry("IPV4 tcp", &addressesv4, "tcp"), + ginkgotable.Entry("IPV6 udp", &addressesv6, "udp"), + ginkgotable.Entry("IPV6 tcp", &addressesv6, "tcp")) - // Picking only the first address, the one the udp listener is set for - target := addresses.targetIPs[0] - for i := 0; i < 20; i++ { - hostname := 
pokeHostnameViaNC(srcPodName, f.Namespace.Name, protocol, target, destPort) - if hostname != "" { - returnedHostNames[hostname] = struct{}{} + ginkgotable.DescribeTable("ExternalGWPod annotation: Should validate conntrack entry remains unchanged when deleting the annotation in the pods while the CR dynamic hop still references the same pods with the pod selector", func(addresses *gatewayTestIPs, protocol string) { + if addresses.srcPodIP == "" || addresses.nodeIP == "" { + skipper.Skipf("Skipping as pod ip / node ip are not set pod ip %s node ip %s", addresses.srcPodIP, addresses.nodeIP) } - if cmp.Equal(returnedHostNames, expectedHostNames) { - success = true - break + ginkgo.By("Annotate the external gw pods to manage the src app pod namespace") + for i, gwPod := range []string{gatewayPodName1, gatewayPodName2} { + networkIPs := fmt.Sprintf("\"%s\"", addresses.gatewayIPs[i]) + if addresses.srcPodIP != "" && addresses.nodeIP != "" { + networkIPs = fmt.Sprintf("\"%s\", \"%s\"", addresses.gatewayIPs[i], addresses.gatewayIPs[i]) + } + annotatePodForGateway(gwPod, servingNamespace, f.Namespace.Name, networkIPs, false) + } + createAPBExternalRouteCRWithDynamicHop(defaultPolicyName, f.Namespace.Name, servingNamespace, false, addressesv4.gatewayIPs) + // ensure the conntrack deletion tracker annotation is updated + ginkgo.By("Check if the k8s.ovn.org/external-gw-pod-ips got updated for the app namespace") + err := wait.PollImmediate(retryInterval, retryTimeout, func() (bool, error) { + ns := getNamespace(f, f.Namespace.Name) + return (ns.Annotations[externalGatewayPodIPsAnnotation] == fmt.Sprintf("%s,%s", addresses.gatewayIPs[0], addresses.gatewayIPs[1])), nil + }) + framework.ExpectNoError(err, "Check if the k8s.ovn.org/external-gw-pod-ips got updated, failed: %v", err) + annotatePodForGateway(gatewayPodName2, servingNamespace, "", addresses.gatewayIPs[1], false) + annotatePodForGateway(gatewayPodName1, servingNamespace, "", addresses.gatewayIPs[0], false) + + 
setupIperf3Client := func(container, address string, port int) { + // note iperf3 even when using udp also spawns tcp connection first; so we indirectly also have the tcp connection when using "-u" flag + cmd := []string{containerRuntime, "exec", container, "iperf3", "-u", "-c", address, "-p", fmt.Sprintf("%d", port), "-b", "1M", "-i", "1", "-t", "3", "&"} + _, err := runCommand(cmd...) + framework.ExpectNoError(err, "failed to setup iperf3 client for %s", container) + } + macAddressGW := make([]string, 2) + for i, containerName := range []string{gwContainer1, gwContainer2} { + ginkgo.By("Start iperf3 client from external container to connect to iperf3 server running at the src pod") + setupIperf3Client(containerName, addresses.srcPodIP, 5201+i) + macAddressExtGW, err := net.ParseMAC(getMACAddressesForNetwork(containerName, externalContainerNetwork)) + framework.ExpectNoError(err, "failed to parse MAC address for %s", containerName) + // Trim leading 0s because conntrack dumped labels are just integers + // in hex without leading 0s. 
+ macAddressGW[i] = strings.TrimLeft(strings.Replace(macAddressExtGW.String(), ":", "", -1), "0") } - } - - framework.Logf("Received hostnames for protocol %s are %v ", protocol, returnedHostNames) - - if !success { - framework.Failf("Failed to hit all the external gateways via for protocol %s, diff %s", protocol, cmp.Diff(expectedHostNames, returnedHostNames)) - } - - ginkgo.By("Deleting one container") - deleteClusterExternalContainer(gwContainers[1]) - ginkgo.By("Waiting for BFD to sync") - time.Sleep(3 * time.Second) // bfd timeout - // ECMP should direct all the traffic to the only container - expectedHostName := hostNameForContainer(gwContainers[0]) + ginkgo.By("Check if conntrack entries for ECMP routes are created for the 2 external gateways") + nodeName := getPod(f, srcPodName).Spec.NodeName + podConnEntriesWithMACLabelsSet := pokeConntrackEntries(nodeName, addresses.srcPodIP, protocol, macAddressGW) + gomega.Expect(podConnEntriesWithMACLabelsSet).To(gomega.Equal(2)) + totalPodConnEntries := pokeConntrackEntries(nodeName, addresses.srcPodIP, protocol, nil) + gomega.Expect(totalPodConnEntries).To(gomega.Equal(6)) // total conntrack entries for this pod/protocol + }, + ginkgotable.Entry("IPV4 udp", &addressesv4, "udp"), + ginkgotable.Entry("IPV4 tcp", &addressesv4, "tcp"), + ginkgotable.Entry("IPV6 udp", &addressesv6, "udp"), + ginkgotable.Entry("IPV6 tcp", &addressesv6, "tcp")) + }) - ginkgo.By("Checking hostname multiple times") - for i := 0; i < 20; i++ { - hostname := pokeHostnameViaNC(srcPodName, f.Namespace.Name, protocol, target, destPort) - framework.ExpectEqual(expectedHostName, hostname, "Hostname returned by nc not as expected") - } - }, ginkgotable.Entry("IPV4 udp", &addressesv4, "udp", externalUDPPort), - ginkgotable.Entry("IPV4 tcp", &addressesv4, "tcp", externalTCPPort), - ginkgotable.Entry("IPV6 udp", &addressesv6, "udp", externalUDPPort), - ginkgotable.Entry("IPV6 tcp", &addressesv6, "tcp", externalTCPPort)) }) }) @@ -1148,14 +2394,14 @@ 
func setupGatewayContainers(f *framework.Framework, nodes *v1.NodeList, containe return gwContainers, addressesv4, addressesv6 } -func setupGatewayPods(f *framework.Framework, nodes *v1.NodeList, pod1, pod2, ns string, cmd []string, addressesv4, addressesv6 gatewayTestIPs, bfd bool) []string { +func setupAnnotatedGatewayPods(f *framework.Framework, nodes *v1.NodeList, pod1, pod2, ns string, cmd []string, addressesv4, addressesv6 gatewayTestIPs, bfd bool) []string { gwPods := []string{pod1, pod2} if externalContainerNetwork == "host" { gwPods = []string{pod1} } for i, gwPod := range gwPods { - _, err := createGenericPod(f, gwPod, nodes.Items[i].Name, ns, cmd) + _, err := createGenericPodWithLabel(f, gwPod, nodes.Items[i].Name, ns, cmd, map[string]string{"gatewayPod": "true"}) framework.ExpectNoError(err) } @@ -1170,6 +2416,24 @@ func setupGatewayPods(f *framework.Framework, nodes *v1.NodeList, pod1, pod2, ns return gwPods } +func setupPolicyBasedGatewayPods(f *framework.Framework, nodes *v1.NodeList, pod1, pod2, ns string, cmd []string, addressesv4, addressesv6 gatewayTestIPs) []string { + gwPods := []string{pod1, pod2} + if externalContainerNetwork == "host" { + gwPods = []string{pod1} + } + + for i, gwPod := range gwPods { + _, err := createGenericPodWithLabel(f, gwPod, nodes.Items[i].Name, ns, cmd, map[string]string{"gatewayPod": "true"}) + framework.ExpectNoError(err) + } + + for i, gwPod := range gwPods { + annotateMultusNetworkStatusInPodGateway(gwPod, ns, []string{addressesv4.gatewayIPs[i]}) + } + + return gwPods +} + func cleanExGWContainers(clientSet kubernetes.Interface, gwContainers []string, addressesv4, addressesv6 gatewayTestIPs) { ginkgo.By("Deleting the gateway containers") if externalContainerNetwork == "host" { @@ -1185,23 +2449,26 @@ func cleanExGWContainers(clientSet kubernetes.Interface, gwContainers []string, // setupGatewayContainersForConntrackTest sets up iperf3 external containers, adds routes to src // pods via the nodes, starts up iperf3 
server on src-pod func setupGatewayContainersForConntrackTest(f *framework.Framework, nodes *v1.NodeList, gwContainer1, gwContainer2, srcPodName string) (gatewayTestIPs, gatewayTestIPs) { + var ( + err error + clientPod *v1.Pod + ) addressesv4 := gatewayTestIPs{gatewayIPs: make([]string, 2)} addressesv6 := gatewayTestIPs{gatewayIPs: make([]string, 2)} - ginkgo.By("Creating the gateway containers for the UDP test") addressesv4.gatewayIPs[0], addressesv6.gatewayIPs[0] = createClusterExternalContainer(gwContainer1, iperf3Image, []string{"-itd", "--privileged", "--network", externalContainerNetwork}, []string{}) addressesv4.gatewayIPs[1], addressesv6.gatewayIPs[1] = createClusterExternalContainer(gwContainer2, iperf3Image, []string{"-itd", "--privileged", "--network", externalContainerNetwork}, []string{}) node := nodes.Items[0] - addressesv4.nodeIP, addressesv6.nodeIP = getContainerAddressesForNetwork(node.Name, externalContainerNetwork) - framework.Logf("the pod side node is %s and the source node ip is %s - %s", node.Name, addressesv4.nodeIP, addressesv6.nodeIP) - ginkgo.By("Creating the source pod to reach the destination ips from") - clientPod, err := createPod(f, srcPodName, node.Name, f.Namespace.Name, []string{}, map[string]string{}, func(p *v1.Pod) { + clientPod, err = createPod(f, srcPodName, node.Name, f.Namespace.Name, []string{}, map[string]string{}, func(p *v1.Pod) { p.Spec.Containers[0].Image = iperf3Image }) framework.ExpectNoError(err) + addressesv4.nodeIP, addressesv6.nodeIP = getContainerAddressesForNetwork(node.Name, externalContainerNetwork) + framework.Logf("the pod side node is %s and the source node ip is %s - %s", node.Name, addressesv4.nodeIP, addressesv6.nodeIP) + // start iperf3 servers at ports 5201 and 5202 on the src app pod args := []string{"exec", srcPodName, "--", "iperf3", "-s", "--daemon", "-V", fmt.Sprintf("-p %d", 5201)} _, err = framework.RunKubectl(f.Namespace.Name, args...) 
@@ -1265,20 +2532,40 @@ func annotatePodForGateway(podName, podNS, namespace, networkIPs string, bfd boo // this fakes out the multus annotation so that the pod IP is // actually an IP of an external container for testing purposes annotateArgs := []string{ - "annotate", - "pods", - "--overwrite", - podName, fmt.Sprintf("k8s.v1.cni.cncf.io/network-status=[{\"name\":\"%s\",\"interface\":"+ "\"net1\",\"ips\":[%s],\"mac\":\"%s\"}]", "foo", networkIPs, "01:23:45:67:89:10"), fmt.Sprintf("k8s.ovn.org/routing-namespaces=%s", namespace), fmt.Sprintf("k8s.ovn.org/routing-network=%s", "foo"), - "--overwrite", } if bfd { annotateArgs = append(annotateArgs, "k8s.ovn.org/bfd-enabled=\"\"") } - framework.Logf("Annotating the external gateway pod with annotation %s", annotateArgs) + annotatePodForGatewayWithAnnotations(podName, podNS, annotateArgs) +} + +func annotateMultusNetworkStatusInPodGateway(podName, podNS string, networkIPs []string) { + // add the annotations to the pod to enable the gateway forwarding. + // this fakes out the multus annotation so that the pod IP is + // actually an IP of an external container for testing purposes + nStatus := []nettypes.NetworkStatus{{Name: "foo", Interface: "net1", IPs: networkIPs, Mac: "01:23:45:67:89:10"}} + out, err := json.Marshal(nStatus) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + annotatePodForGatewayWithAnnotations(podName, podNS, []string{fmt.Sprintf("k8s.v1.cni.cncf.io/network-status=%s", string(out))}) +} + +func annotatePodForGatewayWithAnnotations(podName, podNS string, annotations []string) { + // add the annotations to the pod to enable the gateway forwarding. + // this fakes out the multus annotation so that the pod IP is + // actually an IP of an external container for testing purposes + annotateArgs := []string{ + "annotate", + "pods", + podName, + "--overwrite", + } + annotateArgs = append(annotateArgs, annotations...) 
+ framework.Logf("Annotating the external gateway pod with annotation '%s'", annotateArgs) framework.RunKubectlOrDie(podNS, annotateArgs...) } @@ -1300,6 +2587,160 @@ func annotateNamespaceForGateway(namespace string, bfd bool, gateways ...string) framework.RunKubectlOrDie(namespace, annotateArgs...) } +func removeStaticGatewayAnnotationInNamespace(namespace string) { + + // annotate the test namespace with multiple gateways defined + annotateArgs := []string{ + "annotate", + "namespace", + namespace, + "k8s.ovn.org/routing-external-gws-", + "--overwrite", + } + framework.RunKubectlOrDie(namespace, annotateArgs...) +} + +func createAPBExternalRouteCRWithDynamicHop(policyName, targetNamespace, servingNamespace string, bfd bool, gateways []string) { + data := fmt.Sprintf(`apiVersion: k8s.ovn.org/v1 +kind: AdminPolicyBasedExternalRoute +metadata: + name: %s +spec: + from: + namespaceSelector: + matchLabels: + kubernetes.io/metadata.name: %s + nextHops: + dynamic: +%s +`, policyName, targetNamespace, formatDynamicHops(bfd, servingNamespace)) + stdout, err := framework.RunKubectlInput("", data, "create", "-f", "-") + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Expect(stdout).To(gomega.Equal(fmt.Sprintf("adminpolicybasedexternalroute.k8s.ovn.org/%s created\n", policyName))) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gwIPs := sets.NewString(gateways...).List() + gomega.Eventually(func() string { + lastMsg, err := framework.RunKubectl("", "get", "apbexternalroute", policyName, "-ojsonpath={.status.messages[-1:]}") + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + return lastMsg + }, time.Minute, 1).Should(gomega.Equal(fmt.Sprintf("Configured external gateway IPs: %s", strings.Join(gwIPs, ",")))) + gomega.Eventually(func() string { + status, err := framework.RunKubectl("", "get", "apbexternalroute", policyName, "-ojsonpath={.status.status}") + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + return status + }, time.Minute, 
1).Should(gomega.Equal("Success")) +} +func createAPBExternalRouteCRWithStaticHop(policyName, namespaceName string, bfd bool, gateways ...string) { + + data := fmt.Sprintf(`apiVersion: k8s.ovn.org/v1 +kind: AdminPolicyBasedExternalRoute +metadata: + name: %s +spec: + from: + namespaceSelector: + matchLabels: + kubernetes.io/metadata.name: %s + nextHops: + static: +%s +`, policyName, namespaceName, formatStaticHops(bfd, gateways...)) + stdout, err := framework.RunKubectlInput("", data, "create", "-f", "-", "--save-config") + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Expect(stdout).To(gomega.Equal(fmt.Sprintf("adminpolicybasedexternalroute.k8s.ovn.org/%s created\n", policyName))) + gwIPs := sets.NewString(gateways...).List() + gomega.Eventually(func() string { + lastMsg, err := framework.RunKubectl("", "get", "apbexternalroute", policyName, "-ojsonpath={.status.messages[-1:]}") + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + return lastMsg + }, time.Minute, 1).Should(gomega.Equal(fmt.Sprintf("Configured external gateway IPs: %s", strings.Join(gwIPs, ",")))) + gomega.Eventually(func() string { + status, err := framework.RunKubectl("", "get", "apbexternalroute", policyName, "-ojsonpath={.status.status}") + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + return status + }, time.Minute, 1).Should(gomega.Equal("Success")) +} + +func updateAPBExternalRouteCRWithStaticHop(policyName, namespaceName string, bfd bool, gateways ...string) { + + lastUpdatetime, err := framework.RunKubectl("", "get", "apbexternalroute", policyName, "-ojsonpath={.status.lastTransitionTime}") + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + data := fmt.Sprintf(`apiVersion: k8s.ovn.org/v1 +kind: AdminPolicyBasedExternalRoute +metadata: + name: %s +spec: + from: + namespaceSelector: + matchLabels: + kubernetes.io/metadata.name: %s + nextHops: + static: +%s +`, policyName, namespaceName, formatStaticHops(bfd, gateways...)) + _, err = framework.RunKubectlInput(namespaceName, 
data, "apply", "-f", "-") + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Eventually(func() string { + lastMsg, err := framework.RunKubectl("", "get", "apbexternalroute", policyName, "-ojsonpath={.status.messages[-1:]}") + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + return lastMsg + }, 10).Should(gomega.Equal(fmt.Sprintf("Configured external gateway IPs: %s", strings.Join(gateways, ",")))) + + gomega.Eventually(func() string { + s, err := framework.RunKubectl("", "get", "apbexternalroute", policyName, "-ojsonpath={.status.status}") + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + return s + }, 10).Should(gomega.Equal("Success")) + gomega.Eventually(func() string { + t, err := framework.RunKubectl("", "get", "apbexternalroute", policyName, "-ojsonpath={.status.lastTransitionTime}") + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + return t + }, 10, 1).ShouldNot(gomega.Equal(lastUpdatetime)) + +} + +func deleteAPBExternalRouteCR(policyName string) { + framework.RunKubectl("", "delete", "apbexternalroute", policyName) +} +func formatStaticHops(bfd bool, gateways ...string) string { + b := strings.Builder{} + bfdEnabled := "true" + if !bfd { + bfdEnabled = "false" + } + for _, gateway := range gateways { + b.WriteString(fmt.Sprintf(` - ip: "%s" + bfdEnabled: %s +`, gateway, bfdEnabled)) + } + return b.String() +} + +func formatDynamicHops(bfd bool, servingNamespace string) string { + b := strings.Builder{} + bfdEnabled := "true" + if !bfd { + bfdEnabled = "false" + } + b.WriteString(fmt.Sprintf(` - podSelector: + matchLabels: + gatewayPod: "true" + bfdEnabled: %s + namespaceSelector: + matchLabels: + kubernetes.io/metadata.name: %s + networkAttachmentName: foo +`, bfdEnabled, servingNamespace)) + return b.String() +} + +func getGatewayPod(f *framework.Framework, podNamespace, podName string) *v1.Pod { + pod, err := f.ClientSet.CoreV1().Pods(podNamespace).Get(context.Background(), podName, metav1.GetOptions{}) + framework.ExpectNoError(err, 
fmt.Sprintf("unable to get pod: %s, err: %v", podName, err)) + return pod +} + func hostNamesForContainers(containers []string) map[string]struct{} { res := make(map[string]struct{}) for _, c := range containers { @@ -1341,8 +2782,11 @@ func pokeConntrackEntries(nodeName, podIP, protocol string, patterns []string) i for _, connEntry := range strings.Split(conntrackEntries, "\n") { match := strings.Contains(connEntry, protocol) && strings.Contains(connEntry, podIP) for _, pattern := range patterns { - if strings.Contains(connEntry, pattern) && match { - numOfConnEntries++ + if match { + klog.Infof("%s in %s", pattern, connEntry) + if strings.Contains(connEntry, pattern) { + numOfConnEntries++ + } } } if len(patterns) == 0 && match { @@ -1398,10 +2842,7 @@ EOF func isBFDPaired(container, peer string) bool { res, err := runCommand(containerRuntime, "exec", container, "bash", "-c", fmt.Sprintf("vtysh -c \"show bfd peer %s\"", peer)) framework.ExpectNoError(err, "failed to check bfd status in %s", container) - if strings.Contains(res, "Status: up") { - return true - } - return false + return strings.Contains(res, "Status: up") } // When running on host network we clean the routes and ips we added previously @@ -1444,3 +2885,22 @@ func checkPingOnContainer(container string, srcPodName string, icmpCmd string, w framework.ExpectNoError(err, "Failed to detect icmp messages from %s on gateway %s", srcPodName, container) framework.Logf("ICMP packet successfully detected on gateway %s", container) } + +func resetGatewayAnnotations(f *framework.Framework) { + // remove the routing external annotation + if f == nil || f.Namespace == nil { + return + } + annotations := []string{ + "k8s.ovn.org/routing-external-gws-", + "k8s.ovn.org/bfd-enabled-", + } + ginkgo.By("Resetting the gw annotations") + for _, annotation := range annotations { + framework.RunKubectlOrDie("", []string{ + "annotate", + "namespace", + f.Namespace.Name, + annotation}...) 
+ } +} From 513482b7daf276cdc4e3e9ee121801a1ec4b2b10 Mon Sep 17 00:00:00 2001 From: Patryk Diak Date: Thu, 25 May 2023 19:22:04 +0200 Subject: [PATCH 09/73] Initialize EgressIP stopChan in cluster-manager Without it Stop() will always fail with `close of nil channel`. Additionally removed unused fields from FakeClusterManager. Signed-off-by: Patryk Diak --- go-controller/pkg/clustermanager/egressip_controller.go | 1 + .../pkg/clustermanager/fake_cluster_manager_test.go | 9 +-------- 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/go-controller/pkg/clustermanager/egressip_controller.go b/go-controller/pkg/clustermanager/egressip_controller.go index f2d8d0587f..ff358f77ca 100644 --- a/go-controller/pkg/clustermanager/egressip_controller.go +++ b/go-controller/pkg/clustermanager/egressip_controller.go @@ -413,6 +413,7 @@ func newEgressIPController(ovnClient *util.OVNClusterManagerClientset, wf *facto egressIPTotalTimeout: config.OVNKubernetesFeature.EgressIPReachabiltyTotalTimeout, reachabilityCheckInterval: egressIPReachabilityCheckInterval, egressIPNodeHealthCheckPort: config.OVNKubernetesFeature.EgressIPNodeHealthCheckPort, + stopChan: make(chan struct{}), } eIPC.initRetryFramework() return eIPC diff --git a/go-controller/pkg/clustermanager/fake_cluster_manager_test.go b/go-controller/pkg/clustermanager/fake_cluster_manager_test.go index 805f9d181e..7456aeb539 100644 --- a/go-controller/pkg/clustermanager/fake_cluster_manager_test.go +++ b/go-controller/pkg/clustermanager/fake_cluster_manager_test.go @@ -1,8 +1,6 @@ package clustermanager import ( - "sync" - "github.com/onsi/gomega" egressip "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressip/v1" egressipfake "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/egressip/v1/apis/clientset/versioned/fake" @@ -17,8 +15,6 @@ type FakeClusterManager struct { fakeClient *util.OVNClusterManagerClientset watcher *factory.WatchFactory eIPC *egressIPClusterController - stopChan chan struct{} - 
wg *sync.WaitGroup fakeRecorder *record.FakeRecorder } @@ -52,14 +48,11 @@ func (o *FakeClusterManager) init() { err = o.watcher.Start() gomega.Expect(err).NotTo(gomega.HaveOccurred()) - o.stopChan = make(chan struct{}) - o.wg = &sync.WaitGroup{} o.eIPC = newEgressIPController(o.fakeClient, o.watcher, o.fakeRecorder) gomega.Expect(err).NotTo(gomega.HaveOccurred()) } func (o *FakeClusterManager) shutdown() { o.watcher.Shutdown() - close(o.stopChan) - o.wg.Wait() + o.eIPC.Stop() } From 41fb66586269ec39033643a20732718a4209a511 Mon Sep 17 00:00:00 2001 From: Jamo Luhrsen Date: Wed, 17 May 2023 15:47:02 -0700 Subject: [PATCH 10/73] upgrade go and linter versions - go 19 in go.mod - use go v1.19.6 in Makefile for building - upgrade golangci linter to latest version - one whitespace error fixed as part of the lint upgrade Signed-off-by: Jamo Luhrsen --- go-controller/Makefile | 2 +- go-controller/go.mod | 2 +- go-controller/hack/lint.sh | 3 +-- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/go-controller/Makefile b/go-controller/Makefile index b854c00478..f6262f1390 100644 --- a/go-controller/Makefile +++ b/go-controller/Makefile @@ -11,7 +11,7 @@ PKGS ?= GOPATH ?= $(shell go env GOPATH) TEST_REPORT_DIR?=$(CURDIR)/_artifacts export TEST_REPORT_DIR -GO_VERSION ?= 1.18.4 +GO_VERSION ?= 1.19.6 GO_DOCKER_IMG = quay.io/giantswarm/golang:${GO_VERSION} # CONTAINER_RUNNABLE determines if the tests can be run inside a container. It checks to see if # podman/docker is installed on the system. 
diff --git a/go-controller/go.mod b/go-controller/go.mod index 8a3db3d405..2488f6e7e6 100644 --- a/go-controller/go.mod +++ b/go-controller/go.mod @@ -1,6 +1,6 @@ module github.com/ovn-org/ovn-kubernetes/go-controller -go 1.18 +go 1.19 require ( github.com/Microsoft/hcsshim v0.9.6 diff --git a/go-controller/hack/lint.sh b/go-controller/hack/lint.sh index 557d3c9c02..e6d2396b76 100755 --- a/go-controller/hack/lint.sh +++ b/go-controller/hack/lint.sh @@ -1,7 +1,6 @@ #!/usr/bin/env bash -# pin golangci-lint version to 1.46.0 -VERSION=v1.46.0 +VERSION=v1.52.2 if [ "$#" -ne 1 ]; then echo "Expected command line argument - container runtime (docker/podman) got $# arguments: $@" exit 1 From 89f953da99e48bfbcc5c65cfb709f2770c3fcd8a Mon Sep 17 00:00:00 2001 From: Peng Liu Date: Sat, 6 May 2023 21:33:26 +0800 Subject: [PATCH 11/73] Allow external gateway bridge without uplink port In local gateway mode Users can create the external gateway bridge without attaching a host physical interface as the uplink port in local gateway mode. The gateway router will use 169.254.169.4 as the default gateway. Add a new gateway config flag 'allow-no-uplink' to controll if this setup is allowed or not. It's disabled by default. With this setup, egressIP and egressGW can not work. 
Signed-off-by: Peng Liu --- go-controller/pkg/config/config.go | 7 + go-controller/pkg/config/config_test.go | 5 + go-controller/pkg/node/gateway.go | 9 +- go-controller/pkg/node/gateway_init.go | 34 +- .../pkg/node/gateway_init_linux_test.go | 121 ++++++ go-controller/pkg/node/gateway_shared_intf.go | 402 ++++++++++-------- go-controller/pkg/node/helper_linux.go | 18 +- 7 files changed, 396 insertions(+), 200 deletions(-) diff --git a/go-controller/pkg/config/config.go b/go-controller/pkg/config/config.go index a0aa868eff..ed81bb719a 100644 --- a/go-controller/pkg/config/config.go +++ b/go-controller/pkg/config/config.go @@ -406,6 +406,8 @@ type GatewayConfig struct { SingleNode bool `gcfg:"single-node"` // DisableForwarding (enabled by default) controls if forwarding is allowed on OVNK controlled interfaces DisableForwarding bool `gcfg:"disable-forwarding"` + // AllowNoUplink (disabled by default) controls if the external gateway bridge without an uplink port is allowed in local gateway mode. 
+ AllowNoUplink bool `gcfg:"allow-no-uplink"` } // OvnAuthConfig holds client authentication and location details for @@ -1273,6 +1275,11 @@ var OVNGatewayFlags = []cli.Flag{ "Single node indicates a one node cluster and allows to simplify ovn-kubernetes gateway logic", Destination: &cliConfig.Gateway.SingleNode, }, + &cli.BoolFlag{ + Name: "allow-no-uplink", + Usage: "Allow the external gateway bridge without an uplink port in local gateway mode", + Destination: &cliConfig.Gateway.AllowNoUplink, + }, // Deprecated CLI options &cli.BoolFlag{ Name: "init-gateways", diff --git a/go-controller/pkg/config/config_test.go b/go-controller/pkg/config/config_test.go index 80e7fcad21..1b635cfd23 100644 --- a/go-controller/pkg/config/config_test.go +++ b/go-controller/pkg/config/config_test.go @@ -204,6 +204,7 @@ v6-join-subnet=fd90::/64 router-subnet=10.50.0.0/16 single-node=false disable-forwarding=true +allow-no-uplink=false [hybridoverlay] enabled=true @@ -310,6 +311,7 @@ var _ = Describe("Config Operations", func() { gomega.Expect(Gateway.RouterSubnet).To(gomega.Equal("")) gomega.Expect(Gateway.SingleNode).To(gomega.BeFalse()) gomega.Expect(Gateway.DisableForwarding).To(gomega.BeFalse()) + gomega.Expect(Gateway.AllowNoUplink).To(gomega.BeFalse()) gomega.Expect(OVNKubernetesFeature.EgressIPReachabiltyTotalTimeout).To(gomega.Equal(1)) gomega.Expect(OVNKubernetesFeature.EgressIPNodeHealthCheckPort).To(gomega.Equal(0)) gomega.Expect(OVNKubernetesFeature.EnableMultiNetwork).To(gomega.BeFalse()) @@ -625,6 +627,7 @@ var _ = Describe("Config Operations", func() { gomega.Expect(Gateway.RouterSubnet).To(gomega.Equal("10.50.0.0/16")) gomega.Expect(Gateway.SingleNode).To(gomega.BeFalse()) gomega.Expect(Gateway.DisableForwarding).To(gomega.BeTrue()) + gomega.Expect(Gateway.AllowNoUplink).To(gomega.BeFalse()) gomega.Expect(HybridOverlay.Enabled).To(gomega.BeTrue()) gomega.Expect(OVNKubernetesFeature.EgressIPReachabiltyTotalTimeout).To(gomega.Equal(3)) @@ -713,6 +716,7 @@ var _ = 
Describe("Config Operations", func() { gomega.Expect(Gateway.RouterSubnet).To(gomega.Equal("10.55.0.0/16")) gomega.Expect(Gateway.SingleNode).To(gomega.BeTrue()) gomega.Expect(Gateway.DisableForwarding).To(gomega.BeTrue()) + gomega.Expect(Gateway.AllowNoUplink).To(gomega.BeTrue()) gomega.Expect(HybridOverlay.Enabled).To(gomega.BeTrue()) gomega.Expect(OVNKubernetesFeature.EgressIPReachabiltyTotalTimeout).To(gomega.Equal(5)) @@ -767,6 +771,7 @@ var _ = Describe("Config Operations", func() { "-gateway-router-subnet=10.55.0.0/16", "-single-node", "-disable-forwarding", + "-allow-no-uplink", "-enable-hybrid-overlay", "-hybrid-overlay-cluster-subnets=11.132.0.0/14/23", "-monitor-all=false", diff --git a/go-controller/pkg/node/gateway.go b/go-controller/pkg/node/gateway.go index cc0184bb75..cc5b5c05a3 100644 --- a/go-controller/pkg/node/gateway.go +++ b/go-controller/pkg/node/gateway.go @@ -431,10 +431,15 @@ func bridgeForInterface(intfName, nodeName, physicalNetworkName string, gwIPs [] // gateway interface is an OVS bridge uplinkName, err := getIntfName(intfName) if err != nil { - return nil, errors.Wrapf(err, "Failed to find intfName for %s", intfName) + if config.Gateway.Mode == config.GatewayModeLocal && config.Gateway.AllowNoUplink { + klog.Infof("Could not find uplink for %s, setup gateway bridge with no uplink port, egress IP and egress GW will not work", intfName) + } else { + return nil, errors.Wrapf(err, "Failed to find intfName for %s", intfName) + } + } else { + res.uplinkName = uplinkName } res.bridgeName = intfName - res.uplinkName = uplinkName } var err error // Now, we get IP addresses for the bridge diff --git a/go-controller/pkg/node/gateway_init.go b/go-controller/pkg/node/gateway_init.go index de4fbfa9e6..73f1760504 100644 --- a/go-controller/pkg/node/gateway_init.go +++ b/go-controller/pkg/node/gateway_init.go @@ -1,6 +1,7 @@ package node import ( + "errors" "fmt" "net" "strings" @@ -189,7 +190,32 @@ func getGatewayNextHops() ([]net.IP, string, 
error) { if needIPv4NextHop || needIPv6NextHop || gatewayIntf == "" { defaultGatewayIntf, defaultGatewayNextHops, err := getDefaultGatewayInterfaceDetails(gatewayIntf, config.IPv4Mode, config.IPv6Mode) if err != nil { - return nil, "", err + if !(errors.As(err, new(*GatewayInterfaceMismatchError)) && config.Gateway.Mode == config.GatewayModeLocal && config.Gateway.AllowNoUplink) { + return nil, "", err + } + } + if gatewayIntf == "" { + if defaultGatewayIntf == "" { + return nil, "", fmt.Errorf("unable to find default gateway and none provided via config") + } + gatewayIntf = defaultGatewayIntf + } else { + if gatewayIntf != defaultGatewayIntf || len(defaultGatewayNextHops) == 0 { + if config.Gateway.Mode == config.GatewayModeLocal && config.Gateway.AllowNoUplink { + // For local gw, if not default gateway is available or the provide gateway interface is not the host gateway interface + // use nexthop masquerade IP as GR default gw to steer traffic to the gateway bridge + if needIPv4NextHop { + nexthop := net.ParseIP(types.V4DummyNextHopMasqueradeIP) + gatewayNextHops = append(gatewayNextHops, nexthop) + needIPv4NextHop = false + } + if needIPv6NextHop { + nexthop := net.ParseIP(types.V6DummyNextHopMasqueradeIP) + gatewayNextHops = append(gatewayNextHops, nexthop) + needIPv6NextHop = false + } + } + } } if needIPv4NextHop || needIPv6NextHop { for _, defaultGatewayNextHop := range defaultGatewayNextHops { @@ -200,12 +226,6 @@ func getGatewayNextHops() ([]net.IP, string, error) { } } } - if gatewayIntf == "" { - if defaultGatewayIntf == "" { - return nil, "", fmt.Errorf("unable to find default gateway and none provided via config") - } - gatewayIntf = defaultGatewayIntf - } } return gatewayNextHops, gatewayIntf, nil } diff --git a/go-controller/pkg/node/gateway_init_linux_test.go b/go-controller/pkg/node/gateway_init_linux_test.go index 668a04de71..1dd182d511 100644 --- a/go-controller/pkg/node/gateway_init_linux_test.go +++ 
b/go-controller/pkg/node/gateway_init_linux_test.go @@ -6,6 +6,7 @@ package node import ( "bytes" "context" + "errors" "fmt" "net" "runtime" @@ -1734,5 +1735,125 @@ var _ = Describe("Gateway unit tests", func() { Expect(gatewayIntf).To(Equal(ifName)) Expect(gatewayNextHops[0]).To(Equal(gwIPs[0])) }) + + Context("In Local GW mode", func() { + ovntest.OnSupportedPlatformsIt("Finds correct gateway interface and nexthops when dummy gateway bridge is created", func() { + ifName := "enf1f0" + dummyBridgeName := "br-ex" + _, ipnet, err := net.ParseCIDR("0.0.0.0/0") + Expect(err).ToNot(HaveOccurred()) + hostGwIPs := []net.IP{net.ParseIP("10.0.0.11")} + lnk := &linkMock.Link{} + lnkAttr := &netlink.LinkAttrs{ + Name: ifName, + Index: 5, + } + defaultRoute := &netlink.Route{ + Dst: ipnet, + LinkIndex: 5, + Scope: netlink.SCOPE_UNIVERSE, + Gw: hostGwIPs[0], + MTU: config.Default.MTU, + } + lnk.On("Attrs").Return(lnkAttr) + netlinkMock.On("LinkByName", mock.Anything).Return(lnk, nil) + netlinkMock.On("LinkByIndex", mock.Anything).Return(lnk, nil) + netlinkMock.On("RouteListFiltered", mock.Anything, mock.Anything, mock.Anything).Return([]netlink.Route{*defaultRoute}, nil) + + fexec := ovntest.NewLooseCompareFakeExec() + fexec.AddFakeCmd(&ovntest.ExpectedCmd{ + Cmd: fmt.Sprintf("ovs-vsctl --timeout=15 port-to-br %s", ifName), + Err: fmt.Errorf(""), + Output: "", + }) + err = util.SetExec(fexec) + Expect(err).NotTo(HaveOccurred()) + + gwIPs := []net.IP{net.ParseIP(types.V4DummyNextHopMasqueradeIP)} + config.Gateway.Interface = dummyBridgeName + config.Gateway.Mode = config.GatewayModeLocal + config.Gateway.AllowNoUplink = true + + gatewayNextHops, gatewayIntf, err := getGatewayNextHops() + Expect(err).NotTo(HaveOccurred()) + Expect(gatewayIntf).To(Equal(dummyBridgeName)) + Expect(gatewayNextHops[0]).To(Equal(gwIPs[0])) + }) + + ovntest.OnSupportedPlatformsIt("Finds correct gateway interface and nexthops when dummy gateway bridge is created and no default route", func() { + ifName 
:= "enf1f0" + dummyBridgeName := "br-ex" + lnk := &linkMock.Link{} + lnkAttr := &netlink.LinkAttrs{ + Name: ifName, + Index: 5, + } + + lnk.On("Attrs").Return(lnkAttr) + netlinkMock.On("LinkByName", mock.Anything).Return(lnk, nil) + netlinkMock.On("LinkByIndex", mock.Anything).Return(lnk, nil) + netlinkMock.On("RouteListFiltered", mock.Anything, mock.Anything, mock.Anything).Return([]netlink.Route{}, nil) + + fexec := ovntest.NewLooseCompareFakeExec() + fexec.AddFakeCmd(&ovntest.ExpectedCmd{ + Cmd: fmt.Sprintf("ovs-vsctl --timeout=15 port-to-br %s", ifName), + Err: fmt.Errorf(""), + Output: "", + }) + err := util.SetExec(fexec) + Expect(err).NotTo(HaveOccurred()) + + gwIPs := []net.IP{net.ParseIP(types.V4DummyNextHopMasqueradeIP)} + config.Gateway.Interface = dummyBridgeName + config.Gateway.Mode = config.GatewayModeLocal + config.Gateway.AllowNoUplink = true + + gatewayNextHops, gatewayIntf, err := getGatewayNextHops() + Expect(err).NotTo(HaveOccurred()) + Expect(gatewayIntf).To(Equal(dummyBridgeName)) + Expect(gatewayNextHops[0]).To(Equal(gwIPs[0])) + }) + + ovntest.OnSupportedPlatformsIt("Returns error when dummy gateway bridge is created without allow-no-uplink flag", func() { + ifName := "enf1f0" + dummyBridgeName := "br-ex" + _, ipnet, err := net.ParseCIDR("0.0.0.0/0") + Expect(err).ToNot(HaveOccurred()) + hostGwIPs := []net.IP{net.ParseIP("10.0.0.11")} + lnk := &linkMock.Link{} + lnkAttr := &netlink.LinkAttrs{ + Name: ifName, + Index: 5, + } + defaultRoute := &netlink.Route{ + Dst: ipnet, + LinkIndex: 5, + Scope: netlink.SCOPE_UNIVERSE, + Gw: hostGwIPs[0], + MTU: config.Default.MTU, + } + lnk.On("Attrs").Return(lnkAttr) + netlinkMock.On("LinkByName", mock.Anything).Return(lnk, nil) + netlinkMock.On("LinkByIndex", mock.Anything).Return(lnk, nil) + netlinkMock.On("RouteListFiltered", mock.Anything, mock.Anything, mock.Anything).Return([]netlink.Route{*defaultRoute}, nil) + + fexec := ovntest.NewLooseCompareFakeExec() + fexec.AddFakeCmd(&ovntest.ExpectedCmd{ + 
Cmd: fmt.Sprintf("ovs-vsctl --timeout=15 port-to-br %s", ifName), + Err: fmt.Errorf(""), + Output: "", + }) + err = util.SetExec(fexec) + Expect(err).NotTo(HaveOccurred()) + + config.Gateway.Interface = dummyBridgeName + config.Gateway.Mode = config.GatewayModeLocal + + gatewayNextHops, gatewayIntf, err := getGatewayNextHops() + Expect(errors.As(err, new(*GatewayInterfaceMismatchError))).To(BeTrue()) + Expect(gatewayIntf).To(Equal("")) + Expect(len(gatewayNextHops)).To(Equal(0)) + }) + }) }) }) diff --git a/go-controller/pkg/node/gateway_shared_intf.go b/go-controller/pkg/node/gateway_shared_intf.go index 31a1cd9195..5baba7519f 100644 --- a/go-controller/pkg/node/gateway_shared_intf.go +++ b/go-controller/pkg/node/gateway_shared_intf.go @@ -1087,20 +1087,21 @@ func flowsForDefaultBridge(bridge *bridgeConfiguration, extraIPs []net.IP) ([]st if config.IPv4Mode { // table0, Geneve packets coming from external. Skip conntrack and go directly to host // if dest mac is the shared mac send directly to host. - dftFlows = append(dftFlows, - fmt.Sprintf("cookie=%s, priority=205, in_port=%s, dl_dst=%s, udp, udp_dst=%d, "+ - "actions=output:%s", defaultOpenFlowCookie, ofPortPhys, bridgeMacAddress, config.Default.EncapPort, - ofPortHost)) - // perform NORMAL action otherwise. - dftFlows = append(dftFlows, - fmt.Sprintf("cookie=%s, priority=200, in_port=%s, udp, udp_dst=%d, "+ - "actions=NORMAL", defaultOpenFlowCookie, ofPortPhys, config.Default.EncapPort)) - - // table0, Geneve packets coming from LOCAL. 
Skip conntrack and go directly to external - dftFlows = append(dftFlows, - fmt.Sprintf("cookie=%s, priority=200, in_port=%s, udp, udp_dst=%d, "+ - "actions=output:%s", defaultOpenFlowCookie, ovsLocalPort, config.Default.EncapPort, ofPortPhys)) + if ofPortPhys != "" { + dftFlows = append(dftFlows, + fmt.Sprintf("cookie=%s, priority=205, in_port=%s, dl_dst=%s, udp, udp_dst=%d, "+ + "actions=output:%s", defaultOpenFlowCookie, ofPortPhys, bridgeMacAddress, config.Default.EncapPort, + ofPortHost)) + // perform NORMAL action otherwise. + dftFlows = append(dftFlows, + fmt.Sprintf("cookie=%s, priority=200, in_port=%s, udp, udp_dst=%d, "+ + "actions=NORMAL", defaultOpenFlowCookie, ofPortPhys, config.Default.EncapPort)) + // table0, Geneve packets coming from LOCAL. Skip conntrack and go directly to external + dftFlows = append(dftFlows, + fmt.Sprintf("cookie=%s, priority=200, in_port=%s, udp, udp_dst=%d, "+ + "actions=output:%s", defaultOpenFlowCookie, ovsLocalPort, config.Default.EncapPort, ofPortPhys)) + } physicalIP, err := util.MatchFirstIPNetFamily(false, bridgeIPs) if err != nil { return nil, fmt.Errorf("unable to determine IPv4 physical IP of host: %v", err) @@ -1141,21 +1142,23 @@ func flowsForDefaultBridge(bridge *bridgeConfiguration, extraIPs []net.IP) ([]st defaultOpenFlowCookie, ofPortHost, types.V4OVNMasqueradeIP, OVNMasqCTZone)) } if config.IPv6Mode { - // table0, Geneve packets coming from external. Skip conntrack and go directly to host - // if dest mac is the shared mac send directly to host. - dftFlows = append(dftFlows, - fmt.Sprintf("cookie=%s, priority=205, in_port=%s, dl_dst=%s, udp6, udp_dst=%d, "+ - "actions=output:%s", defaultOpenFlowCookie, ofPortPhys, bridgeMacAddress, config.Default.EncapPort, - ofPortHost)) - // perform NORMAL action otherwise. 
- dftFlows = append(dftFlows, - fmt.Sprintf("cookie=%s, priority=200, in_port=%s, udp6, udp_dst=%d, "+ - "actions=NORMAL", defaultOpenFlowCookie, ofPortPhys, config.Default.EncapPort)) + if ofPortPhys != "" { + // table0, Geneve packets coming from external. Skip conntrack and go directly to host + // if dest mac is the shared mac send directly to host. + dftFlows = append(dftFlows, + fmt.Sprintf("cookie=%s, priority=205, in_port=%s, dl_dst=%s, udp6, udp_dst=%d, "+ + "actions=output:%s", defaultOpenFlowCookie, ofPortPhys, bridgeMacAddress, config.Default.EncapPort, + ofPortHost)) + // perform NORMAL action otherwise. + dftFlows = append(dftFlows, + fmt.Sprintf("cookie=%s, priority=200, in_port=%s, udp6, udp_dst=%d, "+ + "actions=NORMAL", defaultOpenFlowCookie, ofPortPhys, config.Default.EncapPort)) - // table0, Geneve packets coming from LOCAL. Skip conntrack and send to external - dftFlows = append(dftFlows, - fmt.Sprintf("cookie=%s, priority=200, in_port=%s, udp6, udp_dst=%d, "+ - "actions=output:%s", defaultOpenFlowCookie, ovsLocalPort, config.Default.EncapPort, ofPortPhys)) + // table0, Geneve packets coming from LOCAL. 
Skip conntrack and send to external + dftFlows = append(dftFlows, + fmt.Sprintf("cookie=%s, priority=200, in_port=%s, udp6, udp_dst=%d, "+ + "actions=output:%s", defaultOpenFlowCookie, ovsLocalPort, config.Default.EncapPort, ofPortPhys)) + } physicalIP, err := util.MatchFirstIPNetFamily(true, bridgeIPs) if err != nil { @@ -1233,59 +1236,61 @@ func flowsForDefaultBridge(bridge *bridgeConfiguration, extraIPs []net.IP) ([]st actions := fmt.Sprintf("output:%s", ofPortPatch) - if config.IPv4Mode { - // table 1, established and related connections in zone 64000 with ct_mark ctMarkOVN go to OVN - dftFlows = append(dftFlows, - fmt.Sprintf("cookie=%s, priority=100, table=1, ip, ct_state=+trk+est, ct_mark=%s, "+ - "actions=%s", - defaultOpenFlowCookie, ctMarkOVN, actions)) + if ofPortPhys != "" { + if config.IPv4Mode { + // table 1, established and related connections in zone 64000 with ct_mark ctMarkOVN go to OVN + dftFlows = append(dftFlows, + fmt.Sprintf("cookie=%s, priority=100, table=1, ip, ct_state=+trk+est, ct_mark=%s, "+ + "actions=%s", + defaultOpenFlowCookie, ctMarkOVN, actions)) - dftFlows = append(dftFlows, - fmt.Sprintf("cookie=%s, priority=100, table=1, ip, ct_state=+trk+rel, ct_mark=%s, "+ - "actions=%s", - defaultOpenFlowCookie, ctMarkOVN, actions)) + dftFlows = append(dftFlows, + fmt.Sprintf("cookie=%s, priority=100, table=1, ip, ct_state=+trk+rel, ct_mark=%s, "+ + "actions=%s", + defaultOpenFlowCookie, ctMarkOVN, actions)) - // table 1, established and related connections in zone 64000 with ct_mark ctMarkHost go to host - dftFlows = append(dftFlows, - fmt.Sprintf("cookie=%s, priority=100, table=1, ip, ct_state=+trk+est, ct_mark=%s, "+ - "actions=output:%s", - defaultOpenFlowCookie, ctMarkHost, ofPortHost)) + // table 1, established and related connections in zone 64000 with ct_mark ctMarkHost go to host + dftFlows = append(dftFlows, + fmt.Sprintf("cookie=%s, priority=100, table=1, ip, ct_state=+trk+est, ct_mark=%s, "+ + "actions=output:%s", + 
defaultOpenFlowCookie, ctMarkHost, ofPortHost)) - dftFlows = append(dftFlows, - fmt.Sprintf("cookie=%s, priority=100, table=1, ip, ct_state=+trk+rel, ct_mark=%s, "+ - "actions=output:%s", - defaultOpenFlowCookie, ctMarkHost, ofPortHost)) - } + dftFlows = append(dftFlows, + fmt.Sprintf("cookie=%s, priority=100, table=1, ip, ct_state=+trk+rel, ct_mark=%s, "+ + "actions=output:%s", + defaultOpenFlowCookie, ctMarkHost, ofPortHost)) + } - if config.IPv6Mode { - // table 1, established and related connections in zone 64000 with ct_mark ctMarkOVN go to OVN - dftFlows = append(dftFlows, - fmt.Sprintf("cookie=%s, priority=100, table=1, ipv6, ct_state=+trk+est, ct_mark=%s, "+ - "actions=%s", - defaultOpenFlowCookie, ctMarkOVN, actions)) + if config.IPv6Mode { + // table 1, established and related connections in zone 64000 with ct_mark ctMarkOVN go to OVN + dftFlows = append(dftFlows, + fmt.Sprintf("cookie=%s, priority=100, table=1, ipv6, ct_state=+trk+est, ct_mark=%s, "+ + "actions=%s", + defaultOpenFlowCookie, ctMarkOVN, actions)) - dftFlows = append(dftFlows, - fmt.Sprintf("cookie=%s, priority=100, table=1, ipv6, ct_state=+trk+rel, ct_mark=%s, "+ - "actions=%s", - defaultOpenFlowCookie, ctMarkOVN, actions)) + dftFlows = append(dftFlows, + fmt.Sprintf("cookie=%s, priority=100, table=1, ipv6, ct_state=+trk+rel, ct_mark=%s, "+ + "actions=%s", + defaultOpenFlowCookie, ctMarkOVN, actions)) - // table 1, established and related connections in zone 64000 with ct_mark ctMarkHost go to host - dftFlows = append(dftFlows, - fmt.Sprintf("cookie=%s, priority=100, table=1, ip6, ct_state=+trk+est, ct_mark=%s, "+ - "actions=output:%s", - defaultOpenFlowCookie, ctMarkHost, ofPortHost)) + // table 1, established and related connections in zone 64000 with ct_mark ctMarkHost go to host + dftFlows = append(dftFlows, + fmt.Sprintf("cookie=%s, priority=100, table=1, ip6, ct_state=+trk+est, ct_mark=%s, "+ + "actions=output:%s", + defaultOpenFlowCookie, ctMarkHost, ofPortHost)) + + dftFlows = 
append(dftFlows, + fmt.Sprintf("cookie=%s, priority=100, table=1, ip6, ct_state=+trk+rel, ct_mark=%s, "+ + "actions=output:%s", + defaultOpenFlowCookie, ctMarkHost, ofPortHost)) + } + // table 1, we check to see if this dest mac is the shared mac, if so send to host dftFlows = append(dftFlows, - fmt.Sprintf("cookie=%s, priority=100, table=1, ip6, ct_state=+trk+rel, ct_mark=%s, "+ - "actions=output:%s", - defaultOpenFlowCookie, ctMarkHost, ofPortHost)) + fmt.Sprintf("cookie=%s, priority=10, table=1, dl_dst=%s, actions=output:%s", + defaultOpenFlowCookie, bridgeMacAddress, ofPortHost)) } - // table 1, we check to see if this dest mac is the shared mac, if so send to host - dftFlows = append(dftFlows, - fmt.Sprintf("cookie=%s, priority=10, table=1, dl_dst=%s, actions=output:%s", - defaultOpenFlowCookie, bridgeMacAddress, ofPortHost)) - // table 2, dispatch from Host -> OVN dftFlows = append(dftFlows, fmt.Sprintf("cookie=%s, table=2, "+ @@ -1336,40 +1341,43 @@ func commonFlows(subnets []*net.IPNet, bridge *bridgeConfiguration) ([]string, e var dftFlows []string - // table 0, we check to see if this dest mac is the shared mac, if so flood to both ports - dftFlows = append(dftFlows, - fmt.Sprintf("cookie=%s, priority=10, table=0, in_port=%s, dl_dst=%s, actions=output:%s,output:%s", - defaultOpenFlowCookie, ofPortPhys, bridgeMacAddress, ofPortPatch, ofPortHost)) + if ofPortPhys != "" { + // table 0, we check to see if this dest mac is the shared mac, if so flood to both ports + dftFlows = append(dftFlows, + fmt.Sprintf("cookie=%s, priority=10, table=0, in_port=%s, dl_dst=%s, actions=output:%s,output:%s", + defaultOpenFlowCookie, ofPortPhys, bridgeMacAddress, ofPortPatch, ofPortHost)) + } if config.IPv4Mode { physicalIP, err := util.MatchFirstIPNetFamily(false, bridgeIPs) if err != nil { return nil, fmt.Errorf("unable to determine IPv4 physical IP of host: %v", err) } - // table0, packets coming from egressIP pods that have mark 1008 on them - // will be DNAT-ed a final 
time into nodeIP to maintain consistency in traffic even if the GR - // DNATs these into egressIP prior to reaching external bridge. - // egressService pods will also undergo this SNAT to nodeIP since these features are tied - // together at the OVN policy level on the distributed router. - dftFlows = append(dftFlows, - fmt.Sprintf("cookie=%s, priority=105, in_port=%s, ip, pkt_mark=%s "+ - "actions=ct(commit, zone=%d, nat(src=%s), exec(set_field:%s->ct_mark)),output:%s", - defaultOpenFlowCookie, ofPortPatch, ovnKubeNodeSNATMark, config.Default.ConntrackZone, physicalIP.IP, ctMarkOVN, ofPortPhys)) - - // table 0, packets coming from pods headed externally. Commit connections with ct_mark ctMarkOVN - // so that reverse direction goes back to the pods. - dftFlows = append(dftFlows, - fmt.Sprintf("cookie=%s, priority=100, in_port=%s, ip, "+ - "actions=ct(commit, zone=%d, exec(set_field:%s->ct_mark)), output:%s", - defaultOpenFlowCookie, ofPortPatch, config.Default.ConntrackZone, ctMarkOVN, ofPortPhys)) + if ofPortPhys != "" { + // table0, packets coming from egressIP pods that have mark 1008 on them + // will be DNAT-ed a final time into nodeIP to maintain consistency in traffic even if the GR + // DNATs these into egressIP prior to reaching external bridge. + // egressService pods will also undergo this SNAT to nodeIP since these features are tied + // together at the OVN policy level on the distributed router. + dftFlows = append(dftFlows, + fmt.Sprintf("cookie=%s, priority=105, in_port=%s, ip, pkt_mark=%s "+ + "actions=ct(commit, zone=%d, nat(src=%s), exec(set_field:%s->ct_mark)),output:%s", + defaultOpenFlowCookie, ofPortPatch, ovnKubeNodeSNATMark, config.Default.ConntrackZone, physicalIP.IP, ctMarkOVN, ofPortPhys)) - // table 0, packets coming from host Commit connections with ct_mark ctMarkHost - // so that reverse direction goes back to the host. 
- dftFlows = append(dftFlows, - fmt.Sprintf("cookie=%s, priority=100, in_port=%s, ip, "+ - "actions=ct(commit, zone=%d, exec(set_field:%s->ct_mark)), output:%s", - defaultOpenFlowCookie, ofPortHost, config.Default.ConntrackZone, ctMarkHost, ofPortPhys)) + // table 0, packets coming from pods headed externally. Commit connections with ct_mark ctMarkOVN + // so that reverse direction goes back to the pods. + dftFlows = append(dftFlows, + fmt.Sprintf("cookie=%s, priority=100, in_port=%s, ip, "+ + "actions=ct(commit, zone=%d, exec(set_field:%s->ct_mark)), output:%s", + defaultOpenFlowCookie, ofPortPatch, config.Default.ConntrackZone, ctMarkOVN, ofPortPhys)) + // table 0, packets coming from host Commit connections with ct_mark ctMarkHost + // so that reverse direction goes back to the host. + dftFlows = append(dftFlows, + fmt.Sprintf("cookie=%s, priority=100, in_port=%s, ip, "+ + "actions=ct(commit, zone=%d, exec(set_field:%s->ct_mark)), output:%s", + defaultOpenFlowCookie, ofPortHost, config.Default.ConntrackZone, ctMarkHost, ofPortPhys)) + } if config.Gateway.Mode == config.GatewayModeLocal { // table 0, any packet coming from OVN send to host in LGW mode, host will take care of sending it outside if needed. // exceptions are traffic for egressIP and egressGW features and ICMP related traffic which will hit the priority 100 flow instead of this. 
@@ -1386,46 +1394,51 @@ func commonFlows(subnets []*net.IPNet, bridge *bridgeConfiguration) ([]string, e "actions=ct(table=4,zone=%d)", defaultOpenFlowCookie, ofPortPatch, physicalIP.IP, HostMasqCTZone)) // We send BFD traffic coming from OVN to outside directly using a higher priority flow - dftFlows = append(dftFlows, - fmt.Sprintf("cookie=%s, priority=650, table=0, in_port=%s, udp, tp_dst=3784, actions=output:%s", - defaultOpenFlowCookie, ofPortPatch, ofPortPhys)) + if ofPortPhys != "" { + dftFlows = append(dftFlows, + fmt.Sprintf("cookie=%s, priority=650, table=0, in_port=%s, udp, tp_dst=3784, actions=output:%s", + defaultOpenFlowCookie, ofPortPatch, ofPortPhys)) + } } - // table 0, packets coming from external. Send it through conntrack and - // resubmit to table 1 to know the state and mark of the connection. - dftFlows = append(dftFlows, - fmt.Sprintf("cookie=%s, priority=50, in_port=%s, ip, "+ - "actions=ct(zone=%d, nat, table=1)", defaultOpenFlowCookie, ofPortPhys, config.Default.ConntrackZone)) + if ofPortPhys != "" { + // table 0, packets coming from external. Send it through conntrack and + // resubmit to table 1 to know the state and mark of the connection. + dftFlows = append(dftFlows, + fmt.Sprintf("cookie=%s, priority=50, in_port=%s, ip, "+ + "actions=ct(zone=%d, nat, table=1)", defaultOpenFlowCookie, ofPortPhys, config.Default.ConntrackZone)) + } } if config.IPv6Mode { physicalIP, err := util.MatchFirstIPNetFamily(true, bridgeIPs) if err != nil { return nil, fmt.Errorf("unable to determine IPv6 physical IP of host: %v", err) } - // table0, packets coming from egressIP pods that have mark 1008 on them - // will be DNAT-ed a final time into nodeIP to maintain consistency in traffic even if the GR - // DNATs these into egressIP prior to reaching external bridge. - // egressService pods will also undergo this SNAT to nodeIP since these features are tied - // together at the OVN policy level on the distributed router. 
- dftFlows = append(dftFlows, - fmt.Sprintf("cookie=%s, priority=105, in_port=%s, ipv6, pkt_mark=%s "+ - "actions=ct(commit, zone=%d, nat(src=%s), exec(set_field:%s->ct_mark)),output:%s", - defaultOpenFlowCookie, ofPortPatch, ovnKubeNodeSNATMark, config.Default.ConntrackZone, physicalIP.IP, ctMarkOVN, ofPortPhys)) - - // table 0, packets coming from pods headed externally. Commit connections with ct_mark ctMarkOVN - // so that reverse direction goes back to the pods. - dftFlows = append(dftFlows, - fmt.Sprintf("cookie=%s, priority=100, in_port=%s, ipv6, "+ - "actions=ct(commit, zone=%d, exec(set_field:%s->ct_mark)), output:%s", - defaultOpenFlowCookie, ofPortPatch, config.Default.ConntrackZone, ctMarkOVN, ofPortPhys)) + if ofPortPhys != "" { + // table0, packets coming from egressIP pods that have mark 1008 on them + // will be DNAT-ed a final time into nodeIP to maintain consistency in traffic even if the GR + // DNATs these into egressIP prior to reaching external bridge. + // egressService pods will also undergo this SNAT to nodeIP since these features are tied + // together at the OVN policy level on the distributed router. + dftFlows = append(dftFlows, + fmt.Sprintf("cookie=%s, priority=105, in_port=%s, ipv6, pkt_mark=%s "+ + "actions=ct(commit, zone=%d, nat(src=%s), exec(set_field:%s->ct_mark)),output:%s", + defaultOpenFlowCookie, ofPortPatch, ovnKubeNodeSNATMark, config.Default.ConntrackZone, physicalIP.IP, ctMarkOVN, ofPortPhys)) - // table 0, packets coming from host. Commit connections with ct_mark ctMarkHost - // so that reverse direction goes back to the host. - dftFlows = append(dftFlows, - fmt.Sprintf("cookie=%s, priority=100, in_port=%s, ipv6, "+ - "actions=ct(commit, zone=%d, exec(set_field:%s->ct_mark)), output:%s", - defaultOpenFlowCookie, ofPortHost, config.Default.ConntrackZone, ctMarkHost, ofPortPhys)) + // table 0, packets coming from pods headed externally. 
Commit connections with ct_mark ctMarkOVN + // so that reverse direction goes back to the pods. + dftFlows = append(dftFlows, + fmt.Sprintf("cookie=%s, priority=100, in_port=%s, ipv6, "+ + "actions=ct(commit, zone=%d, exec(set_field:%s->ct_mark)), output:%s", + defaultOpenFlowCookie, ofPortPatch, config.Default.ConntrackZone, ctMarkOVN, ofPortPhys)) + // table 0, packets coming from host. Commit connections with ct_mark ctMarkHost + // so that reverse direction goes back to the host. + dftFlows = append(dftFlows, + fmt.Sprintf("cookie=%s, priority=100, in_port=%s, ipv6, "+ + "actions=ct(commit, zone=%d, exec(set_field:%s->ct_mark)), output:%s", + defaultOpenFlowCookie, ofPortHost, config.Default.ConntrackZone, ctMarkHost, ofPortPhys)) + } if config.Gateway.Mode == config.GatewayModeLocal { // table 0, any packet coming from OVN send to host in LGW mode, host will take care of sending it outside if needed. // exceptions are traffic for egressIP and egressGW features and ICMP related traffic which will hit the priority 100 flow instead of this. @@ -1441,18 +1454,21 @@ func commonFlows(subnets []*net.IPNet, bridge *bridgeConfiguration) ([]string, e fmt.Sprintf("cookie=%s, priority=175, in_port=%s, sctp6, ipv6_src=%s, "+ "actions=ct(table=4,zone=%d)", defaultOpenFlowCookie, ofPortPatch, physicalIP.IP, HostMasqCTZone)) - // We send BFD traffic coming from OVN to outside directly using a higher priority flow + if ofPortPhys != "" { + // We send BFD traffic coming from OVN to outside directly using a higher priority flow + dftFlows = append(dftFlows, + fmt.Sprintf("cookie=%s, priority=650, table=0, in_port=%s, udp6, tp_dst=3784, actions=output:%s", + defaultOpenFlowCookie, ofPortPatch, ofPortPhys)) + } + } + if ofPortPhys != "" { + // table 0, packets coming from external. Send it through conntrack and + // resubmit to table 1 to know the state and mark of the connection. 
dftFlows = append(dftFlows, - fmt.Sprintf("cookie=%s, priority=650, table=0, in_port=%s, udp6, tp_dst=3784, actions=output:%s", - defaultOpenFlowCookie, ofPortPatch, ofPortPhys)) + fmt.Sprintf("cookie=%s, priority=50, in_port=%s, ipv6, "+ + "actions=ct(zone=%d, nat, table=1)", defaultOpenFlowCookie, ofPortPhys, config.Default.ConntrackZone)) } - // table 0, packets coming from external. Send it through conntrack and - // resubmit to table 1 to know the state and mark of the connection. - dftFlows = append(dftFlows, - fmt.Sprintf("cookie=%s, priority=50, in_port=%s, ipv6, "+ - "actions=ct(zone=%d, nat, table=1)", defaultOpenFlowCookie, ofPortPhys, config.Default.ConntrackZone)) } - // Egress IP is often configured on a node different from the one hosting the affected pod. // Due to the fact that ovn-controllers on different nodes apply the changes independently, // there is a chance that the pod traffic will reach the egress node before it configures the SNAT flows. @@ -1474,65 +1490,71 @@ func commonFlows(subnets []*net.IPNet, bridge *bridgeConfiguration) ([]string, e if utilnet.IsIPv6CIDR(subnet) { ipPrefix = "ipv6" } - // table 0, commit connections from local pods. - // ICNIv2 requires that local pod traffic can leave the node without SNAT. - dftFlows = append(dftFlows, - fmt.Sprintf("cookie=%s, priority=109, in_port=%s, %s, %s_src=%s"+ - "actions=ct(commit, zone=%d, exec(set_field:%s->ct_mark)), output:%s", - defaultOpenFlowCookie, ofPortPatch, ipPrefix, ipPrefix, subnet, config.Default.ConntrackZone, ctMarkOVN, ofPortPhys)) - } - } - - actions := fmt.Sprintf("output:%s", ofPortPatch) - - if config.Gateway.DisableSNATMultipleGWs { - // table 1, traffic to pod subnet go directly to OVN - for _, clusterEntry := range config.Default.ClusterSubnets { - cidr := clusterEntry.CIDR - var ipPrefix string - if utilnet.IsIPv6CIDR(cidr) { - ipPrefix = "ipv6" - } else { - ipPrefix = "ip" + if ofPortPhys != "" { + // table 0, commit connections from local pods. 
+ // ICNIv2 requires that local pod traffic can leave the node without SNAT. + dftFlows = append(dftFlows, + fmt.Sprintf("cookie=%s, priority=109, in_port=%s, %s, %s_src=%s"+ + "actions=ct(commit, zone=%d, exec(set_field:%s->ct_mark)), output:%s", + defaultOpenFlowCookie, ofPortPatch, ipPrefix, ipPrefix, subnet, config.Default.ConntrackZone, ctMarkOVN, ofPortPhys)) } - dftFlows = append(dftFlows, - fmt.Sprintf("cookie=%s, priority=15, table=1, %s, %s_dst=%s, "+ - "actions=%s", - defaultOpenFlowCookie, ipPrefix, ipPrefix, cidr, actions)) } } - // table 1, we check to see if this dest mac is the shared mac, if so send to host - dftFlows = append(dftFlows, - fmt.Sprintf("cookie=%s, priority=10, table=1, dl_dst=%s, actions=output:%s", - defaultOpenFlowCookie, bridgeMacAddress, ofPortHost)) + if ofPortPhys != "" { + actions := fmt.Sprintf("output:%s", ofPortPatch) - if config.IPv6Mode { - // REMOVEME(trozet) when https://bugzilla.kernel.org/show_bug.cgi?id=11797 is resolved - // must flood icmpv6 Route Advertisement and Neighbor Advertisement traffic as it fails to create a CT entry - for _, icmpType := range []int{types.RouteAdvertisementICMPType, types.NeighborAdvertisementICMPType} { - dftFlows = append(dftFlows, - fmt.Sprintf("cookie=%s, priority=14, table=1,icmp6,icmpv6_type=%d actions=FLOOD", - defaultOpenFlowCookie, icmpType)) + if config.Gateway.DisableSNATMultipleGWs { + // table 1, traffic to pod subnet go directly to OVN + for _, clusterEntry := range config.Default.ClusterSubnets { + cidr := clusterEntry.CIDR + var ipPrefix string + if utilnet.IsIPv6CIDR(cidr) { + ipPrefix = "ipv6" + } else { + ipPrefix = "ip" + } + dftFlows = append(dftFlows, + fmt.Sprintf("cookie=%s, priority=15, table=1, %s, %s_dst=%s, "+ + "actions=%s", + defaultOpenFlowCookie, ipPrefix, ipPrefix, cidr, actions)) + } } - // We send BFD traffic both on the host and in ovn + // table 1, we check to see if this dest mac is the shared mac, if so send to host dftFlows = append(dftFlows, - 
fmt.Sprintf("cookie=%s, priority=13, table=1, in_port=%s, udp6, tp_dst=3784, actions=output:%s,output:%s", - defaultOpenFlowCookie, ofPortPhys, ofPortPatch, ofPortHost)) - } + fmt.Sprintf("cookie=%s, priority=10, table=1, dl_dst=%s, actions=output:%s", + defaultOpenFlowCookie, bridgeMacAddress, ofPortHost)) + + if config.IPv6Mode { + // REMOVEME(trozet) when https://bugzilla.kernel.org/show_bug.cgi?id=11797 is resolved + // must flood icmpv6 Route Advertisement and Neighbor Advertisement traffic as it fails to create a CT entry + for _, icmpType := range []int{types.RouteAdvertisementICMPType, types.NeighborAdvertisementICMPType} { + dftFlows = append(dftFlows, + fmt.Sprintf("cookie=%s, priority=14, table=1,icmp6,icmpv6_type=%d actions=FLOOD", + defaultOpenFlowCookie, icmpType)) + } + if ofPortPhys != "" { + // We send BFD traffic both on the host and in ovn + dftFlows = append(dftFlows, + fmt.Sprintf("cookie=%s, priority=13, table=1, in_port=%s, udp6, tp_dst=3784, actions=output:%s,output:%s", + defaultOpenFlowCookie, ofPortPhys, ofPortPatch, ofPortHost)) + } + } - if config.IPv4Mode { - // We send BFD traffic both on the host and in ovn + if config.IPv4Mode { + if ofPortPhys != "" { + // We send BFD traffic both on the host and in ovn + dftFlows = append(dftFlows, + fmt.Sprintf("cookie=%s, priority=13, table=1, in_port=%s, udp, tp_dst=3784, actions=output:%s,output:%s", + defaultOpenFlowCookie, ofPortPhys, ofPortPatch, ofPortHost)) + } + } + // table 1, all other connections do normal processing dftFlows = append(dftFlows, - fmt.Sprintf("cookie=%s, priority=13, table=1, in_port=%s, udp, tp_dst=3784, actions=output:%s,output:%s", - defaultOpenFlowCookie, ofPortPhys, ofPortPatch, ofPortHost)) + fmt.Sprintf("cookie=%s, priority=0, table=1, actions=output:NORMAL", defaultOpenFlowCookie)) } - // table 1, all other connections do normal processing - dftFlows = append(dftFlows, - fmt.Sprintf("cookie=%s, priority=0, table=1, actions=output:NORMAL", 
defaultOpenFlowCookie)) - return dftFlows, nil } @@ -1543,15 +1565,17 @@ func setBridgeOfPorts(bridge *bridgeConfiguration) error { return fmt.Errorf("failed while waiting on patch port %q to be created by ovn-controller and "+ "while getting ofport. stderr: %q, error: %v", bridge.patchPort, stderr, err) } + bridge.ofPortPatch = ofportPatch - // Get ofport of physical interface - ofportPhys, stderr, err := util.GetOVSOfPort("get", "interface", bridge.uplinkName, "ofport") - if err != nil { - return fmt.Errorf("failed to get ofport of %s, stderr: %q, error: %v", - bridge.uplinkName, stderr, err) + if bridge.uplinkName != "" { + // Get ofport of physical interface + ofportPhys, stderr, err := util.GetOVSOfPort("get", "interface", bridge.uplinkName, "ofport") + if err != nil { + return fmt.Errorf("failed to get ofport of %s, stderr: %q, error: %v", + bridge.uplinkName, stderr, err) + } + bridge.ofPortPhys = ofportPhys } - bridge.ofPortPatch = ofportPatch - bridge.ofPortPhys = ofportPhys // Get ofport represeting the host. That is, host representor port in case of DPUs, ovsLocalPort otherwise. if config.OvnKubeNode.Mode == types.NodeModeDPU { diff --git a/go-controller/pkg/node/helper_linux.go b/go-controller/pkg/node/helper_linux.go index ea86b10814..79458eeb0b 100644 --- a/go-controller/pkg/node/helper_linux.go +++ b/go-controller/pkg/node/helper_linux.go @@ -13,6 +13,20 @@ import ( "k8s.io/klog/v2" ) +type GatewayInterfaceMismatchError struct { + msg string +} + +func (error *GatewayInterfaceMismatchError) Error() string { + return error.msg +} + +func newGatewayInterfaceMismatchError(format string, args ...interface{}) *GatewayInterfaceMismatchError { + return &GatewayInterfaceMismatchError{ + msg: fmt.Sprintf(format, args...), + } +} + // getDefaultGatewayInterfaceDetails returns the interface name on // which the default gateway (for route to 0.0.0.0) is configured. 
// optionally pass the pre-determined gateway interface @@ -103,7 +117,7 @@ func getDefaultGatewayInterfaceByFamily(family int, gwIface string) (string, net klog.Infof("Found default gateway interface %s %s", foundIfName, r.Gw.String()) if len(gwIface) > 0 && gwIface != foundIfName { // this should not happen, but if it did, indicates something broken with our use of the netlink lib - return "", nil, fmt.Errorf("mistmaching provided gw interface: %s and gateway found: %s", + return "", nil, newGatewayInterfaceMismatchError("mismatching provided gw interface: %s and gateway found: %s", gwIface, foundIfName) } return foundIfName, r.Gw, nil @@ -129,7 +143,7 @@ func getDefaultGatewayInterfaceByFamily(family int, gwIface string) (string, net klog.Infof("Found default gateway interface %s %s", foundIfName, nh.Gw.String()) if len(gwIface) > 0 && gwIface != foundIfName { // this should not happen, but if it did, indicates something broken with our use of the netlink lib - return "", nil, fmt.Errorf("mistmaching provided gw interface: %q and gateway found: %q", + return "", nil, newGatewayInterfaceMismatchError("mismatching provided gw interface: %q and gateway found: %q", gwIface, foundIfName) } return foundIfName, nh.Gw, nil From e80027af2b043d3577bcdb3ddb245a0c84262691 Mon Sep 17 00:00:00 2001 From: Peng Liu Date: Wed, 24 May 2023 10:45:59 +0800 Subject: [PATCH 12/73] Use no-uplink gateway bridge in compact-mode e2e test Signed-off-by: Peng Liu --- .github/workflows/test.yml | 1 + contrib/kind.sh | 10 +++++++++- dist/images/daemonset.sh | 11 +++++++++++ dist/images/ovnkube.sh | 2 +- dist/templates/ovnkube-master.yaml.j2 | 14 ++++++++++++++ dist/templates/ovnkube-node.yaml.j2 | 13 ++++++++++++- 6 files changed, 48 insertions(+), 3 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index ba83051518..5b5c877c8b 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -399,6 +399,7 @@ jobs: ENABLE_MULTI_NET: "${{ 
matrix.target == 'multi-homing' }}" OVN_SEPARATE_CLUSTER_MANAGER: "${{ matrix.separate-cluster-manager == 'true' }}" OVN_COMPACT_MODE: "${{ matrix.target == 'compact-mode' }}" + OVN_DUMMY_GATEWAY_BRIDGE: "${{ matrix.target == 'compact-mode' }}" steps: - name: Free up disk space diff --git a/contrib/kind.sh b/contrib/kind.sh index 9a9060a5bf..e451cc6d2f 100755 --- a/contrib/kind.sh +++ b/contrib/kind.sh @@ -249,6 +249,8 @@ parse_args() { fi OVN_GATEWAY_MODE=$1 ;; + -dgb | --dummy-gateway-bridge) OVN_DUMMY_GATEWAY_BRIDGE=true + ;; -ov | --ovn-image ) shift OVN_IMAGE=$1 ;; @@ -361,6 +363,7 @@ print_params() { echo "KIND_ALLOW_SYSTEM_WRITES = $KIND_ALLOW_SYSTEM_WRITES" echo "KIND_EXPERIMENTAL_PROVIDER = $KIND_EXPERIMENTAL_PROVIDER" echo "OVN_GATEWAY_MODE = $OVN_GATEWAY_MODE" + echo "OVN_DUMMY_GATEWAY_BRIDGE = $OVN_DUMMY_GATEWAY_BRIDGE" echo "OVN_HYBRID_OVERLAY_ENABLE = $OVN_HYBRID_OVERLAY_ENABLE" echo "OVN_DISABLE_SNAT_MULTIPLE_GWS = $OVN_DISABLE_SNAT_MULTIPLE_GWS" echo "OVN_DISABLE_FORWARDING = $OVN_DISABLE_FORWARDING" @@ -543,10 +546,14 @@ set_default_params() { OVN_DEPLOY_PODS=${OVN_DEPLOY_PODS:-"ovnkube-master ovnkube-node"} OVN_METRICS_SCALE_ENABLE=${OVN_METRICS_SCALE_ENABLE:-false} OVN_ISOLATED=${OVN_ISOLATED:-false} - OVN_GATEWAY_OPTS="" + OVN_GATEWAY_OPTS=${OVN_GATEWAY_OPTS:-""} if [ "$OVN_ISOLATED" == true ]; then OVN_GATEWAY_OPTS="--gateway-interface=eth0" fi + OVN_DUMMY_GATEWAY_BRIDGE=${OVN_DUMMY_GATEWAY_BRIDGE:-false} + if [ "$OVN_DUMMY_GATEWAY_BRIDGE" == true ]; then + OVN_GATEWAY_OPTS="--allow-no-uplink --gateway-interface=br-ex" + fi ENABLE_MULTI_NET=${ENABLE_MULTI_NET:-false} OVN_SEPARATE_CLUSTER_MANAGER=${OVN_SEPARATE_CLUSTER_MANAGER:-false} OVN_COMPACT_MODE=${OVN_COMPACT_MODE:-false} @@ -780,6 +787,7 @@ create_ovn_kube_manifests() { --net-cidr="${NET_CIDR}" \ --svc-cidr="${SVC_CIDR}" \ --gateway-mode="${OVN_GATEWAY_MODE}" \ + --dummy-gateway-bridge="${OVN_DUMMY_GATEWAY_BRIDGE}" \ --gateway-options="${OVN_GATEWAY_OPTS}" \ 
--enable-ipsec="${ENABLE_IPSEC}" \ --hybrid-enabled="${OVN_HYBRID_OVERLAY_ENABLE}" \ diff --git a/dist/images/daemonset.sh b/dist/images/daemonset.sh index 5711ee3017..b5865a7e93 100755 --- a/dist/images/daemonset.sh +++ b/dist/images/daemonset.sh @@ -31,6 +31,7 @@ OVN_SVC_CIDR="" OVN_K8S_APISERVER="" OVN_GATEWAY_MODE="" OVN_GATEWAY_OPTS="" +OVN_DUMMY_GATEWAY_BRIDGE="" OVN_DB_REPLICAS="" OVN_MTU="" OVN_SSL_ENABLE="" @@ -108,6 +109,9 @@ while [ "$1" != "" ]; do --gateway-options) OVN_GATEWAY_OPTS=$VALUE ;; + --dummy-gateway-bridge) + OVN_DUMMY_GATEWAY_BRIDGE=$VALUE + ;; --enable-ipsec) ENABLE_IPSEC=$VALUE ;; @@ -328,6 +332,9 @@ echo "ovn_gateway_mode: ${ovn_gateway_mode}" ovn_gateway_opts=${OVN_GATEWAY_OPTS} echo "ovn_gateway_opts: ${ovn_gateway_opts}" +ovn_dummy_gateway_bridge=${OVN_DUMMY_GATEWAY_BRIDGE} +echo "ovn_dummy_gateway_bridge: ${ovn_dummy_gateway_bridge}" + enable_ipsec=${ENABLE_IPSEC:-false} echo "enable_ipsec: ${enable_ipsec}" @@ -450,6 +457,7 @@ ovn_image=${ovnkube_image} \ ovn_unprivileged_mode=${ovn_unprivileged_mode} \ ovn_gateway_mode=${ovn_gateway_mode} \ ovn_gateway_opts=${ovn_gateway_opts} \ + ovn_dummy_gateway_bridge=${ovn_dummy_gateway_bridge} \ ovnkube_node_loglevel=${node_loglevel} \ ovn_loglevel_controller=${ovn_loglevel_controller} \ ovnkube_logfile_maxsize=${ovnkube_logfile_maxsize} \ @@ -495,6 +503,7 @@ ovn_image=${image} \ ovn_unprivileged_mode=${ovn_unprivileged_mode} \ ovn_gateway_mode=${ovn_gateway_mode} \ ovn_gateway_opts=${ovn_gateway_opts} \ + ovn_dummy_gateway_bridge=${ovn_dummy_gateway_bridge} \ ovnkube_node_loglevel=${node_loglevel} \ ovn_loglevel_controller=${ovn_loglevel_controller} \ ovnkube_logfile_maxsize=${ovnkube_logfile_maxsize} \ @@ -550,6 +559,8 @@ ovn_image=${ovnkube_image} \ ovn_ssl_en=${ovn_ssl_en} \ ovn_master_count=${ovn_master_count} \ ovn_gateway_mode=${ovn_gateway_mode} \ + ovn_gateway_opts=${ovn_gateway_opts} \ + ovn_dummy_gateway_bridge=${ovn_dummy_gateway_bridge} \ 
ovn_ex_gw_networking_interface=${ovn_ex_gw_networking_interface} \ ovn_stateless_netpol_enable=${ovn_netpol_acl_enable} \ ovnkube_compact_mode_enable=${ovnkube_compact_mode_enable} \ diff --git a/dist/images/ovnkube.sh b/dist/images/ovnkube.sh index e56d4dbf88..a3c6203aa0 100755 --- a/dist/images/ovnkube.sh +++ b/dist/images/ovnkube.sh @@ -1047,7 +1047,7 @@ ovn-master() { ${init_node_flags} \ --cluster-subnets ${net_cidr} --k8s-service-cidr=${svc_cidr} \ --nb-address=${ovn_nbdb} --sb-address=${ovn_sbdb} \ - --gateway-mode=${ovn_gateway_mode} \ + --gateway-mode=${ovn_gateway_mode} ${ovn_gateway_opts} \ --loglevel=${ovnkube_loglevel} \ --logfile-maxsize=${ovnkube_logfile_maxsize} \ --logfile-maxbackups=${ovnkube_logfile_maxbackups} \ diff --git a/dist/templates/ovnkube-master.yaml.j2 b/dist/templates/ovnkube-master.yaml.j2 index 59d47d1f10..01301b5cf5 100644 --- a/dist/templates/ovnkube-master.yaml.j2 +++ b/dist/templates/ovnkube-master.yaml.j2 @@ -138,6 +138,18 @@ spec: add: - NET_ADMIN {% endif %} + {% if ovn_dummy_gateway_bridge=="true" %} + lifecycle: + postStart: + exec: + command: + - /bin/bash + - -c + - | + #!/bin/bash + ovs-vsctl --may-exist add-br br-ex + ip a a dev br-ex 10.44.0.1/32 || /bin/true + {% endif %} {% else %} command: ["/root/ovnkube.sh", "ovn-master"] securityContext: @@ -259,6 +271,8 @@ spec: value: "{{ ovn_ssl_en }}" - name: OVN_GATEWAY_MODE value: "{{ ovn_gateway_mode }}" + - name: OVN_GATEWAY_OPTS + value: "{{ ovn_gateway_opts }}" - name: OVN_MULTICAST_ENABLE value: "{{ ovn_multicast_enable }}" - name: OVN_ACL_LOGGING_RATE_LIMIT diff --git a/dist/templates/ovnkube-node.yaml.j2 b/dist/templates/ovnkube-node.yaml.j2 index e66701c070..d05a97428a 100644 --- a/dist/templates/ovnkube-node.yaml.j2 +++ b/dist/templates/ovnkube-node.yaml.j2 @@ -39,7 +39,18 @@ spec: - name: ovnkube-node image: "{{ ovn_image | default('docker.io/ovnkube/ovn-daemonset:latest') }}" imagePullPolicy: "{{ ovn_image_pull_policy | default('IfNotPresent') }}" - + {% if 
ovn_dummy_gateway_bridge=="true" %} + lifecycle: + postStart: + exec: + command: + - /bin/bash + - -c + - | + #!/bin/bash + ovs-vsctl --may-exist add-br br-ex + ip a a dev br-ex 10.44.0.1/32 || /bin/true + {% endif %} command: ["/root/ovnkube.sh", "ovn-node"] securityContext: From 5acded903869de12895f8330447e2f7b5e382653 Mon Sep 17 00:00:00 2001 From: Miguel Duarte Barroso Date: Fri, 5 May 2023 12:07:29 +0200 Subject: [PATCH 13/73] e2e: improve the error message of the getNetCIDRSubnet func Signed-off-by: Miguel Duarte Barroso --- test/e2e/multihoming_utils.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/e2e/multihoming_utils.go b/test/e2e/multihoming_utils.go index 98bddbb5b4..0103a7d2e2 100644 --- a/test/e2e/multihoming_utils.go +++ b/test/e2e/multihoming_utils.go @@ -29,7 +29,7 @@ func getNetCIDRSubnet(netCIDR string) (string, error) { } else if len(subStrings) == 2 { return netCIDR, nil } - return "", fmt.Errorf("invalid network cidr %s", netCIDR) + return "", fmt.Errorf("invalid network cidr: %q", netCIDR) } type networkAttachmentConfig struct { From 8ae10c31860bc48de7d4331d086b9c2ef7d85f3c Mon Sep 17 00:00:00 2001 From: Miguel Duarte Barroso Date: Thu, 11 May 2023 17:42:31 +0200 Subject: [PATCH 14/73] multi-net policies, IPAM less: fail when converting invalid policies Network policies targeting IPAM less networks can only have `ipBlock` peers; while this behavior already existed, it now fails a lot earlier, when translating the multi-net policies into regular `NetworkPolicy`, thus making the flow more explicit, efficient, and readable. 
Signed-off-by: Miguel Duarte Barroso --- .../base_network_controller_multipolicy.go | 22 ++ ...ase_network_controller_multipolicy_test.go | 214 ++++++++++++++++++ .../ovn/base_network_controller_secondary.go | 20 +- 3 files changed, 252 insertions(+), 4 deletions(-) create mode 100644 go-controller/pkg/ovn/base_network_controller_multipolicy_test.go diff --git a/go-controller/pkg/ovn/base_network_controller_multipolicy.go b/go-controller/pkg/ovn/base_network_controller_multipolicy.go index cd2a8aab85..ebb57ad1e1 100644 --- a/go-controller/pkg/ovn/base_network_controller_multipolicy.go +++ b/go-controller/pkg/ovn/base_network_controller_multipolicy.go @@ -122,3 +122,25 @@ func convertMultiNetPolicyToNetPolicy(mpolicy *mnpapi.MultiNetworkPolicy) *knet. } return &policy } + +func (bnc *BaseNetworkController) convertMultiNetPolicyToNetPolicy(mpolicy *mnpapi.MultiNetworkPolicy) (*knet.NetworkPolicy, error) { + if !bnc.doesNetworkRequireIPAM() { + var peers []mnpapi.MultiNetworkPolicyPeer + for _, rule := range mpolicy.Spec.Ingress { + peers = append(peers, rule.From...) + } + for _, rule := range mpolicy.Spec.Egress { + peers = append(peers, rule.To...) + } + for _, peer := range peers { + if doesPeerRequireNetworkIPAM(peer) { + return nil, fmt.Errorf("invalid peer %v in multi-network policy %s; IPAM-less networks can only have `ipBlock` peers", peer, mpolicy.Name) + } + } + } + return convertMultiNetPolicyToNetPolicy(mpolicy), nil +} + +func doesPeerRequireNetworkIPAM(peer mnpapi.MultiNetworkPolicyPeer) bool { + return peer.PodSelector != nil || peer.NamespaceSelector != nil +} diff --git a/go-controller/pkg/ovn/base_network_controller_multipolicy_test.go b/go-controller/pkg/ovn/base_network_controller_multipolicy_test.go new file mode 100644 index 0000000000..4692b73fc7 --- /dev/null +++ b/go-controller/pkg/ovn/base_network_controller_multipolicy_test.go @@ -0,0 +1,214 @@ +package ovn + +import ( + "fmt" + + . "github.com/onsi/ginkgo" + . 
"github.com/onsi/gomega" + + netv1 "k8s.io/api/networking/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + "github.com/k8snetworkplumbingwg/multi-networkpolicy/pkg/apis/k8s.cni.cncf.io/v1beta1" + netplumbersv1 "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/apis/k8s.cni.cncf.io/v1" + + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" +) + +var _ = Describe("convertMultiNetPolicyToNetPolicy", func() { + const policyName = "pol33" + + var nci *CommonNetworkControllerInfo + + BeforeEach(func() { + nci = &CommonNetworkControllerInfo{nbClient: nil} + }) + + It("translates an IPAM policy with namespace selectors", func() { + nInfo, err := util.ParseNADInfo(ipamNetAttachDef()) + Expect(err).NotTo(HaveOccurred()) + bnc := NewSecondaryLayer2NetworkController(nci, nInfo) + Expect(bnc.convertMultiNetPolicyToNetPolicy(multiNetPolicyWithNamespaceSelector(policyName))).To( + Equal( + &netv1.NetworkPolicy{ + ObjectMeta: metav1.ObjectMeta{Name: policyName}, + Spec: netv1.NetworkPolicySpec{ + Ingress: []netv1.NetworkPolicyIngressRule{ + { + From: []netv1.NetworkPolicyPeer{{NamespaceSelector: sameLabelsEverywhere()}}, + Ports: []netv1.NetworkPolicyPort{}, + }, + }, + Egress: []netv1.NetworkPolicyEgressRule{}, + PolicyTypes: []netv1.PolicyType{}, + }, + })) + }) + + It("translates an IPAM policy with pod selectors", func() { + nInfo, err := util.ParseNADInfo(ipamNetAttachDef()) + Expect(err).NotTo(HaveOccurred()) + bnc := NewSecondaryLayer2NetworkController(nci, nInfo) + Expect(bnc.convertMultiNetPolicyToNetPolicy(multiNetPolicyWithPodSelector(policyName))).To( + Equal( + &netv1.NetworkPolicy{ + ObjectMeta: metav1.ObjectMeta{Name: policyName}, + Spec: netv1.NetworkPolicySpec{ + Ingress: []netv1.NetworkPolicyIngressRule{ + { + From: []netv1.NetworkPolicyPeer{{PodSelector: sameLabelsEverywhere()}}, + Ports: []netv1.NetworkPolicyPort{}, + }, + }, + Egress: []netv1.NetworkPolicyEgressRule{}, + PolicyTypes: []netv1.PolicyType{}, + }, + })) + }) + 
+ It("translates an IPAM policy with `ipBlock` selectors", func() { + nInfo, err := util.ParseNADInfo(ipamNetAttachDef()) + Expect(err).NotTo(HaveOccurred()) + bnc := NewSecondaryLayer2NetworkController(nci, nInfo) + Expect(bnc.convertMultiNetPolicyToNetPolicy(multiNetPolicyWithIPBlock())).To(Equal( + &netv1.NetworkPolicy{ + Spec: netv1.NetworkPolicySpec{ + Ingress: []netv1.NetworkPolicyIngressRule{ + { + From: []netv1.NetworkPolicyPeer{{IPBlock: &netv1.IPBlock{CIDR: "10.10.0.0/16"}}}, + Ports: []netv1.NetworkPolicyPort{}, + }, + }, + Egress: []netv1.NetworkPolicyEgressRule{}, + PolicyTypes: []netv1.PolicyType{}, + }, + }, + )) + }) + + It("translates an IPAM-less policy with `ipBlock` selectors", func() { + nInfo, err := util.ParseNADInfo(ipamlessNetAttachDef()) + Expect(err).NotTo(HaveOccurred()) + bnc := NewSecondaryLayer2NetworkController(nci, nInfo) + Expect(bnc.convertMultiNetPolicyToNetPolicy(multiNetPolicyWithIPBlock())).To( + Equal( + &netv1.NetworkPolicy{ + Spec: netv1.NetworkPolicySpec{ + Ingress: []netv1.NetworkPolicyIngressRule{ + { + From: []netv1.NetworkPolicyPeer{{IPBlock: &netv1.IPBlock{CIDR: "10.10.0.0/16"}}}, + Ports: []netv1.NetworkPolicyPort{}, + }, + }, + Egress: []netv1.NetworkPolicyEgressRule{}, + PolicyTypes: []netv1.PolicyType{}, + }, + }, + )) + }) + + It("*fails* to translate an IPAM-less policy with pod selector peers", func() { + nInfo, err := util.ParseNADInfo(ipamlessNetAttachDef()) + Expect(err).NotTo(HaveOccurred()) + bnc := NewSecondaryLayer2NetworkController(nci, nInfo) + _, err = bnc.convertMultiNetPolicyToNetPolicy(multiNetPolicyWithPodSelector(policyName)) + Expect(err).To( + MatchError( + MatchRegexp(fmt.Sprintf("invalid peer .* in multi-network policy %s; IPAM-less networks can only have `ipBlock` peers", policyName)))) + }) + + It("translates an IPAM-less policy with namespace selector peers", func() { + nInfo, err := util.ParseNADInfo(ipamlessNetAttachDef()) + Expect(err).NotTo(HaveOccurred()) + bnc := 
NewSecondaryLayer2NetworkController(nci, nInfo) + _, err = bnc.convertMultiNetPolicyToNetPolicy(multiNetPolicyWithNamespaceSelector(policyName)) + Expect(err).To(MatchError( + MatchRegexp(fmt.Sprintf("invalid peer .* in multi-network policy %s; IPAM-less networks can only have `ipBlock` peers", policyName)))) + }) +}) + +func sameLabelsEverywhere() *metav1.LabelSelector { + return &metav1.LabelSelector{ + MatchLabels: map[string]string{"George": "Costanza"}, + } +} + +func ipamNetAttachDef() *netplumbersv1.NetworkAttachmentDefinition { + return &netplumbersv1.NetworkAttachmentDefinition{ + ObjectMeta: metav1.ObjectMeta{ + Name: "flatl2", + Namespace: "default", + }, + Spec: netplumbersv1.NetworkAttachmentDefinitionSpec{ + Config: `{ + "cniVersion": "0.4.0", + "name": "flatl2", + "netAttachDefName": "default/flatl2", + "topology": "layer2", + "type": "ovn-k8s-cni-overlay", + "subnets": "192.100.200.0/24" + }`, + }, + } +} + +func ipamlessNetAttachDef() *netplumbersv1.NetworkAttachmentDefinition { + return &netplumbersv1.NetworkAttachmentDefinition{ + ObjectMeta: metav1.ObjectMeta{ + Name: "flatl2", + Namespace: "default", + }, + Spec: netplumbersv1.NetworkAttachmentDefinitionSpec{ + Config: `{ + "cniVersion": "0.4.0", + "name": "flatl2", + "netAttachDefName": "default/flatl2", + "topology": "layer2", + "type": "ovn-k8s-cni-overlay" + }`, + }, + } +} +func multiNetPolicyWithIPBlock() *v1beta1.MultiNetworkPolicy { + return &v1beta1.MultiNetworkPolicy{ + Spec: v1beta1.MultiNetworkPolicySpec{ + Ingress: []v1beta1.MultiNetworkPolicyIngressRule{ + { + From: []v1beta1.MultiNetworkPolicyPeer{ + { + IPBlock: &v1beta1.IPBlock{ + CIDR: "10.10.0.0/16", + }, + }, + }, + }, + }, + }, + } +} + +func multiNetPolicyWithPodSelector(policyName string) *v1beta1.MultiNetworkPolicy { + return &v1beta1.MultiNetworkPolicy{ + ObjectMeta: metav1.ObjectMeta{Name: policyName}, + Spec: v1beta1.MultiNetworkPolicySpec{ + Ingress: []v1beta1.MultiNetworkPolicyIngressRule{ + { + From: 
[]v1beta1.MultiNetworkPolicyPeer{{PodSelector: sameLabelsEverywhere()}}, + }, + }, + }, + } +} + +func multiNetPolicyWithNamespaceSelector(policyName string) *v1beta1.MultiNetworkPolicy { + return &v1beta1.MultiNetworkPolicy{ + ObjectMeta: metav1.ObjectMeta{Name: policyName}, + Spec: v1beta1.MultiNetworkPolicySpec{ + Ingress: []v1beta1.MultiNetworkPolicyIngressRule{ + { + From: []v1beta1.MultiNetworkPolicyPeer{{NamespaceSelector: sameLabelsEverywhere()}}, + }, + }, + }, + } +} diff --git a/go-controller/pkg/ovn/base_network_controller_secondary.go b/go-controller/pkg/ovn/base_network_controller_secondary.go index d22e13e78e..20a36a8d77 100644 --- a/go-controller/pkg/ovn/base_network_controller_secondary.go +++ b/go-controller/pkg/ovn/base_network_controller_secondary.go @@ -70,7 +70,10 @@ func (bsnc *BaseSecondaryNetworkController) AddSecondaryNetworkResourceCommon(ob return nil } - np := convertMultiNetPolicyToNetPolicy(mp) + np, err := bsnc.convertMultiNetPolicyToNetPolicy(mp) + if err != nil { + return err + } if err := bsnc.addNetworkPolicy(np); err != nil { klog.Infof("MultiNetworkPolicy add failed for %s/%s, will try again later: %v", mp.Namespace, mp.Name, err) @@ -114,7 +117,10 @@ func (bsnc *BaseSecondaryNetworkController) UpdateSecondaryNetworkResourceCommon newShouldApply := bsnc.shouldApplyMultiPolicy(newMp) if oldShouldApply { // this multi-netpol no longer applies to this network controller, delete it - np := convertMultiNetPolicyToNetPolicy(oldMp) + np, err := bsnc.convertMultiNetPolicyToNetPolicy(oldMp) + if err != nil { + return err + } if err := bsnc.deleteNetworkPolicy(np); err != nil { klog.Infof("MultiNetworkPolicy delete failed for %s/%s, will try again later: %v", oldMp.Namespace, oldMp.Name, err) @@ -123,7 +129,10 @@ func (bsnc *BaseSecondaryNetworkController) UpdateSecondaryNetworkResourceCommon } if newShouldApply { // now this multi-netpol applies to this network controller - np := convertMultiNetPolicyToNetPolicy(newMp) + np, err := 
bsnc.convertMultiNetPolicyToNetPolicy(newMp) + if err != nil { + return err + } if err := bsnc.addNetworkPolicy(np); err != nil { klog.Infof("MultiNetworkPolicy add failed for %s/%s, will try again later: %v", newMp.Namespace, newMp.Name, err) @@ -161,7 +170,10 @@ func (bsnc *BaseSecondaryNetworkController) DeleteSecondaryNetworkResourceCommon if !ok { return fmt.Errorf("could not cast %T object to *multinetworkpolicyapi.MultiNetworkPolicy", obj) } - np := convertMultiNetPolicyToNetPolicy(mp) + np, err := bsnc.convertMultiNetPolicyToNetPolicy(mp) + if err != nil { + return err + } // delete this policy regardless it applies to this network controller, in case of missing update event if err := bsnc.deleteNetworkPolicy(np); err != nil { klog.Infof("MultiNetworkPolicy delete failed for %s/%s, will try again later: %v", From faa0a7e2999a8c68aac882bfa177caf0c07be438 Mon Sep 17 00:00:00 2001 From: Miguel Duarte Barroso Date: Thu, 18 May 2023 10:38:39 +0200 Subject: [PATCH 15/73] multi-net policies, IPAM less: allow ipBlock peers Signed-off-by: Miguel Duarte Barroso --- go-controller/pkg/ovn/base_network_controller_namespace.go | 5 ++--- go-controller/pkg/ovn/base_network_controller_pods.go | 5 ++++- go-controller/pkg/ovn/base_network_controller_secondary.go | 6 ------ .../pkg/ovn/base_secondary_layer2_network_controller.go | 2 +- 4 files changed, 7 insertions(+), 11 deletions(-) diff --git a/go-controller/pkg/ovn/base_network_controller_namespace.go b/go-controller/pkg/ovn/base_network_controller_namespace.go index 58309da0dd..e79d9cc56d 100644 --- a/go-controller/pkg/ovn/base_network_controller_namespace.go +++ b/go-controller/pkg/ovn/base_network_controller_namespace.go @@ -66,9 +66,8 @@ func getNamespaceAddrSetDbIDs(namespaceName, controller string) *libovsdbops.DbO func (bnc *BaseNetworkController) WatchNamespaces() error { if bnc.IsSecondary() { // For secondary networks, we don't have to watch namespace events if - // multi-network policy support is not enabled. 
We don't support - // multi-network policy for IPAM-less secondary networks either. - if !util.IsMultiNetworkPoliciesSupportEnabled() || !bnc.doesNetworkRequireIPAM() { + // multi-network policy support is not enabled. + if !util.IsMultiNetworkPoliciesSupportEnabled() { return nil } } diff --git a/go-controller/pkg/ovn/base_network_controller_pods.go b/go-controller/pkg/ovn/base_network_controller_pods.go index 1656caf382..364fe226f9 100644 --- a/go-controller/pkg/ovn/base_network_controller_pods.go +++ b/go-controller/pkg/ovn/base_network_controller_pods.go @@ -479,7 +479,10 @@ func (bnc *BaseNetworkController) podExpectedInLogicalCache(pod *kapi.Pod) bool if err != nil { return false } - return !util.PodWantsHostNetwork(pod) && !bnc.lsManager.IsNonHostSubnetSwitch(switchName) && !util.PodCompleted(pod) + return !util.PodWantsHostNetwork(pod) && + !(bnc.lsManager.IsNonHostSubnetSwitch(switchName) && + bnc.doesNetworkRequireIPAM()) && + !util.PodCompleted(pod) } func (bnc *BaseNetworkController) getExpectedSwitchName(pod *kapi.Pod) (string, error) { diff --git a/go-controller/pkg/ovn/base_network_controller_secondary.go b/go-controller/pkg/ovn/base_network_controller_secondary.go index 20a36a8d77..39801a6327 100644 --- a/go-controller/pkg/ovn/base_network_controller_secondary.go +++ b/go-controller/pkg/ovn/base_network_controller_secondary.go @@ -533,12 +533,6 @@ func (bsnc *BaseSecondaryNetworkController) WatchMultiNetworkPolicy() error { return nil } - // if this network does not have ipam, network policy is not supported. 
- if !bsnc.doesNetworkRequireIPAM() { - klog.Infof("Network policy is not supported on network %s", bsnc.GetNetworkName()) - return nil - } - if bsnc.policyHandler != nil { return nil } diff --git a/go-controller/pkg/ovn/base_secondary_layer2_network_controller.go b/go-controller/pkg/ovn/base_secondary_layer2_network_controller.go index 74af5e3a1b..e2112cda32 100644 --- a/go-controller/pkg/ovn/base_secondary_layer2_network_controller.go +++ b/go-controller/pkg/ovn/base_secondary_layer2_network_controller.go @@ -169,7 +169,7 @@ func (oc *BaseSecondaryLayer2NetworkController) initRetryFramework() { // For secondary networks, we don't have to watch namespace events if // multi-network policy support is not enabled. We don't support // multi-network policy for IPAM-less secondary networks either. - if util.IsMultiNetworkPoliciesSupportEnabled() && oc.doesNetworkRequireIPAM() { + if util.IsMultiNetworkPoliciesSupportEnabled() { oc.retryNamespaces = oc.newRetryFramework(factory.NamespaceType) oc.retryNetworkPolicies = oc.newRetryFramework(factory.MultiNetworkPolicyType) } From dc94736463a5c8cef15814b174291c6030dfb4dd Mon Sep 17 00:00:00 2001 From: Miguel Duarte Barroso Date: Fri, 5 May 2023 12:06:49 +0200 Subject: [PATCH 16/73] multi-net policies, IPAM less, e2e: add test for IPAMless flat L2 policies Signed-off-by: Miguel Duarte Barroso --- test/e2e/multihoming.go | 44 +++++++++++++++++++++++++++++++++++++---- 1 file changed, 40 insertions(+), 4 deletions(-) diff --git a/test/e2e/multihoming.go b/test/e2e/multihoming.go index 166e80a1d5..c870b9d554 100644 --- a/test/e2e/multihoming.go +++ b/test/e2e/multihoming.go @@ -616,6 +616,7 @@ var _ = Describe("Multi Homing", func() { Context("multi-network policies", func() { const ( generatedNamespaceNamePrefix = "pepe" + blockedServerStaticIP = "192.168.200.30" ) var extraNamespace *v1.Namespace @@ -684,10 +685,12 @@ var _ = Describe("Multi Homing", func() { By("asserting the server pod has an IP from the configured range") 
serverIP, err := podIPForAttachment(cs, serverPodConfig.namespace, serverPodConfig.name, netConfig.name, 0) Expect(err).NotTo(HaveOccurred()) - By(fmt.Sprintf("asserting the server pod IP %v is from the configured range %v/%v", serverIP, netConfig.cidr, netPrefixLengthPerNode)) - subnet, err := getNetCIDRSubnet(netConfig.cidr) - Expect(err).NotTo(HaveOccurred()) - Expect(inRange(subnet, serverIP)).To(Succeed()) + if netConfig.cidr != "" { + By(fmt.Sprintf("asserting the server pod IP %v is from the configured range %v/%v", serverIP, netConfig.cidr, netPrefixLengthPerNode)) + subnet, err := getNetCIDRSubnet(netConfig.cidr) + Expect(err).NotTo(HaveOccurred()) + Expect(inRange(subnet, serverIP)).To(Succeed()) + } if doesPolicyFeatAnIPBlock(policy) { blockedIP, err := podIPForAttachment(cs, f.Namespace.Name, blockedClientPodConfig.name, netConfig.name, 0) @@ -1022,6 +1025,39 @@ var _ = Describe("Multi Homing", func() { port, ), ), + + table.Entry( + "for an IPAMless pure L2 overlay when the multi-net policy describes the allow-list using IPBlock", + networkAttachmentConfig{ + name: secondaryNetworkName, + topology: "layer2", + }, + podConfiguration{ + attachments: []nadapi.NetworkSelectionElement{{Name: secondaryNetworkName, IPRequest: []string{clientIP}}}, + name: allowedClient(clientPodName), + }, + podConfiguration{ + attachments: []nadapi.NetworkSelectionElement{{Name: secondaryNetworkName, IPRequest: []string{blockedServerStaticIP + "/24"}}}, + name: blockedClient(clientPodName), + }, + podConfiguration{ + attachments: []nadapi.NetworkSelectionElement{{Name: secondaryNetworkName, IPRequest: []string{staticServerIP}}}, + name: podName, + containerCmd: httpServerContainerCmd(port), + labels: map[string]string{"app": "stuff-doer"}, + }, + multiNetIngressLimitingIPBlockPolicy( + secondaryNetworkName, + metav1.LabelSelector{ + MatchLabels: map[string]string{"app": "stuff-doer"}, + }, + mnpapi.IPBlock{ + CIDR: "192.168.200.0/24", + Except: 
[]string{blockedServerStaticIP}, + }, + port, + ), + ), ) }) }) From ae94d5b6459d1fe9fda00a2f17cfb951c25475c8 Mon Sep 17 00:00:00 2001 From: Miguel Duarte Barroso Date: Tue, 23 May 2023 17:59:43 +0200 Subject: [PATCH 17/73] multi-net policies, IPAM less: only warn for IPAMed networks Signed-off-by: Miguel Duarte Barroso --- go-controller/pkg/ovn/base_network_controller_namespace.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/go-controller/pkg/ovn/base_network_controller_namespace.go b/go-controller/pkg/ovn/base_network_controller_namespace.go index e79d9cc56d..96b8914578 100644 --- a/go-controller/pkg/ovn/base_network_controller_namespace.go +++ b/go-controller/pkg/ovn/base_network_controller_namespace.go @@ -349,6 +349,10 @@ func (bnc *BaseNetworkController) updateNamespaceAclLogging(ns, aclAnnotation st } func (bnc *BaseNetworkController) getAllNamespacePodAddresses(ns string) []net.IP { + if !bnc.doesNetworkRequireIPAM() { + return nil + } + var ips []net.IP // Get all the pods in the namespace and append their IP to the address_set existingPods, err := bnc.watchFactory.GetPods(ns) From 28d824190f07398aff7beecd7cadbddd2b3572e6 Mon Sep 17 00:00:00 2001 From: Miguel Duarte Barroso Date: Fri, 26 May 2023 16:33:25 +0200 Subject: [PATCH 18/73] docs, multi-homing, policy: document IPAM-less policies restrictions Signed-off-by: Miguel Duarte Barroso --- docs/multi-homing.md | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/docs/multi-homing.md b/docs/multi-homing.md index 2bd02f2921..a17a698628 100644 --- a/docs/multi-homing.md +++ b/docs/multi-homing.md @@ -293,9 +293,10 @@ Please note the `MultiNetworkPolicy` has the **exact same** API of the native `networking.k8s.io/v1` `NetworkPolicy`object; check its documentation for more information. -**Note:** It is currently **required** for the `net-attach-def`s referred to by -the `k8s.v1.cni.cncf.io/policy-for` annotation to have the `subnets` attribute -in its `spec.config` defined. 
+**Note:** `net-attach-def`s referred to by the `k8s.v1.cni.cncf.io/policy-for` +annotation without the subnet attribute defined are possible if the policy +**only features** `ipBlock` peers. If the `net-attach-def` features the +`subnet` attribute, it can also feature `namespaceSelectors` and `podSelectors`. ## Limitations OVN-K currently does **not** support: From 6af07aa7cf057b74affc8bb3b165616281900934 Mon Sep 17 00:00:00 2001 From: Jacob Tanenbaum Date: Mon, 22 May 2023 10:49:36 -0400 Subject: [PATCH 19/73] rename podLister to localPodLister for hybrid overlay when we create a hybrid overlay Node we pass LocalPodInformer which only knows about Pods local to the node they are running on. The hybrid overlay was assuming it was listening to all pods and manually filtering out those that are not on the local node. This commit fixes that assumption and renames the variables to reflect that they are localPodInformers and localPodListers Signed-off-by: Jacob Tanenbaum --- .../hybrid-overlay/pkg/controller/node.go | 14 ++++------- .../pkg/controller/node_linux.go | 24 +++++++++---------- 2 files changed, 16 insertions(+), 22 deletions(-) diff --git a/go-controller/hybrid-overlay/pkg/controller/node.go b/go-controller/hybrid-overlay/pkg/controller/node.go index 3d551ba257..b9b81da1de 100644 --- a/go-controller/hybrid-overlay/pkg/controller/node.go +++ b/go-controller/hybrid-overlay/pkg/controller/node.go @@ -87,14 +87,14 @@ func NewNode( kube kube.Interface, nodeName string, nodeInformer cache.SharedIndexInformer, - podInformer cache.SharedIndexInformer, + localPodInformer cache.SharedIndexInformer, eventHandlerCreateFunction informer.EventHandlerCreateFunction, ) (*Node, error) { nodeLister := listers.NewNodeLister(nodeInformer.GetIndexer()) - podLister := listers.NewPodLister(podInformer.GetIndexer()) + localPodLister := listers.NewPodLister(localPodInformer.GetIndexer()) - controller, err := newNodeController(kube, nodeName, nodeLister, podLister) + controller, err 
:= newNodeController(kube, nodeName, nodeLister, localPodLister) if err != nil { return nil, err } @@ -119,15 +119,12 @@ func NewNode( if err != nil { return nil, err } - n.podEventHandler, err = eventHandlerCreateFunction("pod", podInformer, + n.podEventHandler, err = eventHandlerCreateFunction("pod", localPodInformer, func(obj interface{}) error { pod, ok := obj.(*kapi.Pod) if !ok { return fmt.Errorf("object is not a pod") } - if pod.Spec.NodeName != nodeName { - return nil - } return n.controller.AddPod(pod) }, func(obj interface{}) error { @@ -135,9 +132,6 @@ func NewNode( if !ok { return fmt.Errorf("object is not a pod") } - if pod.Spec.NodeName != nodeName { - return nil - } return n.controller.DeletePod(pod) }, podChanged, diff --git a/go-controller/hybrid-overlay/pkg/controller/node_linux.go b/go-controller/hybrid-overlay/pkg/controller/node_linux.go index b51a578829..da62c32b0f 100644 --- a/go-controller/hybrid-overlay/pkg/controller/node_linux.go +++ b/go-controller/hybrid-overlay/pkg/controller/node_linux.go @@ -53,8 +53,8 @@ type NodeController struct { // channel to indicate we need to update flows immediately flowChan chan struct{} - nodeLister listers.NodeLister - podLister listers.PodLister + nodeLister listers.NodeLister + localPodLister listers.PodLister } // newNodeController returns a node handler that listens for node events @@ -66,18 +66,18 @@ func newNodeController( _ kube.Interface, nodeName string, nodeLister listers.NodeLister, - podLister listers.PodLister, + localPodLister listers.PodLister, ) (nodeController, error) { node := &NodeController{ - nodeName: nodeName, - initState: new(uint32), - vxlanPort: uint16(config.HybridOverlay.VXLANPort), - flowCache: make(map[string]*flowCacheEntry), - flowMutex: sync.Mutex{}, - flowChan: make(chan struct{}, 1), - nodeLister: nodeLister, - podLister: podLister, + nodeName: nodeName, + initState: new(uint32), + vxlanPort: uint16(config.HybridOverlay.VXLANPort), + flowCache: 
make(map[string]*flowCacheEntry), + flowMutex: sync.Mutex{}, + flowChan: make(chan struct{}, 1), + nodeLister: nodeLister, + localPodLister: localPodLister, } atomic.StoreUint32(node.initState, hotypes.InitialStartup) return node, nil @@ -274,7 +274,7 @@ func (n *NodeController) AddNode(node *kapi.Node) error { err = n.hybridOverlayNodeUpdate(node) } if atomic.LoadUint32(n.initState) == hotypes.DistributedRouterInitialized { - pods, err := n.podLister.List(labels.Everything()) + pods, err := n.localPodLister.List(labels.Everything()) if err != nil { return fmt.Errorf("cannot fully initialize node %s for hybrid overlay, cannot list pods: %v", n.nodeName, err) } From 5742aa11dad687f839598e2219f2fe91e9a1fe5f Mon Sep 17 00:00:00 2001 From: Jacob Tanenbaum Date: Mon, 22 May 2023 10:55:53 -0400 Subject: [PATCH 20/73] code in the AddPod() for hybrid overlay that inits the Node not required this code in AddPod() is not required and is duplicate effort for the hybrid overlay. code was added to AddNode() that once the drIP and drMAC are set will loop through all pods and initialize them. This check is not required. 
Signed-off-by: Jacob Tanenbaum --- .../hybrid-overlay/pkg/controller/node_linux.go | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/go-controller/hybrid-overlay/pkg/controller/node_linux.go b/go-controller/hybrid-overlay/pkg/controller/node_linux.go index da62c32b0f..8972f23220 100644 --- a/go-controller/hybrid-overlay/pkg/controller/node_linux.go +++ b/go-controller/hybrid-overlay/pkg/controller/node_linux.go @@ -112,16 +112,6 @@ func (n *NodeController) AddPod(pod *kapi.Pod) error { // if the IP/MAC or Annotations have changed ignoreLearn := true - if atomic.LoadUint32(n.initState) == hotypes.InitialStartup { - node, err := n.nodeLister.Get(n.nodeName) - if err != nil { - return fmt.Errorf("hybrid overlay not initialized on %s, and failed to get node data: %v", - n.nodeName, err) - } - if err = n.EnsureHybridOverlayBridge(node); err != nil { - return fmt.Errorf("failed to ensure hybrid overlay in pod handler: %v", err) - } - } if n.drMAC == nil || n.drIP == nil { return fmt.Errorf("empty values for DR MAC: %s or DR IP: %s on node %s", n.drMAC, n.drIP, n.nodeName) } @@ -278,7 +268,6 @@ func (n *NodeController) AddNode(node *kapi.Node) error { if err != nil { return fmt.Errorf("cannot fully initialize node %s for hybrid overlay, cannot list pods: %v", n.nodeName, err) } - for _, pod := range pods { err := n.AddPod(pod) if err != nil { From a747b14e75f75bd6e21ac99011cd6921be27e598 Mon Sep 17 00:00:00 2001 From: Miguel Duarte Barroso Date: Mon, 29 May 2023 13:23:08 +0200 Subject: [PATCH 21/73] e2e, kind: parametrize OCI_BIN when creating secondary interfaces Signed-off-by: Miguel Duarte Barroso --- contrib/kind.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/contrib/kind.sh b/contrib/kind.sh index e451cc6d2f..04e61f4a05 100755 --- a/contrib/kind.sh +++ b/contrib/kind.sh @@ -1066,11 +1066,11 @@ docker_create_second_interface() { echo "adding second interfaces to nodes" # Create the network as dual stack, regardless of the type of 
the deployment. Ignore if already exists. - docker network create --ipv6 --driver=bridge kindexgw --subnet=172.19.0.0/16 --subnet=fc00:f853:ccd:e798::/64 || true + "$OCI_BIN" network create --ipv6 --driver=bridge kindexgw --subnet=172.19.0.0/16 --subnet=fc00:f853:ccd:e798::/64 || true KIND_NODES=$(kind get nodes --name "${KIND_CLUSTER_NAME}") for n in $KIND_NODES; do - docker network connect kindexgw "$n" + "$OCI_BIN" network connect kindexgw "$n" done } From d4a92b5b44ff9645d93edc4cb6f96d54ef326252 Mon Sep 17 00:00:00 2001 From: Miguel Duarte Barroso Date: Mon, 29 May 2023 13:27:54 +0200 Subject: [PATCH 22/73] multi-homing, e2e tests: extra interface for the kind deployment When on a multi-homing lane create a secondary network **without** external access (the ovnkube-node gateway init code would pick an interface with a default route as the interface to create `br-ex` on top of). Signed-off-by: Miguel Duarte Barroso --- contrib/kind.sh | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/contrib/kind.sh b/contrib/kind.sh index 04e61f4a05..24b4fc99ea 100755 --- a/contrib/kind.sh +++ b/contrib/kind.sh @@ -1074,6 +1074,20 @@ docker_create_second_interface() { done } +docker_create_second_disconnected_interface() { + echo "adding second interfaces to nodes" + local bridge_name="${1:-kindexgw}" + echo "bridge: $bridge_name" + + # Create the network without subnets; ignore if already exists. + "$OCI_BIN" network create --internal --driver=bridge "$bridge_name" || true + + KIND_NODES=$(kind get nodes --name "${KIND_CLUSTER_NAME}") + for n in $KIND_NODES; do + "$OCI_BIN" network connect "$bridge_name" "$n" + done +} + sleep_until_pods_settle() { echo "Pods are all up, allowing things settle for 30 seconds..." 
sleep 30 @@ -1169,6 +1183,7 @@ fi if [ "$ENABLE_MULTI_NET" == true ]; then install_multus install_mpolicy_crd + docker_create_second_disconnected_interface "underlay" # localnet scenarios require an extra interface fi kubectl_wait_pods sleep_until_pods_settle From e579396d3aaf74c35e3f6d4269f2cb386e666d25 Mon Sep 17 00:00:00 2001 From: Miguel Duarte Barroso Date: Thu, 25 May 2023 17:02:01 +0200 Subject: [PATCH 23/73] multi-homing, localnet, tests: configure underlay This commit adds code to configure the cluster underlay to provide east/west connectivity between pods using a localnet secondary network, something which will allows us to test the underlay topology - since up to now we weren't actually testing anything meaningful: since all pods using a secondary network were scheduled in the same node, the underlay was not being used. Signed-off-by: Miguel Duarte Barroso --- test/e2e/localnet-underlay.go | 163 ++++++++++++++++++++++++++++++++++ test/e2e/multihoming.go | 36 +++++--- 2 files changed, 189 insertions(+), 10 deletions(-) create mode 100644 test/e2e/localnet-underlay.go diff --git a/test/e2e/localnet-underlay.go b/test/e2e/localnet-underlay.go new file mode 100644 index 0000000000..97e9edaa3f --- /dev/null +++ b/test/e2e/localnet-underlay.go @@ -0,0 +1,163 @@ +package e2e + +import ( + "context" + "fmt" + "strings" + + v1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + clientset "k8s.io/client-go/kubernetes" +) + +const ( + bridgeName = "ovsbr1" + add = "add-br" + del = "del-br" +) + +func setupUnderlay(ovsPods []v1.Pod, portName string, nadConfig networkAttachmentConfig) error { + for _, ovsPod := range ovsPods { + if err := addOVSBridge(ovsPod.Name, bridgeName); err != nil { + return err + } + + if nadConfig.vlanID > 0 { + if err := ovsEnableVLANAccessPort(ovsPod.Name, bridgeName, portName, nadConfig.vlanID); err != nil { + return err + } + } else { + if err := ovsAttachPortToBridge(ovsPod.Name, bridgeName, portName); err != nil { + 
return err + } + } + + if err := configureBridgeMappings( + ovsPod.Name, + defaultNetworkBridgeMapping(), + bridgeMapping(nadConfig.attachmentName(), bridgeName), + ); err != nil { + return err + } + } + return nil +} + +func teardownUnderlay(ovsPods []v1.Pod) error { + for _, ovsPod := range ovsPods { + if err := removeOVSBridge(ovsPod.Name, bridgeName); err != nil { + return err + } + } + return nil +} + +func ovsPods(clientSet clientset.Interface) []v1.Pod { + const ( + ovnKubernetesNamespace = "ovn-kubernetes" + ovsNodeLabel = "app=ovs-node" + ) + pods, err := clientSet.CoreV1().Pods(ovnKubernetesNamespace).List( + context.Background(), + metav1.ListOptions{LabelSelector: ovsNodeLabel}, + ) + if err != nil { + return nil + } + return pods.Items +} + +func addOVSBridge(ovnNodeName string, bridgeName string) error { + _, err := runCommand(ovsBridgeCommand(ovnNodeName, add, bridgeName)...) + if err != nil { + return fmt.Errorf("failed to ADD OVS bridge %s: %v", bridgeName, err) + } + return nil +} + +func removeOVSBridge(ovnNodeName string, bridgeName string) error { + _, err := runCommand(ovsBridgeCommand(ovnNodeName, del, bridgeName)...) 
+ if err != nil { + return fmt.Errorf("failed to DELETE OVS bridge %s: %v", bridgeName, err) + } + return nil +} + +func ovsBridgeCommand(ovnNodeName string, addOrDeleteCmd string, bridgeName string) []string { + return []string{ + "kubectl", "-n", "ovn-kubernetes", "exec", ovnNodeName, "--", + "ovs-vsctl", addOrDeleteCmd, bridgeName, + } +} + +func ovsAttachPortToBridge(ovsNodeName string, bridgeName string, portName string) error { + cmd := []string{ + "kubectl", "-n", "ovn-kubernetes", "exec", ovsNodeName, "--", + "ovs-vsctl", "add-port", bridgeName, portName, + } + + if _, err := runCommand(cmd...); err != nil { + return fmt.Errorf("failed to add port %s to OVS bridge %s: %v", portName, bridgeName, err) + } + + return nil +} + +func ovsEnableVLANAccessPort(ovsNodeName string, bridgeName string, portName string, vlanID int) error { + cmd := []string{ + "kubectl", "-n", "ovn-kubernetes", "exec", ovsNodeName, "--", + "ovs-vsctl", "add-port", bridgeName, portName, fmt.Sprintf("tag=%d", vlanID), "vlan_mode=access", + } + + if _, err := runCommand(cmd...); err != nil { + return fmt.Errorf("failed to add port %s to OVS bridge %s: %v", portName, bridgeName, err) + } + + return nil +} + +type BridgeMapping struct { + physnet string + ovsBridge string +} + +func (bm BridgeMapping) String() string { + return fmt.Sprintf("%s:%s", bm.physnet, bm.ovsBridge) +} + +type BridgeMappings []BridgeMapping + +func (bms BridgeMappings) String() string { + return strings.Join(Map(bms, func(bm BridgeMapping) string { return bm.String() }), ",") +} + +func Map[T, V any](items []T, fn func(T) V) []V { + result := make([]V, len(items)) + for i, t := range items { + result[i] = fn(t) + } + return result +} + +func configureBridgeMappings(ovnNodeName string, mappings ...BridgeMapping) error { + mappingsString := fmt.Sprintf("external_ids:ovn-bridge-mappings=%s", BridgeMappings(mappings).String()) + cmd := []string{"kubectl", "-n", "ovn-kubernetes", "exec", ovnNodeName, + "--", "ovs-vsctl", 
"set", "open", ".", mappingsString, + } + _, err := runCommand(cmd...) + return err +} + +func defaultNetworkBridgeMapping() BridgeMapping { + return BridgeMapping{ + physnet: "physnet", + ovsBridge: "breth0", + } +} + +func bridgeMapping(physnet, ovsBridge string) BridgeMapping { + return BridgeMapping{ + physnet: physnet, + ovsBridge: ovsBridge, + } +} diff --git a/test/e2e/multihoming.go b/test/e2e/multihoming.go index c870b9d554..d028546863 100644 --- a/test/e2e/multihoming.go +++ b/test/e2e/multihoming.go @@ -266,6 +266,17 @@ var _ = Describe("Multi Homing", func() { clientPodConfig.namespace = f.Namespace.Name serverPodConfig.namespace = f.Namespace.Name + if netConfig.topology == "localnet" { + nodes := ovsPods(cs) + Expect(nodes).NotTo(BeEmpty()) + defer func() { + Expect(teardownUnderlay(nodes)).To(Succeed()) + }() + + const secondaryInterfaceName = "eth1" + Expect(setupUnderlay(nodes, secondaryInterfaceName, netConfig)).To(Succeed()) + } + By("creating the attachment configuration") _, err := nadClient.NetworkAttachmentDefinitions(f.Namespace.Name).Create( context.Background(), @@ -511,16 +522,17 @@ var _ = Describe("Multi Homing", func() { }, ), table.Entry( - "can communicate over an Localnet secondary network when the pods are scheduled on the same node", + "can communicate over an Localnet secondary network when the pods are scheduled on different nodes", networkAttachmentConfig{ name: secondaryNetworkName, topology: "localnet", cidr: secondaryLocalnetNetworkCIDR, + vlanID: localnetVLANID, }, podConfiguration{ attachments: []nadapi.NetworkSelectionElement{{Name: secondaryNetworkName}}, name: clientPodName, - nodeSelector: map[string]string{nodeHostnameKey: workerTwoNodeName}, + nodeSelector: map[string]string{nodeHostnameKey: workerOneNodeName}, }, podConfiguration{ attachments: []nadapi.NetworkSelectionElement{{Name: secondaryNetworkName}}, @@ -530,15 +542,16 @@ var _ = Describe("Multi Homing", func() { }, ), table.Entry( - "can communicate over an 
Localnet secondary network without IPAM when the pods are scheduled on the same node", + "can communicate over an Localnet secondary network without IPAM when the pods are scheduled on different nodes", networkAttachmentConfig{ name: secondaryNetworkName, topology: "localnet", + vlanID: localnetVLANID, }, podConfiguration{ attachments: []nadapi.NetworkSelectionElement{{Name: secondaryNetworkName}}, name: clientPodName, - nodeSelector: map[string]string{nodeHostnameKey: workerTwoNodeName}, + nodeSelector: map[string]string{nodeHostnameKey: workerOneNodeName}, isPrivileged: true, }, podConfiguration{ @@ -550,10 +563,11 @@ var _ = Describe("Multi Homing", func() { }, ), table.Entry( - "can communicate over an localnet secondary network without IPAM when the pods are scheduled on the same node, with static IPs configured via network selection elements", + "can communicate over an localnet secondary network without IPAM when the pods are scheduled on different nodes, with static IPs configured via network selection elements", networkAttachmentConfig{ name: secondaryNetworkName, topology: "localnet", + vlanID: localnetVLANID, }, podConfiguration{ attachments: []nadapi.NetworkSelectionElement{{ @@ -561,7 +575,7 @@ var _ = Describe("Multi Homing", func() { IPRequest: []string{clientIP}, }}, name: clientPodName, - nodeSelector: map[string]string{nodeHostnameKey: workerTwoNodeName}, + nodeSelector: map[string]string{nodeHostnameKey: workerOneNodeName}, }, podConfiguration{ attachments: []nadapi.NetworkSelectionElement{{ @@ -574,16 +588,17 @@ var _ = Describe("Multi Homing", func() { }, ), table.Entry( - "can communicate over an localnet secondary network with an IPv6 subnet when pods are scheduled on the same node", + "can communicate over an localnet secondary network with an IPv6 subnet when pods are scheduled on different nodes", networkAttachmentConfig{ name: secondaryNetworkName, topology: "localnet", cidr: secondaryIPv6CIDR, + vlanID: localnetVLANID, }, 
podConfiguration{ attachments: []nadapi.NetworkSelectionElement{{Name: secondaryNetworkName}}, name: clientPodName, - nodeSelector: map[string]string{nodeHostnameKey: workerTwoNodeName}, + nodeSelector: map[string]string{nodeHostnameKey: workerOneNodeName}, }, podConfiguration{ attachments: []nadapi.NetworkSelectionElement{{Name: secondaryNetworkName}}, @@ -593,16 +608,17 @@ var _ = Describe("Multi Homing", func() { }, ), table.Entry( - "can communicate over an localnet secondary network with a dual stack configuration when pods are scheduled on the same node", + "can communicate over an localnet secondary network with a dual stack configuration when pods are scheduled on different nodes", networkAttachmentConfig{ name: secondaryNetworkName, topology: "localnet", cidr: strings.Join([]string{secondaryLocalnetNetworkCIDR, secondaryIPv6CIDR}, ","), + vlanID: localnetVLANID, }, podConfiguration{ attachments: []nadapi.NetworkSelectionElement{{Name: secondaryNetworkName}}, name: clientPodName, - nodeSelector: map[string]string{nodeHostnameKey: workerTwoNodeName}, + nodeSelector: map[string]string{nodeHostnameKey: workerOneNodeName}, }, podConfiguration{ attachments: []nadapi.NetworkSelectionElement{{Name: secondaryNetworkName}}, From ed147dea310f7bd99c992cb3f0fa2406dca7441a Mon Sep 17 00:00:00 2001 From: Miguel Duarte Barroso Date: Tue, 30 May 2023 12:31:31 +0200 Subject: [PATCH 24/73] multi-net, e2e, kind: remove shift instruction The shift instruction when setting the multi-network flag is not required. 
Signed-off-by: Miguel Duarte Barroso --- contrib/kind.sh | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/contrib/kind.sh b/contrib/kind.sh index 24b4fc99ea..d85128964b 100755 --- a/contrib/kind.sh +++ b/contrib/kind.sh @@ -321,8 +321,7 @@ parse_args() { ;; --isolated ) OVN_ISOLATED=true ;; - -mne | --multi-network-enable ) shift - ENABLE_MULTI_NET=true + -mne | --multi-network-enable ) ENABLE_MULTI_NET=true ;; --delete ) delete exit From 77ba4518746e8e7f0df95a5eeb071af91545ffa0 Mon Sep 17 00:00:00 2001 From: Nadia Pinaeva Date: Wed, 31 May 2023 13:29:07 +0200 Subject: [PATCH 25/73] Extract all namespace pod ips only when creating nsInfo. Signed-off-by: Nadia Pinaeva --- .../pkg/ovn/base_network_controller_namespace.go | 3 ++- .../pkg/ovn/base_network_controller_secondary.go | 3 +-- go-controller/pkg/ovn/namespace.go | 16 +++++++--------- 3 files changed, 10 insertions(+), 12 deletions(-) diff --git a/go-controller/pkg/ovn/base_network_controller_namespace.go b/go-controller/pkg/ovn/base_network_controller_namespace.go index 96b8914578..e8ef6c8f7a 100644 --- a/go-controller/pkg/ovn/base_network_controller_namespace.go +++ b/go-controller/pkg/ovn/base_network_controller_namespace.go @@ -223,7 +223,7 @@ func (bnc *BaseNetworkController) multicastDeleteNamespace(ns *kapi.Namespace, n // ns is the name of the namespace, while namespace is the optional k8s namespace object // if no k8s namespace object is provided, this function will attempt to find it via informer cache func (bnc *BaseNetworkController) ensureNamespaceLockedCommon(ns string, readOnly bool, namespace *kapi.Namespace, - ips []net.IP, configureNamespace func(nsInfo *namespaceInfo, ns *kapi.Namespace) error) (*namespaceInfo, func(), error) { + ipsGetter func(ns string) []net.IP, configureNamespace func(nsInfo *namespaceInfo, ns *kapi.Namespace) error) (*namespaceInfo, func(), error) { bnc.namespacesMutex.Lock() nsInfo := bnc.namespaces[ns] nsInfoExisted := false @@ -239,6 +239,7 @@ func 
(bnc *BaseNetworkController) ensureNamespaceLockedCommon(ns string, readOnl defer bnc.namespacesMutex.Unlock() // create the adddress set for the new namespace var err error + ips := ipsGetter(ns) nsInfo.addressSet, err = bnc.createNamespaceAddrSetAllPods(ns, ips) if err != nil { return nil, nil, fmt.Errorf("failed to create address set for namespace: %s, error: %v", ns, err) diff --git a/go-controller/pkg/ovn/base_network_controller_secondary.go b/go-controller/pkg/ovn/base_network_controller_secondary.go index 39801a6327..3239eb2f72 100644 --- a/go-controller/pkg/ovn/base_network_controller_secondary.go +++ b/go-controller/pkg/ovn/base_network_controller_secondary.go @@ -481,8 +481,7 @@ func (bsnc *BaseSecondaryNetworkController) AddNamespaceForSecondaryNetwork(ns * // and returns it with its mutex locked. // ns is the name of the namespace, while namespace is the optional k8s namespace object func (bsnc *BaseSecondaryNetworkController) ensureNamespaceLockedForSecondaryNetwork(ns string, readOnly bool, namespace *kapi.Namespace) (*namespaceInfo, func(), error) { - ips := bsnc.getAllNamespacePodAddresses(ns) - return bsnc.ensureNamespaceLockedCommon(ns, readOnly, namespace, ips, bsnc.configureNamespaceCommon) + return bsnc.ensureNamespaceLockedCommon(ns, readOnly, namespace, bsnc.getAllNamespacePodAddresses, bsnc.configureNamespaceCommon) } func (bsnc *BaseSecondaryNetworkController) updateNamespaceForSecondaryNetwork(old, newer *kapi.Namespace) error { diff --git a/go-controller/pkg/ovn/namespace.go b/go-controller/pkg/ovn/namespace.go index 7b86638427..4548282ec5 100644 --- a/go-controller/pkg/ovn/namespace.go +++ b/go-controller/pkg/ovn/namespace.go @@ -278,16 +278,14 @@ func (oc *DefaultNetworkController) deleteNamespace(ns *kapi.Namespace) error { // with its mutex locked. 
// ns is the name of the namespace, while namespace is the optional k8s namespace object func (oc *DefaultNetworkController) ensureNamespaceLocked(ns string, readOnly bool, namespace *kapi.Namespace) (*namespaceInfo, func(), error) { - var ips []net.IP - - // special handling of host network namespace. issues/3381 - if config.Kubernetes.HostNetworkNamespace != "" && ns == config.Kubernetes.HostNetworkNamespace { - ips = oc.getAllHostNamespaceAddresses() - } else { - ips = oc.getAllNamespacePodAddresses(ns) + ipsGetter := func(ns string) []net.IP { + // special handling of host network namespace. issues/3381 + if config.Kubernetes.HostNetworkNamespace != "" && ns == config.Kubernetes.HostNetworkNamespace { + return oc.getAllHostNamespaceAddresses() + } + return oc.getAllNamespacePodAddresses(ns) } - - return oc.ensureNamespaceLockedCommon(ns, readOnly, namespace, ips, oc.configureNamespace) + return oc.ensureNamespaceLockedCommon(ns, readOnly, namespace, ipsGetter, oc.configureNamespace) } func (oc *DefaultNetworkController) getAllHostNamespaceAddresses() []net.IP { From 91e8a3bf787168a26020a0fac66b6fb97cd97805 Mon Sep 17 00:00:00 2001 From: Surya Seetharaman Date: Wed, 31 May 2023 23:23:40 +0200 Subject: [PATCH 26/73] Fix cleanupStalePodSNATs if no podIPs are found In CI we are observing: E0531 19:52:52.309098 1 obj_retry.go:627] Failed to update *v1.Node, old=ip-10-0-133-36.ec2.internal, new=ip-10-0-133-36.ec2.internal, error: error creating gateway for node ip-10-0-133-36.ec2.internal: failed to init shared interface gateway: failed to sync stale SNATs on node ip-10-0-133-36.ec2.internal: unable to fetch podIPs for pod openshift-multus/network-metrics-daemon-cr75v: pod openshift-multus/network-metrics-daemon-cr75v: no pod IPs found I0531 19:52:52.309185 1 event.go:285] Event(v1.ObjectReference{Kind:"Node", Namespace:"", Name:"ip-10-0-133-36.ec2.internal", UID:"65797716-b2a8-43fc-a216-8154fecee781", APIVersion:"v1", ResourceVersion:"203498", FieldPath:""}): type: 
'Warning' reason: 'ErrorUpdatingResource' error creating gateway for node ip-10-0-133-36.ec2.internal: failed to init shared interface gateway: failed to sync stale SNATs on node ip-10-0-133-36.ec2.internal: unable to fetch podIPs for pod openshift-multus/network-metrics-daemon-cr75v: pod openshift-multus/network-metrics-daemon-cr75v: no pod IPs found which is happening because the pod is scheduled but IP allocation has not happened and since this sync is called from watch nodes, node add does not succeed, so we have a chicken-and-egg problem where watch pods can't start. In reality we don't care about pods that don't have an IP because there is nothing to clean up for that pod in that case. Signed-off-by: Surya Seetharaman --- go-controller/pkg/ovn/gateway_init.go | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/go-controller/pkg/ovn/gateway_init.go b/go-controller/pkg/ovn/gateway_init.go index dded7d0c15..9b163d6c90 100644 --- a/go-controller/pkg/ovn/gateway_init.go +++ b/go-controller/pkg/ovn/gateway_init.go @@ -10,6 +10,7 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/fields" "k8s.io/apimachinery/pkg/util/sets" + "k8s.io/klog/v2" utilnet "k8s.io/utils/net" libovsdbclient "github.com/ovn-org/libovsdb/client" @@ -72,7 +73,15 @@ func (oc *DefaultNetworkController) cleanupStalePodSNATs(nodeName string, nodeIP } } podIPs, err := util.GetPodIPsOfNetwork(&pod, oc.NetInfo) - if err != nil { + if err != nil && errors.Is(err, util.ErrNoPodIPFound) { + // It is possible that the pod is scheduled during this time, but the LSP add or + // IP Allocation has not happened and it is waiting for the WatchPods to start + // after WatchNodes completes (This function is called during syncNodes). So since + // the pod doesn't have any IPs, there is no SNAT here to keep for this pod so we skip + // this pod from processing and move onto the next one. 
+ klog.Warningf("Unable to fetch podIPs for pod %s/%s: %v", pod.Namespace, pod.Name, err) + continue // no-op + } else if err != nil { return fmt.Errorf("unable to fetch podIPs for pod %s/%s: %w", pod.Namespace, pod.Name, err) } for _, podIP := range podIPs { From 0927ea4808ef95d9484929c28f719ccf4230e373 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Caama=C3=B1o=20Ruiz?= Date: Wed, 31 May 2023 19:02:37 +0000 Subject: [PATCH 27/73] Increase unit test timeout to 20m for ovn pkg MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On a normal, green, unit test run ovn package is already close to the limit of 10m timeout to run the unit tests with the eventual timeout actually happening sometimes: ok github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn 593.856s Signed-off-by: Jaime Caamaño Ruiz --- go-controller/hack/test-go.sh | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/go-controller/hack/test-go.sh b/go-controller/hack/test-go.sh index 9e39778b43..fa691a3c10 100755 --- a/go-controller/hack/test-go.sh +++ b/go-controller/hack/test-go.sh @@ -49,6 +49,10 @@ function testrun { if [ ! 
-z "${COVERALLS:-}" ]; then args="${args} -test.coverprofile=${idx}.coverprofile " fi + if [[ " ${big_pkgs[@]} " =~ " $pkg " ]]; then + echo "Increasing timeout to 20m for package ${pkg}" + args="${args} -test.timeout=20m" + fi if grep -q -r "ginkgo" ."${path}"; then prefix=$(echo "${path}" | cut -c 2- | sed 's,/,_,g') ginkgoargs="-ginkgo.v ${ginkgo_focus} -ginkgo.reportFile ${TEST_REPORT_DIR}/junit-${prefix}.xml" @@ -64,6 +68,9 @@ function testrun { # These packages requires root for network namespace manipulation in unit tests root_pkgs=("github.com/ovn-org/ovn-kubernetes/go-controller/pkg/node") +# These packages are big and require more than the 10m default to run the unit tests +big_pkgs=("github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn") + i=0 for pkg in ${PKGS}; do testrun "${i}" "${pkg}" From a1b8aefc4e3d1b4ec0e9491e3a9e3e7ae8cabdf8 Mon Sep 17 00:00:00 2001 From: Patryk Diak Date: Thu, 1 Jun 2023 17:14:15 +0200 Subject: [PATCH 28/73] Stop using a waitgroup in route manager Route manager doesn't make any use of the waitgroup it was created with. Additionally it was only calling `wg.Done()` without calling `wg.Add(1)` first. 
Signed-off-by: Patryk Diak --- .../node/default_node_network_controller.go | 9 +++-- .../pkg/node/gateway_init_linux_test.go | 38 +++++++++++++------ .../pkg/node/management-port_linux_test.go | 17 ++++++--- go-controller/pkg/node/route_manager.go | 6 +-- go-controller/pkg/node/route_manager_test.go | 9 +++-- 5 files changed, 51 insertions(+), 28 deletions(-) diff --git a/go-controller/pkg/node/default_node_network_controller.go b/go-controller/pkg/node/default_node_network_controller.go index 29da5cb2bb..c0c2c6f9c5 100644 --- a/go-controller/pkg/node/default_node_network_controller.go +++ b/go-controller/pkg/node/default_node_network_controller.go @@ -117,7 +117,7 @@ func newDefaultNodeNetworkController(cnnci *CommonNodeNetworkControllerInfo, sto stopChan: stopChan, wg: wg, }, - routeManager: newRouteManager(wg, true, 2*time.Minute), + routeManager: newRouteManager(true, 2*time.Minute), } } @@ -631,8 +631,11 @@ func (nc *DefaultNodeNetworkController) Start(ctx context.Context) error { if err := level.Set("5"); err != nil { klog.Errorf("Setting klog \"loglevel\" to 5 failed, err: %v", err) } - go nc.routeManager.run(ctx.Done()) - + nc.wg.Add(1) + go func() { + defer nc.wg.Done() + nc.routeManager.run(nc.stopChan) + }() if node, err = nc.Kube.GetNode(nc.name); err != nil { return fmt.Errorf("error retrieving node %s: %v", nc.name, err) } diff --git a/go-controller/pkg/node/gateway_init_linux_test.go b/go-controller/pkg/node/gateway_init_linux_test.go index 1dd182d511..6a297670e8 100644 --- a/go-controller/pkg/node/gateway_init_linux_test.go +++ b/go-controller/pkg/node/gateway_init_linux_test.go @@ -253,9 +253,10 @@ func shareGatewayInterfaceTest(app *cli.App, testNS ns.NetNS, Expect(err).NotTo(HaveOccurred()) err = nodeAnnotator.Run() Expect(err).NotTo(HaveOccurred()) - rm := newRouteManager(wg, true, 10*time.Second) + rm := newRouteManager(true, 10*time.Second) wg.Add(1) go testNS.Do(func(netNS ns.NetNS) error { + defer wg.Done() defer GinkgoRecover() rm.run(stop) 
return nil @@ -616,9 +617,10 @@ func shareGatewayInterfaceDPUTest(app *cli.App, testNS ns.NetNS, ifAddrs := ovntest.MustParseIPNets(hostCIDR) ifAddrs[0].IP = ovntest.MustParseIP(dpuIP) - rm := newRouteManager(wg, true, 10*time.Second) + rm := newRouteManager(true, 10*time.Second) wg.Add(1) go testNS.Do(func(netNS ns.NetNS) error { + defer wg.Done() defer GinkgoRecover() rm.run(stop) return nil @@ -729,6 +731,7 @@ func shareGatewayInterfaceDPUHostTest(app *cli.App, testNS ns.NetNS, uplinkName, // must run route manager manually which is usually started with nc.Start() wg.Add(1) go testNS.Do(func(netNS ns.NetNS) error { + defer wg.Done() defer GinkgoRecover() nc.routeManager.run(stop) return nil @@ -1046,9 +1049,10 @@ OFPT_GET_CONFIG_REPLY (xid=0x4): frags=normal miss_send_len=0`, Expect(err).NotTo(HaveOccurred()) err = nodeAnnotator.Run() Expect(err).NotTo(HaveOccurred()) - rm := newRouteManager(wg, true, 10*time.Second) + rm := newRouteManager(true, 10*time.Second) wg.Add(1) go testNS.Do(func(netNS ns.NetNS) error { + defer wg.Done() defer GinkgoRecover() rm.run(stop) return nil @@ -1542,10 +1546,13 @@ var _ = Describe("Gateway unit tests", func() { netlinkMock.On("RouteListFiltered", mock.Anything, mock.Anything, mock.Anything).Return(nil, nil) netlinkMock.On("RouteAdd", expectedRoute).Return(nil) wg := &sync.WaitGroup{} - rm := newRouteManager(wg, true, 10*time.Second) + rm := newRouteManager(true, 10*time.Second) stopCh := make(chan struct{}) wg.Add(1) - go rm.run(stopCh) + go func() { + defer wg.Done() + rm.run(stopCh) + }() defer func() { close(stopCh) wg.Wait() @@ -1586,10 +1593,13 @@ var _ = Describe("Gateway unit tests", func() { netlinkMock.On("RouteListFiltered", mock.Anything, mock.Anything, mock.Anything).Return([]netlink.Route{*previousRoute}, nil) netlinkMock.On("RouteReplace", expectedRoute).Return(nil) wg := &sync.WaitGroup{} - rm := newRouteManager(wg, true, 10*time.Second) + rm := newRouteManager(true, 10*time.Second) stopCh := make(chan struct{}) 
- go rm.run(stopCh) wg.Add(1) + go func() { + defer wg.Done() + rm.run(stopCh) + }() defer func() { close(stopCh) wg.Wait() @@ -1603,10 +1613,13 @@ var _ = Describe("Gateway unit tests", func() { netlinkMock.On("LinkByName", mock.Anything).Return(nil, fmt.Errorf("failed to find interface")) gwIPs := []net.IP{net.ParseIP("10.0.0.11")} wg := &sync.WaitGroup{} - rm := newRouteManager(wg, true, 10*time.Second) + rm := newRouteManager(true, 10*time.Second) stopCh := make(chan struct{}) - go rm.run(stopCh) wg.Add(1) + go func() { + defer wg.Done() + rm.run(stopCh) + }() defer func() { close(stopCh) wg.Wait() @@ -1624,10 +1637,13 @@ var _ = Describe("Gateway unit tests", func() { netlinkMock.On("LinkByName", mock.Anything).Return(nil, nil) netlinkMock.On("LinkSetUp", mock.Anything).Return(nil) wg := &sync.WaitGroup{} - rm := newRouteManager(wg, true, 10*time.Second) + rm := newRouteManager(true, 10*time.Second) stopCh := make(chan struct{}) - go rm.run(stopCh) wg.Add(1) + go func() { + defer wg.Done() + rm.run(stopCh) + }() defer func() { close(stopCh) wg.Wait() diff --git a/go-controller/pkg/node/management-port_linux_test.go b/go-controller/pkg/node/management-port_linux_test.go index fbd729feff..7147c3ee20 100644 --- a/go-controller/pkg/node/management-port_linux_test.go +++ b/go-controller/pkg/node/management-port_linux_test.go @@ -267,16 +267,17 @@ func testManagementPort(ctx *cli.Context, fexec *ovntest.FakeExec, testNS ns.Net nodeAnnotator := kube.NewNodeAnnotator(&kube.KubeOVN{Kube: kube.Kube{KClient: fakeClient}, EIPClient: egressipv1fake.NewSimpleClientset(), EgressFirewallClient: &egressfirewallfake.Clientset{}, EgressServiceClient: &egressservicefake.Clientset{}}, existingNode.Name) waiter := newStartupWaiter() wg := &sync.WaitGroup{} - rm := newRouteManager(wg, true, 10*time.Second) + rm := newRouteManager(true, 10*time.Second) stopCh := make(chan struct{}) defer func() { close(stopCh) wg.Wait() }() + wg.Add(1) go testNS.Do(func(netNS ns.NetNS) error { + 
defer wg.Done() defer GinkgoRecover() - wg.Add(1) rm.run(stopCh) return nil }) @@ -360,10 +361,13 @@ func testManagementPortDPU(ctx *cli.Context, fexec *ovntest.FakeExec, testNS ns. nodeAnnotator := kube.NewNodeAnnotator(&kube.KubeOVN{Kube: kube.Kube{KClient: fakeClient}, EIPClient: egressipv1fake.NewSimpleClientset(), EgressFirewallClient: &egressfirewallfake.Clientset{}, EgressServiceClient: &egressservicefake.Clientset{}}, existingNode.Name) waiter := newStartupWaiter() wg := &sync.WaitGroup{} - rm := newRouteManager(wg, true, 10*time.Second) + rm := newRouteManager(true, 10*time.Second) stopCh := make(chan struct{}) - go rm.run(stopCh) wg.Add(1) + go func() { + defer wg.Done() + rm.run(stopCh) + }() defer func() { close(stopCh) wg.Wait() @@ -446,11 +450,12 @@ func testManagementPortDPUHost(ctx *cli.Context, fexec *ovntest.FakeExec, testNS _, err = config.InitConfig(ctx, fexec, nil) Expect(err).NotTo(HaveOccurred()) wg := &sync.WaitGroup{} - rm := newRouteManager(wg, true, 10*time.Second) + rm := newRouteManager(true, 10*time.Second) stopCh := make(chan struct{}) + wg.Add(1) go testNS.Do(func(netNS ns.NetNS) error { + defer wg.Done() defer GinkgoRecover() - wg.Add(1) rm.run(stopCh) return nil }) diff --git a/go-controller/pkg/node/route_manager.go b/go-controller/pkg/node/route_manager.go index 346c273a8b..f7eddcb940 100644 --- a/go-controller/pkg/node/route_manager.go +++ b/go-controller/pkg/node/route_manager.go @@ -3,7 +3,6 @@ package node import ( "fmt" "net" - "sync" "time" "github.com/vishvananda/netlink" @@ -22,21 +21,19 @@ type routeManager struct { store map[string]routesPerLink // key is link name addRouteCh chan routesPerLink delRouteCh chan routesPerLink - wg *sync.WaitGroup } // newRouteManager manages routes which include adding and deletion of routes. It also manages restoration of managed routes. // Begin managing routes by calling run() to start the manager. // Routes should be added via add(route) and deletion via del(route) functions only. 
// All other functions are used internally. -func newRouteManager(wg *sync.WaitGroup, logRouteChanges bool, syncPeriod time.Duration) *routeManager { +func newRouteManager(logRouteChanges bool, syncPeriod time.Duration) *routeManager { return &routeManager{ logRouteChanges: logRouteChanges, syncPeriod: syncPeriod, store: make(map[string]routesPerLink), addRouteCh: make(chan routesPerLink, 5), delRouteCh: make(chan routesPerLink, 5), - wg: wg, } } @@ -47,7 +44,6 @@ func (rm *routeManager) run(stopCh <-chan struct{}) { subscribed, routeEventCh = subscribeNetlinkRouteEvents(stopCh) ticker := time.NewTicker(rm.syncPeriod) defer ticker.Stop() - defer rm.wg.Done() for { select { diff --git a/go-controller/pkg/node/route_manager_test.go b/go-controller/pkg/node/route_manager_test.go index 7c40f9fb5d..c7b9351b77 100644 --- a/go-controller/pkg/node/route_manager_test.go +++ b/go-controller/pkg/node/route_manager_test.go @@ -34,7 +34,9 @@ var _ = ginkgo.Describe("Route Manager", func() { loIP := net.IPv4(127, 1, 1, 1) loIPDiff := net.IPv4(127, 1, 1, 2) loGWIP := net.IPv4(127, 1, 1, 254) - if os.Getuid() != 0 { + + if os.Getenv("NOROOT") == "TRUE" { + defer ginkgo.GinkgoRecover() ginkgo.Skip("Test requires root privileges") } @@ -46,10 +48,9 @@ var _ = ginkgo.Describe("Route Manager", func() { wg = &sync.WaitGroup{} stopCh = make(chan struct{}) - wg.Add(1) syncPeriod := 10 * time.Millisecond logAllActivity := true - rm = newRouteManager(wg, logAllActivity, syncPeriod) + rm = newRouteManager(logAllActivity, syncPeriod) err = testNS.Do(func(netNS ns.NetNS) error { defer ginkgo.GinkgoRecover() loLink, err = netlink.LinkByName(loLinkName) @@ -73,7 +74,9 @@ var _ = ginkgo.Describe("Route Manager", func() { return nil }) + wg.Add(1) go testNS.Do(func(netNS ns.NetNS) error { + defer wg.Done() defer ginkgo.GinkgoRecover() rm.run(stopCh) return nil From 73682229d0c786a0c258d46c1862e303dcb4df36 Mon Sep 17 00:00:00 2001 From: Periyasamy Palanisamy Date: Mon, 15 May 2023 16:31:57 +0200 
Subject: [PATCH 29/73] CVE-2022-41723: net/http, golang.org/x/net/http2: avoid quadratic complexity in HPACK decoding This commit upgrades golang.org/x/net module dependency to fix CVE 2022-41723 (https://pkg.go.dev/vuln/GO-2023-1571). Signed-off-by: Periyasamy Palanisamy --- go-controller/go.mod | 8 +- go-controller/go.sum | 12 +-- .../vendor/golang.org/x/net/html/doc.go | 21 +++++ .../vendor/golang.org/x/net/html/escape.go | 81 +++++++++++++++++++ .../vendor/golang.org/x/net/html/render.go | 2 +- .../vendor/golang.org/x/net/html/token.go | 10 +-- .../vendor/golang.org/x/net/http2/pipe.go | 6 +- .../vendor/golang.org/x/net/http2/server.go | 7 +- .../golang.org/x/net/http2/transport.go | 41 +++++++--- .../golang.org/x/net/internal/socks/socks.go | 2 +- .../vendor/golang.org/x/sys/unix/mkerrors.sh | 3 +- .../golang.org/x/sys/unix/zerrors_linux.go | 14 ++++ .../golang.org/x/sys/windows/env_windows.go | 6 +- .../golang.org/x/sys/windows/exec_windows.go | 7 +- .../golang.org/x/sys/windows/service.go | 7 ++ .../golang.org/x/sys/windows/svc/service.go | 9 +++ .../golang.org/x/sys/windows/types_windows.go | 6 +- .../x/sys/windows/zsyscall_windows.go | 9 +++ go-controller/vendor/modules.txt | 6 +- 19 files changed, 213 insertions(+), 44 deletions(-) diff --git a/go-controller/go.mod b/go-controller/go.mod index 2488f6e7e6..e9def879e9 100644 --- a/go-controller/go.mod +++ b/go-controller/go.mod @@ -11,6 +11,7 @@ require ( github.com/containernetworking/cni v1.1.2 github.com/containernetworking/plugins v1.2.0 github.com/coreos/go-iptables v0.6.0 + github.com/fsnotify/fsnotify v1.6.0 github.com/google/go-cmp v0.5.9 github.com/google/uuid v1.3.0 github.com/gorilla/mux v1.8.0 @@ -34,9 +35,9 @@ require ( github.com/stretchr/testify v1.8.0 github.com/urfave/cli/v2 v2.2.0 github.com/vishvananda/netlink v1.2.1-beta.2.0.20230420174744-55c8b9515a01 - golang.org/x/net v0.7.0 + golang.org/x/net v0.10.0 golang.org/x/sync v0.1.0 - golang.org/x/sys v0.7.0 + golang.org/x/sys v0.8.0 
golang.org/x/time v0.0.0-20220210224613-90d013bbcef8 google.golang.org/grpc v1.49.0 google.golang.org/protobuf v1.28.1 @@ -61,7 +62,6 @@ require ( github.com/davecgh/go-spew v1.1.1 // indirect github.com/emicklei/go-restful/v3 v3.9.0 // indirect github.com/evanphx/json-patch v4.12.0+incompatible // indirect - github.com/fsnotify/fsnotify v1.6.0 // indirect github.com/go-logr/logr v1.2.3 // indirect github.com/go-logr/stdr v1.2.2 // indirect github.com/go-openapi/jsonpointer v0.19.5 // indirect @@ -96,7 +96,7 @@ require ( go.opencensus.io v0.23.0 // indirect golang.org/x/crypto v0.1.0 // indirect golang.org/x/oauth2 v0.0.0-20220411215720-9780585627b5 // indirect - golang.org/x/term v0.5.0 // indirect + golang.org/x/term v0.8.0 // indirect golang.org/x/text v0.9.0 // indirect google.golang.org/appengine v1.6.7 // indirect google.golang.org/genproto v0.0.0-20220502173005-c8bf987b8c21 // indirect diff --git a/go-controller/go.sum b/go-controller/go.sum index a0bcc1057b..341d8f8dae 100644 --- a/go-controller/go.sum +++ b/go-controller/go.sum @@ -987,8 +987,8 @@ golang.org/x/net v0.0.0-20220425223048-2871e0cb64e4/go.mod h1:CfG3xpIq0wQ8r1q4Su golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= golang.org/x/net v0.1.0/go.mod h1:Cx3nUiGt4eDBEyega/BKRp+/AlGL8hYe7U9odMt2Cco= golang.org/x/net v0.3.1-0.20221206200815-1e63c2f08a10/go.mod h1:MBQ8lrhLObU/6UmLb4fmbmk5OcyYmqtbGd/9yIeKjEE= -golang.org/x/net v0.7.0 h1:rJrUqqhjsgNp7KqAIc25s9pZnjU7TUcSY7HcVZjdn1g= -golang.org/x/net v0.7.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= +golang.org/x/net v0.10.0 h1:X2//UzNDwYmtCLn7To6G58Wr6f5ahEAQgKNzv9Y951M= +golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/oauth2 
v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= @@ -1124,14 +1124,14 @@ golang.org/x/sys v0.0.0-20220908164124-27713097b956/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.3.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.7.0 h1:3jlCCIQZPdOYu1h8BkNvLz8Kgwtae2cagcG/VamtZRU= -golang.org/x/sys v0.7.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.8.0 h1:EBmGv8NaZBZTWvrbjNoL6HVt+IVy3QDQpJs7VRIw3tU= +golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/term v0.1.0/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/term v0.3.0/go.mod h1:q750SLmJuPmVoN1blW3UFBPREJfb1KmY3vwxfr+nFDA= -golang.org/x/term v0.5.0 h1:n2a8QNdAb0sZNpU9R1ALUXBbY+w51fCQDN+7EdxNBsY= -golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k= +golang.org/x/term v0.8.0 h1:n5xxQn2i3PC0yLAbjTpNT85q/Kgzcr2gIoX9OrJUols= +golang.org/x/term v0.8.0/go.mod h1:xPskH00ivmX89bAKVGSKKtLOWNx2+17Eiy94tnKShWo= golang.org/x/text v0.0.0-20160726164857-2910a502d2bf/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= diff --git a/go-controller/vendor/golang.org/x/net/html/doc.go b/go-controller/vendor/golang.org/x/net/html/doc.go index 822ed42a04..2466ae3d9a 100644 --- a/go-controller/vendor/golang.org/x/net/html/doc.go +++ b/go-controller/vendor/golang.org/x/net/html/doc.go @@ -92,6 +92,27 
@@ example, to process each anchor node in depth-first order: The relevant specifications include: https://html.spec.whatwg.org/multipage/syntax.html and https://html.spec.whatwg.org/multipage/syntax.html#tokenization + +# Security Considerations + +Care should be taken when parsing and interpreting HTML, whether full documents +or fragments, within the framework of the HTML specification, especially with +regard to untrusted inputs. + +This package provides both a tokenizer and a parser, which implement the +tokenization, and tokenization and tree construction stages of the WHATWG HTML +parsing specification respectively. While the tokenizer parses and normalizes +individual HTML tokens, only the parser constructs the DOM tree from the +tokenized HTML, as described in the tree construction stage of the +specification, dynamically modifying or extending the docuemnt's DOM tree. + +If your use case requires semantically well-formed HTML documents, as defined by +the WHATWG specification, the parser should be used rather than the tokenizer. + +In security contexts, if trust decisions are being made using the tokenized or +parsed content, the input must be re-serialized (for instance by using Render or +Token.String) in order for those trust decisions to hold, as the process of +tokenization or parsing may alter the content. */ package html // import "golang.org/x/net/html" diff --git a/go-controller/vendor/golang.org/x/net/html/escape.go b/go-controller/vendor/golang.org/x/net/html/escape.go index d856139620..04c6bec210 100644 --- a/go-controller/vendor/golang.org/x/net/html/escape.go +++ b/go-controller/vendor/golang.org/x/net/html/escape.go @@ -193,6 +193,87 @@ func lower(b []byte) []byte { return b } +// escapeComment is like func escape but escapes its input bytes less often. +// Per https://github.com/golang/go/issues/58246 some HTML comments are (1) +// meaningful and (2) contain angle brackets that we'd like to avoid escaping +// unless we have to. 
+// +// "We have to" includes the '&' byte, since that introduces other escapes. +// +// It also includes those bytes (not including EOF) that would otherwise end +// the comment. Per the summary table at the bottom of comment_test.go, this is +// the '>' byte that, per above, we'd like to avoid escaping unless we have to. +// +// Studying the summary table (and T actions in its '>' column) closely, we +// only need to escape in states 43, 44, 49, 51 and 52. State 43 is at the +// start of the comment data. State 52 is after a '!'. The other three states +// are after a '-'. +// +// Our algorithm is thus to escape every '&' and to escape '>' if and only if: +// - The '>' is after a '!' or '-' (in the unescaped data) or +// - The '>' is at the start of the comment data (after the opening ""); err != nil { diff --git a/go-controller/vendor/golang.org/x/net/html/token.go b/go-controller/vendor/golang.org/x/net/html/token.go index 50f7c6aac8..5c2a1f4efa 100644 --- a/go-controller/vendor/golang.org/x/net/html/token.go +++ b/go-controller/vendor/golang.org/x/net/html/token.go @@ -110,7 +110,7 @@ func (t Token) String() string { case SelfClosingTagToken: return "<" + t.tagString() + "/>" case CommentToken: - return "" + return "" case DoctypeToken: return "" } @@ -598,10 +598,10 @@ scriptDataDoubleEscapeEnd: // readComment reads the next comment token starting with "